Merge branch 'vendor/GCC50' - gcc 5.0 snapshot 1 FEB 2015
[dragonfly.git] / contrib / gcc-5.0 / gcc / config / i386 / avx512fintrin.h
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2
3    This file is part of GCC.
4
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
37 /* Internal data types for implementing the intrinsics.  */
/* Every type below is a 64-byte GCC vector; the name gives the lane count
   and element kind, e.g. __v8df is 8 doubles, __v16su is 16 unsigned ints
   and __v64qi is 64 chars.  */
38 typedef double __v8df __attribute__ ((__vector_size__ (64)));
39 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
41 typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
42 typedef int __v16si __attribute__ ((__vector_size__ (64)));
43 typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
44 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
45 typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
46 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
47 typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
48
49 /* The Intel API is flexible enough that we must allow aliasing with other
50    vector types, and their scalar components.  */
51 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
52 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
53 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
/* Mask operand types.  In this header __mmask8 accompanies the 8-element
   (pd / epi64) operations and __mmask16 the 16-element (ps / epi32) ones;
   presumably one mask bit per vector element.  */
55 typedef unsigned char  __mmask8;
56 typedef unsigned short __mmask16;
57
58 extern __inline __m512i
59 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 _mm512_set_epi64 (long long __A, long long __B, long long __C,
61                   long long __D, long long __E, long long __F,
62                   long long __G, long long __H)
63 {
64   return __extension__ (__m512i) (__v8di)
65          { __H, __G, __F, __E, __D, __C, __B, __A };
66 }
67
68 /* Create the vector [A B C D E F G H I J K L M N O P].  */
69 extern __inline __m512i
70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
72                   int __E, int __F, int __G, int __H,
73                   int __I, int __J, int __K, int __L,
74                   int __M, int __N, int __O, int __P)
75 {
76   return __extension__ (__m512i)(__v16si)
77          { __P, __O, __N, __M, __L, __K, __J, __I,
78            __H, __G, __F, __E, __D, __C, __B, __A };
79 }
80
81 extern __inline __m512d
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 _mm512_set_pd (double __A, double __B, double __C, double __D,
84                double __E, double __F, double __G, double __H)
85 {
86   return __extension__ (__m512d)
87          { __H, __G, __F, __E, __D, __C, __B, __A };
88 }
89
90 extern __inline __m512
91 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92 _mm512_set_ps (float __A, float __B, float __C, float __D,
93                float __E, float __F, float __G, float __H,
94                float __I, float __J, float __K, float __L,
95                float __M, float __N, float __O, float __P)
96 {
97   return __extension__ (__m512)
98          { __P, __O, __N, __M, __L, __K, __J, __I,
99            __H, __G, __F, __E, __D, __C, __B, __A };
100 }
101
/* Reversed-order form of _mm512_set_epi64: the eight arguments are
   forwarded back to front, so e0 is passed last and e7 first.  */
102 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)                            \
103   _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
104
/* Reversed-order form of _mm512_set_epi32: the sixteen arguments are
   forwarded back to front, so e0 is passed last and e15 first.  */
105 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,                            \
106                           e8,e9,e10,e11,e12,e13,e14,e15)                      \
107   _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
108
/* Reversed-order form of _mm512_set_pd: the eight arguments are forwarded
   back to front, so e0 is passed last and e7 first.  */
109 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)                               \
110   _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
111
/* Reversed-order form of _mm512_set_ps: the sixteen arguments are forwarded
   back to front, so e0 is passed last and e15 first.  */
112 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
113   _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
114
/* Return a __m512 whose contents are unspecified.  __Y is initialized from
   itself, so no value is ever stored into it.  NOTE(review): presumably the
   GCC self-initialization idiom that avoids an uninitialized-use warning;
   confirm before changing the declaration.  */
115 extern __inline __m512
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm512_undefined_ps (void)
118 {
119   __m512 __Y = __Y;
120   return __Y;
121 }
122
/* Return a __m512d whose contents are unspecified.  __Y is initialized from
   itself, so no value is ever stored into it.  NOTE(review): presumably the
   GCC self-initialization idiom that avoids an uninitialized-use warning;
   confirm before changing the declaration.  */
123 extern __inline __m512d
124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
125 _mm512_undefined_pd (void)
126 {
127   __m512d __Y = __Y;
128   return __Y;
129 }
130
/* Return a __m512i whose contents are unspecified.  __Y is initialized from
   itself, so no value is ever stored into it.  NOTE(review): presumably the
   GCC self-initialization idiom that avoids an uninitialized-use warning;
   confirm before changing the declaration.  */
131 extern __inline __m512i
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 _mm512_undefined_si512 (void)
134 {
135   __m512i __Y = __Y;
136   return __Y;
137 }
138
139 extern __inline __m512i
140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
141 _mm512_set1_epi8 (char __A)
142 {
143   return __extension__ (__m512i)(__v64qi)
144          { __A, __A, __A, __A, __A, __A, __A, __A,
145            __A, __A, __A, __A, __A, __A, __A, __A,
146            __A, __A, __A, __A, __A, __A, __A, __A,
147            __A, __A, __A, __A, __A, __A, __A, __A,
148            __A, __A, __A, __A, __A, __A, __A, __A,
149            __A, __A, __A, __A, __A, __A, __A, __A,
150            __A, __A, __A, __A, __A, __A, __A, __A,
151            __A, __A, __A, __A, __A, __A, __A, __A };
152 }
153
154 extern __inline __m512i
155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
156 _mm512_set1_epi16 (short __A)
157 {
158   return __extension__ (__m512i)(__v32hi)
159          { __A, __A, __A, __A, __A, __A, __A, __A,
160            __A, __A, __A, __A, __A, __A, __A, __A,
161            __A, __A, __A, __A, __A, __A, __A, __A,
162            __A, __A, __A, __A, __A, __A, __A, __A };
163 }
164
165 extern __inline __m512d
166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
167 _mm512_set1_pd (double __A)
168 {
169   return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
170                                                   (__v2df) { __A, },
171                                                   (__v8df)
172                                                   _mm512_undefined_pd (),
173                                                   (__mmask8) -1);
174 }
175
176 extern __inline __m512
177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
178 _mm512_set1_ps (float __A)
179 {
180   return (__m512) __builtin_ia32_broadcastss512 (__extension__
181                                                  (__v4sf) { __A, },
182                                                  (__v16sf)
183                                                  _mm512_undefined_ps (),
184                                                  (__mmask16) -1);
185 }
186
187 /* Create the vector [A B C D A B C D A B C D A B C D].  */
188 extern __inline __m512i
189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
191 {
192   return __extension__ (__m512i)(__v16si)
193          { __D, __C, __B, __A, __D, __C, __B, __A,
194            __D, __C, __B, __A, __D, __C, __B, __A };
195 }
196
197 extern __inline __m512i
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
200                    long long __D)
201 {
202   return __extension__ (__m512i) (__v8di)
203          { __D, __C, __B, __A, __D, __C, __B, __A };
204 }
205
206 extern __inline __m512d
207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
208 _mm512_set4_pd (double __A, double __B, double __C, double __D)
209 {
210   return __extension__ (__m512d)
211          { __D, __C, __B, __A, __D, __C, __B, __A };
212 }
213
214 extern __inline __m512
215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
216 _mm512_set4_ps (float __A, float __B, float __C, float __D)
217 {
218   return __extension__ (__m512)
219          { __D, __C, __B, __A, __D, __C, __B, __A,
220            __D, __C, __B, __A, __D, __C, __B, __A };
221 }
222
/* Reversed-order form of _mm512_set4_epi64: e0 is passed last, e3 first.  */
223 #define _mm512_setr4_epi64(e0,e1,e2,e3)                                       \
224   _mm512_set4_epi64(e3,e2,e1,e0)
225
/* Reversed-order form of _mm512_set4_epi32: e0 is passed last, e3 first.  */
226 #define _mm512_setr4_epi32(e0,e1,e2,e3)                                       \
227   _mm512_set4_epi32(e3,e2,e1,e0)
228
/* Reversed-order form of _mm512_set4_pd: e0 is passed last, e3 first.  */
229 #define _mm512_setr4_pd(e0,e1,e2,e3)                                          \
230   _mm512_set4_pd(e3,e2,e1,e0)
231
/* Reversed-order form of _mm512_set4_ps: e0 is passed last, e3 first.  */
232 #define _mm512_setr4_ps(e0,e1,e2,e3)                                          \
233   _mm512_set4_ps(e3,e2,e1,e0)
234
235 extern __inline __m512
236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
237 _mm512_setzero_ps (void)
238 {
239   return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
240                                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
241 }
242
243 extern __inline __m512d
244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
245 _mm512_setzero_pd (void)
246 {
247   return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
248 }
249
250 extern __inline __m512i
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 _mm512_setzero_epi32 (void)
253 {
254   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
255 }
256
257 extern __inline __m512i
258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259 _mm512_setzero_si512 (void)
260 {
261   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
262 }
263
264 extern __inline __m512d
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
267 {
268   return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
269                                                   (__v8df) __W,
270                                                   (__mmask8) __U);
271 }
272
273 extern __inline __m512d
274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
276 {
277   return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
278                                                   (__v8df)
279                                                   _mm512_setzero_pd (),
280                                                   (__mmask8) __U);
281 }
282
283 extern __inline __m512
284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
285 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
286 {
287   return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
288                                                  (__v16sf) __W,
289                                                  (__mmask16) __U);
290 }
291
292 extern __inline __m512
293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
294 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
295 {
296   return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
297                                                  (__v16sf)
298                                                  _mm512_setzero_ps (),
299                                                  (__mmask16) __U);
300 }
301
302 extern __inline __m512d
303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
304 _mm512_load_pd (void const *__P)
305 {
306   return *(__m512d *) __P;
307 }
308
309 extern __inline __m512d
310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
312 {
313   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
314                                                    (__v8df) __W,
315                                                    (__mmask8) __U);
316 }
317
318 extern __inline __m512d
319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
321 {
322   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
323                                                    (__v8df)
324                                                    _mm512_setzero_pd (),
325                                                    (__mmask8) __U);
326 }
327
328 extern __inline void
329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330 _mm512_store_pd (void *__P, __m512d __A)
331 {
332   *(__m512d *) __P = __A;
333 }
334
335 extern __inline void
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
338 {
339   __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
340                                    (__mmask8) __U);
341 }
342
343 extern __inline __m512
344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345 _mm512_load_ps (void const *__P)
346 {
347   return *(__m512 *) __P;
348 }
349
350 extern __inline __m512
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
353 {
354   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
355                                                   (__v16sf) __W,
356                                                   (__mmask16) __U);
357 }
358
359 extern __inline __m512
360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
361 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
362 {
363   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
364                                                   (__v16sf)
365                                                   _mm512_setzero_ps (),
366                                                   (__mmask16) __U);
367 }
368
369 extern __inline void
370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
371 _mm512_store_ps (void *__P, __m512 __A)
372 {
373   *(__m512 *) __P = __A;
374 }
375
376 extern __inline void
377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
379 {
380   __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
381                                    (__mmask16) __U);
382 }
383
384 extern __inline __m512i
385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
386 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
387 {
388   return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
389                                                      (__v8di) __W,
390                                                      (__mmask8) __U);
391 }
392
393 extern __inline __m512i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
396 {
397   return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
398                                                      (__v8di)
399                                                      _mm512_setzero_si512 (),
400                                                      (__mmask8) __U);
401 }
402
403 extern __inline __m512i
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405 _mm512_load_epi64 (void const *__P)
406 {
407   return *(__m512i *) __P;
408 }
409
410 extern __inline __m512i
411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
413 {
414   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
415                                                         (__v8di) __W,
416                                                         (__mmask8) __U);
417 }
418
419 extern __inline __m512i
420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
421 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
422 {
423   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
424                                                         (__v8di)
425                                                         _mm512_setzero_si512 (),
426                                                         (__mmask8) __U);
427 }
428
429 extern __inline void
430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
431 _mm512_store_epi64 (void *__P, __m512i __A)
432 {
433   *(__m512i *) __P = __A;
434 }
435
436 extern __inline void
437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
439 {
440   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
441                                         (__mmask8) __U);
442 }
443
444 extern __inline __m512i
445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
447 {
448   return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
449                                                      (__v16si) __W,
450                                                      (__mmask16) __U);
451 }
452
453 extern __inline __m512i
454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
455 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
456 {
457   return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
458                                                      (__v16si)
459                                                      _mm512_setzero_si512 (),
460                                                      (__mmask16) __U);
461 }
462
463 extern __inline __m512i
464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
465 _mm512_load_si512 (void const *__P)
466 {
467   return *(__m512i *) __P;
468 }
469
470 extern __inline __m512i
471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472 _mm512_load_epi32 (void const *__P)
473 {
474   return *(__m512i *) __P;
475 }
476
477 extern __inline __m512i
478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
480 {
481   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
482                                                         (__v16si) __W,
483                                                         (__mmask16) __U);
484 }
485
486 extern __inline __m512i
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
489 {
490   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
491                                                         (__v16si)
492                                                         _mm512_setzero_si512 (),
493                                                         (__mmask16) __U);
494 }
495
496 extern __inline void
497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
498 _mm512_store_si512 (void *__P, __m512i __A)
499 {
500   *(__m512i *) __P = __A;
501 }
502
503 extern __inline void
504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505 _mm512_store_epi32 (void *__P, __m512i __A)
506 {
507   *(__m512i *) __P = __A;
508 }
509
510 extern __inline void
511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
513 {
514   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
515                                         (__mmask16) __U);
516 }
517
518 extern __inline __m512i
519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
521 {
522   return (__m512i) ((__v16su) __A * (__v16su) __B);
523 }
524
525 extern __inline __m512i
526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
528 {
529   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
530                                                   (__v16si) __B,
531                                                   (__v16si)
532                                                   _mm512_setzero_si512 (),
533                                                   __M);
534 }
535
536 extern __inline __m512i
537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
539 {
540   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
541                                                   (__v16si) __B,
542                                                   (__v16si) __W, __M);
543 }
544
545 extern __inline __m512i
546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
548 {
549   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
550                                                   (__v16si) __Y,
551                                                   (__v16si)
552                                                   _mm512_undefined_si512 (),
553                                                   (__mmask16) -1);
554 }
555
556 extern __inline __m512i
557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
558 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
559 {
560   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
561                                                   (__v16si) __Y,
562                                                   (__v16si) __W,
563                                                   (__mmask16) __U);
564 }
565
566 extern __inline __m512i
567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
569 {
570   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
571                                                   (__v16si) __Y,
572                                                   (__v16si)
573                                                   _mm512_setzero_si512 (),
574                                                   (__mmask16) __U);
575 }
576
577 extern __inline __m512i
578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
579 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
580 {
581   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
582                                                   (__v16si) __Y,
583                                                   (__v16si)
584                                                   _mm512_undefined_si512 (),
585                                                   (__mmask16) -1);
586 }
587
588 extern __inline __m512i
589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
590 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
591 {
592   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
593                                                   (__v16si) __Y,
594                                                   (__v16si) __W,
595                                                   (__mmask16) __U);
596 }
597
598 extern __inline __m512i
599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
601 {
602   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
603                                                   (__v16si) __Y,
604                                                   (__v16si)
605                                                   _mm512_setzero_si512 (),
606                                                   (__mmask16) __U);
607 }
608
609 extern __inline __m512i
610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
611 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
612 {
613   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
614                                                   (__v16si) __Y,
615                                                   (__v16si)
616                                                   _mm512_undefined_si512 (),
617                                                   (__mmask16) -1);
618 }
619
620 extern __inline __m512i
621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
623 {
624   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
625                                                   (__v16si) __Y,
626                                                   (__v16si) __W,
627                                                   (__mmask16) __U);
628 }
629
630 extern __inline __m512i
631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
632 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
633 {
634   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
635                                                   (__v16si) __Y,
636                                                   (__v16si)
637                                                   _mm512_setzero_si512 (),
638                                                   (__mmask16) __U);
639 }
640
641 extern __inline __m512i
642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643 _mm512_add_epi64 (__m512i __A, __m512i __B)
644 {
645   return (__m512i) ((__v8du) __A + (__v8du) __B);
646 }
647
648 extern __inline __m512i
649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
651 {
652   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
653                                                  (__v8di) __B,
654                                                  (__v8di) __W,
655                                                  (__mmask8) __U);
656 }
657
658 extern __inline __m512i
659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
660 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
661 {
662   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
663                                                  (__v8di) __B,
664                                                  (__v8di)
665                                                  _mm512_setzero_si512 (),
666                                                  (__mmask8) __U);
667 }
668
669 extern __inline __m512i
670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
671 _mm512_sub_epi64 (__m512i __A, __m512i __B)
672 {
673   return (__m512i) ((__v8du) __A - (__v8du) __B);
674 }
675
676 extern __inline __m512i
677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
679 {
680   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
681                                                  (__v8di) __B,
682                                                  (__v8di) __W,
683                                                  (__mmask8) __U);
684 }
685
686 extern __inline __m512i
687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
688 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
689 {
690   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
691                                                  (__v8di) __B,
692                                                  (__v8di)
693                                                  _mm512_setzero_si512 (),
694                                                  (__mmask8) __U);
695 }
696
697 extern __inline __m512i
698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
700 {
701   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
702                                                  (__v8di) __Y,
703                                                  (__v8di)
704                                                  _mm512_undefined_pd (),
705                                                  (__mmask8) -1);
706 }
707
708 extern __inline __m512i
709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
710 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
711 {
712   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
713                                                  (__v8di) __Y,
714                                                  (__v8di) __W,
715                                                  (__mmask8) __U);
716 }
717
718 extern __inline __m512i
719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
720 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
721 {
722   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
723                                                  (__v8di) __Y,
724                                                  (__v8di)
725                                                  _mm512_setzero_si512 (),
726                                                  (__mmask8) __U);
727 }
728
729 extern __inline __m512i
730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
732 {
733   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
734                                                  (__v8di) __Y,
735                                                  (__v8di)
736                                                  _mm512_undefined_si512 (),
737                                                  (__mmask8) -1);
738 }
739
740 extern __inline __m512i
741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
742 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
743 {
744   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
745                                                  (__v8di) __Y,
746                                                  (__v8di) __W,
747                                                  (__mmask8) __U);
748 }
749
750 extern __inline __m512i
751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
752 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
753 {
754   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
755                                                  (__v8di) __Y,
756                                                  (__v8di)
757                                                  _mm512_setzero_si512 (),
758                                                  (__mmask8) __U);
759 }
760
/* Unmasked per-lane right shift of the eight long long lanes of __X by the
   counts in __Y through the psrlv8di builtin (a logical shift, judging by
   the "psrlv" name).  All mask bits are set and the pass-through operand is
   left undefined, so it is presumably never selected.  */
761 extern __inline __m512i
762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
764 {
765   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
766                                                  (__v8di) __Y,
767                                                  (__v8di)
768                                                  _mm512_undefined_si512 (),
769                                                  (__mmask8) -1);
770 }
771
/* Merge-masked psrlv8di shift: passes __W as the pass-through operand and
   __U as the 8-bit mask.  Lanes with a clear mask bit presumably keep the
   corresponding lane of __W -- confirm against the builtin.  */
772 extern __inline __m512i
773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
774 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
775 {
776   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
777                                                  (__v8di) __Y,
778                                                  (__v8di) __W,
779                                                  (__mmask8) __U);
780 }
781
/* Zero-masked psrlv8di shift: identical call to the merge-masked form
   except that the pass-through operand is the _mm512_setzero_si512 ()
   vector, so unselected lanes are presumably zero.  */
782 extern __inline __m512i
783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
784 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
785 {
786   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
787                                                  (__v8di) __Y,
788                                                  (__v8di)
789                                                  _mm512_setzero_si512 (),
790                                                  (__mmask8) __U);
791 }
792
/* Lane-wise addition of __A and __B viewed as unsigned int lanes (__v16su).
   Plain vector arithmetic, no builtin or mask; the unsigned element type
   makes overflow wrap rather than being undefined.  */
793 extern __inline __m512i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm512_add_epi32 (__m512i __A, __m512i __B)
796 {
797   return (__m512i) ((__v16su) __A + (__v16su) __B);
798 }
799
/* Merge-masked 32-bit add through the paddd512 builtin: __A and __B are the
   addends (as __v16si), __W the pass-through operand and __U the 16-bit
   lane mask.  Presumably lanes with a clear bit in __U take their value
   from __W -- confirm against the builtin.  */
800 extern __inline __m512i
801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
803 {
804   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
805                                                  (__v16si) __B,
806                                                  (__v16si) __W,
807                                                  (__mmask16) __U);
808 }
809
/* Zero-masked 32-bit add: same paddd512 builtin call with the
   _mm512_setzero_si512 () vector as pass-through, so lanes deselected by
   __U are presumably zero in the result.  */
810 extern __inline __m512i
811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
812 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
813 {
814   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
815                                                  (__v16si) __B,
816                                                  (__v16si)
817                                                  _mm512_setzero_si512 (),
818                                                  (__mmask16) __U);
819 }
820
/* Unmasked multiply through the pmuldq512 builtin.  The sources are passed
   as int lanes (__v16si) while the pass-through operand and mask are
   eight-lane (__v8di, __mmask8), which suggests eight widened 64-bit
   products -- TODO confirm which source lanes are multiplied.  The
   pass-through is undefined and every mask bit is set.  */
821 extern __inline __m512i
822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
824 {
825   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
826                                                   (__v16si) __Y,
827                                                   (__v8di)
828                                                   _mm512_undefined_si512 (),
829                                                   (__mmask8) -1);
830 }
831
/* Merge-masked pmuldq512 multiply: __W (as __v8di) is the pass-through
   operand and __M the 8-bit mask, forwarded without a cast because the
   parameter already has type __mmask8.  Lanes with a clear mask bit
   presumably keep the value from __W.  */
832 extern __inline __m512i
833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
835 {
836   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
837                                                   (__v16si) __Y,
838                                                   (__v8di) __W, __M);
839 }
840
/* Zero-masked pmuldq512 multiply: the pass-through operand is the
   _mm512_setzero_si512 () vector viewed as __v8di, with __M as the 8-bit
   mask; unselected lanes are presumably zero.  */
841 extern __inline __m512i
842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
844 {
845   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
846                                                   (__v16si) __Y,
847                                                   (__v8di)
848                                                   _mm512_setzero_si512 (),
849                                                   __M);
850 }
851
/* Lane-wise subtraction __A - __B on unsigned int lanes (__v16su).  Written
   as plain vector arithmetic with no builtin or mask; unsigned lanes wrap
   on underflow instead of invoking undefined behaviour.  */
852 extern __inline __m512i
853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
854 _mm512_sub_epi32 (__m512i __A, __m512i __B)
855 {
856   return (__m512i) ((__v16su) __A - (__v16su) __B);
857 }
858
/* Merge-masked 32-bit subtract through the psubd512 builtin: operands __A
   and __B in that order, __W as pass-through and __U as the 16-bit mask.
   Presumably lanes with a clear bit in __U are taken from __W.  */
859 extern __inline __m512i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
862 {
863   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
864                                                  (__v16si) __B,
865                                                  (__v16si) __W,
866                                                  (__mmask16) __U);
867 }
868
/* Zero-masked 32-bit subtract: psubd512 builtin with the
   _mm512_setzero_si512 () vector as pass-through and __U as mask, so
   unselected lanes are presumably zero.  */
869 extern __inline __m512i
870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
872 {
873   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
874                                                  (__v16si) __B,
875                                                  (__v16si)
876                                                  _mm512_setzero_si512 (),
877                                                  (__mmask16) __U);
878 }
879
/* Unmasked multiply through the pmuludq512 builtin.  Note the sources are
   still cast to the signed __v16si type; the builtin presumably interprets
   the lanes as unsigned (per its "pmuludq" name) and yields eight 64-bit
   products -- TODO confirm.  Pass-through is undefined, mask is all ones.  */
880 extern __inline __m512i
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
883 {
884   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
885                                                    (__v16si) __Y,
886                                                    (__v8di)
887                                                    _mm512_undefined_si512 (),
888                                                    (__mmask8) -1);
889 }
890
/* Merge-masked pmuludq512 multiply: __W (as __v8di) is the pass-through
   operand and __M, already an __mmask8, is forwarded as the mask.  Lanes
   with a clear mask bit presumably keep the value from __W.  */
891 extern __inline __m512i
892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
894 {
895   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
896                                                    (__v16si) __Y,
897                                                    (__v8di) __W, __M);
898 }
899
/* Zero-masked pmuludq512 multiply: pass-through is the
   _mm512_setzero_si512 () vector viewed as __v8di and __M is the 8-bit
   mask, so unselected lanes are presumably zero.  */
900 extern __inline __m512i
901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
903 {
904   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
905                                                    (__v16si) __Y,
906                                                    (__v8di)
907                                                    _mm512_setzero_si512 (),
908                                                    __M);
909 }
910
/* Left shift of the eight 64-bit lanes by a single scalar count, via the
   psllqi512 builtin, in unmasked, merge-masked (__W/__U) and zero-masked
   (__U) flavours.  The inline functions are only defined when __OPTIMIZE__
   is set; otherwise the macros in the #else branch expand to the same
   builtin calls -- presumably because the builtin needs the count as a
   compile-time constant, which a function parameter only becomes after
   inlining (TODO confirm).  */
911 #ifdef __OPTIMIZE__
/* Unmasked: undefined pass-through, all-ones mask.  */
912 extern __inline __m512i
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
915 {
916   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
917                                                   (__v8di)
918                                                   _mm512_undefined_si512 (),
919                                                   (__mmask8) -1);
920 }
921
/* Merge-masked: __W is the pass-through operand, __U the mask.  */
922 extern __inline __m512i
923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
924 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
925                         unsigned int __B)
926 {
927   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
928                                                   (__v8di) __W,
929                                                   (__mmask8) __U);
930 }
931
/* Zero-masked: the pass-through operand is the setzero vector.  */
932 extern __inline __m512i
933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
934 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
935 {
936   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
937                                                   (__v8di)
938                                                   _mm512_setzero_si512 (),
939                                                   (__mmask8) __U);
940 }
941 #else
/* Macro fallbacks with the same operand layout as the functions above.
   NOTE(review): the count is cast with (int)(C) here, whereas the inline
   versions declare it unsigned int -- confirm the difference is harmless
   for the builtin's operand type.  */
942 #define _mm512_slli_epi64(X, C)                                            \
943   ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
944     (__v8di)(__m512i)_mm512_undefined_si512 (),\
945     (__mmask8)-1))
946
947 #define _mm512_mask_slli_epi64(W, U, X, C)                                 \
948   ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
949     (__v8di)(__m512i)(W),\
950     (__mmask8)(U)))
951
952 #define _mm512_maskz_slli_epi64(U, X, C)                                   \
953   ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
954     (__v8di)(__m512i)_mm512_setzero_si512 (),\
955     (__mmask8)(U)))
956 #endif
957
/* Unmasked left shift of the eight 64-bit lanes of __A through the psllq512
   builtin.  The count is supplied as a 128-bit vector (__B, viewed as
   __v2di); presumably one count taken from its low element is applied to
   every lane -- TODO confirm.  Pass-through undefined, mask all ones.  */
958 extern __inline __m512i
959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960 _mm512_sll_epi64 (__m512i __A, __m128i __B)
961 {
962   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
963                                                  (__v2di) __B,
964                                                  (__v8di)
965                                                  _mm512_undefined_si512 (),
966                                                  (__mmask8) -1);
967 }
968
/* Merge-masked psllq512 shift: count vector __B (as __v2di), pass-through
   __W and 8-bit mask __U.  Lanes with a clear bit in __U presumably keep
   the value from __W.  */
969 extern __inline __m512i
970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
971 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
972 {
973   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
974                                                  (__v2di) __B,
975                                                  (__v8di) __W,
976                                                  (__mmask8) __U);
977 }
978
/* Zero-masked psllq512 shift: same call with the _mm512_setzero_si512 ()
   vector as pass-through, so lanes deselected by __U are presumably
   zero.  */
979 extern __inline __m512i
980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
981 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
982 {
983   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
984                                                  (__v2di) __B,
985                                                  (__v8di)
986                                                  _mm512_setzero_si512 (),
987                                                  (__mmask8) __U);
988 }
989
/* Right shift of the eight 64-bit lanes by a single scalar count via the
   psrlqi512 builtin (logical shift, judging by the "psrl" name), in
   unmasked, merge-masked and zero-masked flavours.  Inline functions are
   provided under __OPTIMIZE__ and equivalent macros otherwise; presumably
   the split exists because the count must reach the builtin as a constant
   expression (TODO confirm).  */
990 #ifdef __OPTIMIZE__
/* Unmasked: undefined pass-through, all-ones mask.  */
991 extern __inline __m512i
992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
993 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
994 {
995   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
996                                                   (__v8di)
997                                                   _mm512_undefined_si512 (),
998                                                   (__mmask8) -1);
999 }
1000
/* Merge-masked: __W is the pass-through operand, __U the mask.  */
1001 extern __inline __m512i
1002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1003 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1004                         __m512i __A, unsigned int __B)
1005 {
1006   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1007                                                   (__v8di) __W,
1008                                                   (__mmask8) __U);
1009 }
1010
/* Zero-masked: the pass-through operand is the setzero vector.  */
1011 extern __inline __m512i
1012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1013 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1014 {
1015   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1016                                                   (__v8di)
1017                                                   _mm512_setzero_si512 (),
1018                                                   (__mmask8) __U);
1019 }
1020 #else
/* Macro fallbacks mirroring the functions above.  NOTE(review): the count
   is cast with (int)(C) here but declared unsigned int in the inline
   versions -- confirm this is intentional.  */
1021 #define _mm512_srli_epi64(X, C)                                            \
1022   ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1023     (__v8di)(__m512i)_mm512_undefined_si512 (),\
1024     (__mmask8)-1))
1025
1026 #define _mm512_mask_srli_epi64(W, U, X, C)                                 \
1027   ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1028     (__v8di)(__m512i)(W),\
1029     (__mmask8)(U)))
1030
1031 #define _mm512_maskz_srli_epi64(U, X, C)                                   \
1032   ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1033     (__v8di)(__m512i)_mm512_setzero_si512 (),\
1034     (__mmask8)(U)))
1035 #endif
1036
/* Unmasked right shift of the eight 64-bit lanes of __A through the
   psrlq512 builtin (logical, judging by the name), with the count carried
   in the 128-bit vector __B viewed as __v2di.  Pass-through is undefined
   and the mask is all ones.  */
1037 extern __inline __m512i
1038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1039 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1040 {
1041   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1042                                                  (__v2di) __B,
1043                                                  (__v8di)
1044                                                  _mm512_undefined_si512 (),
1045                                                  (__mmask8) -1);
1046 }
1047
/* Merge-masked psrlq512 shift: count vector __B, pass-through __W and
   8-bit mask __U.  Lanes whose bit in __U is clear presumably keep the
   value from __W.  */
1048 extern __inline __m512i
1049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1050 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1051 {
1052   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1053                                                  (__v2di) __B,
1054                                                  (__v8di) __W,
1055                                                  (__mmask8) __U);
1056 }
1057
/* Zero-masked psrlq512 shift: the _mm512_setzero_si512 () vector is the
   pass-through operand, so lanes deselected by __U are presumably zero.  */
1058 extern __inline __m512i
1059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1061 {
1062   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1063                                                  (__v2di) __B,
1064                                                  (__v8di)
1065                                                  _mm512_setzero_si512 (),
1066                                                  (__mmask8) __U);
1067 }
1068
/* Right shift of the eight 64-bit lanes by a single scalar count via the
   psraqi512 builtin (arithmetic shift, judging by the "psra" name).  Three
   flavours are provided: unmasked, merge-masked (__W/__U) and zero-masked
   (__U).  Functions are used under __OPTIMIZE__, macros otherwise --
   presumably so the count is always a constant expression at the builtin
   call (TODO confirm).  */
1069 #ifdef __OPTIMIZE__
/* Unmasked: undefined pass-through, all-ones mask.  */
1070 extern __inline __m512i
1071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1073 {
1074   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1075                                                   (__v8di)
1076                                                   _mm512_undefined_si512 (),
1077                                                   (__mmask8) -1);
1078 }
1079
/* Merge-masked: __W is the pass-through operand, __U the mask.  */
1080 extern __inline __m512i
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1083                         unsigned int __B)
1084 {
1085   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1086                                                   (__v8di) __W,
1087                                                   (__mmask8) __U);
1088 }
1089
/* Zero-masked: the pass-through operand is the setzero vector.  */
1090 extern __inline __m512i
1091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1092 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1093 {
1094   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1095                                                   (__v8di)
1096                                                   _mm512_setzero_si512 (),
1097                                                   (__mmask8) __U);
1098 }
1099 #else
/* Macro fallbacks mirroring the functions above.  NOTE(review): the count
   is cast with (int)(C) here but declared unsigned int in the inline
   versions -- confirm this is intentional.  */
1100 #define _mm512_srai_epi64(X, C)                                            \
1101   ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1102     (__v8di)(__m512i)_mm512_undefined_si512 (),\
1103     (__mmask8)-1))
1104
1105 #define _mm512_mask_srai_epi64(W, U, X, C)                                 \
1106   ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1107     (__v8di)(__m512i)(W),\
1108     (__mmask8)(U)))
1109
1110 #define _mm512_maskz_srai_epi64(U, X, C)                                   \
1111   ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1112     (__v8di)(__m512i)_mm512_setzero_si512 (),\
1113     (__mmask8)(U)))
1114 #endif
1115
/* Unmasked right shift of the eight 64-bit lanes of __A through the
   psraq512 builtin (arithmetic, judging by the name); the count comes from
   the 128-bit vector __B viewed as __v2di.  Pass-through is undefined and
   the mask is all ones.  */
1116 extern __inline __m512i
1117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1118 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1119 {
1120   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1121                                                  (__v2di) __B,
1122                                                  (__v8di)
1123                                                  _mm512_undefined_si512 (),
1124                                                  (__mmask8) -1);
1125 }
1126
/* Merge-masked psraq512 shift: count vector __B, pass-through __W, 8-bit
   mask __U.  Lanes with a clear mask bit presumably keep the value from
   __W.  */
1127 extern __inline __m512i
1128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1130 {
1131   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1132                                                  (__v2di) __B,
1133                                                  (__v8di) __W,
1134                                                  (__mmask8) __U);
1135 }
1136
/* Zero-masked psraq512 shift: the _mm512_setzero_si512 () vector is the
   pass-through operand, so lanes deselected by __U are presumably zero.  */
1137 extern __inline __m512i
1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1140 {
1141   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1142                                                  (__v2di) __B,
1143                                                  (__v8di)
1144                                                  _mm512_setzero_si512 (),
1145                                                  (__mmask8) __U);
1146 }
1147
/* Left shift of the 32-bit lanes (__v16si) by a single scalar count via the
   pslldi512 builtin, with a 16-bit lane mask.  Unmasked, merge-masked
   (__W/__U) and zero-masked (__U) flavours are defined as inline functions
   under __OPTIMIZE__ and as macros otherwise -- presumably so the count
   always reaches the builtin as a constant expression (TODO confirm).  */
1148 #ifdef __OPTIMIZE__
/* Unmasked: undefined pass-through, all-ones mask.  */
1149 extern __inline __m512i
1150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1152 {
1153   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1154                                                   (__v16si)
1155                                                   _mm512_undefined_si512 (),
1156                                                   (__mmask16) -1);
1157 }
1158
/* Merge-masked: __W is the pass-through operand, __U the mask.  */
1159 extern __inline __m512i
1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1162                         unsigned int __B)
1163 {
1164   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1165                                                   (__v16si) __W,
1166                                                   (__mmask16) __U);
1167 }
1168
/* Zero-masked: the pass-through operand is the setzero vector.  */
1169 extern __inline __m512i
1170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1172 {
1173   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1174                                                   (__v16si)
1175                                                   _mm512_setzero_si512 (),
1176                                                   (__mmask16) __U);
1177 }
1178 #else
/* Macro fallbacks mirroring the functions above.  NOTE(review): the count
   is cast with (int)(C) here but declared unsigned int in the inline
   versions -- confirm this is intentional.  */
1179 #define _mm512_slli_epi32(X, C)                                             \
1180   ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1181     (__v16si)(__m512i)_mm512_undefined_si512 (),\
1182     (__mmask16)-1))
1183
1184 #define _mm512_mask_slli_epi32(W, U, X, C)                                  \
1185   ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1186     (__v16si)(__m512i)(W),\
1187     (__mmask16)(U)))
1188
1189 #define _mm512_maskz_slli_epi32(U, X, C)                                    \
1190   ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1191     (__v16si)(__m512i)_mm512_setzero_si512 (),\
1192     (__mmask16)(U)))
1193 #endif
1194
/* Unmasked left shift of the 32-bit lanes of __A through the pslld512
   builtin.  The count is carried in the 128-bit vector __B, viewed as
   __v4si; presumably a single count from its low bits applies to every
   lane -- TODO confirm.  Pass-through undefined, 16-bit mask all ones.  */
1195 extern __inline __m512i
1196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1198 {
1199   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1200                                                  (__v4si) __B,
1201                                                  (__v16si)
1202                                                  _mm512_undefined_si512 (),
1203                                                  (__mmask16) -1);
1204 }
1205
/* Merge-masked pslld512 shift: count vector __B (as __v4si), pass-through
   __W and 16-bit mask __U.  Lanes with a clear bit in __U presumably keep
   the value from __W.  */
1206 extern __inline __m512i
1207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1209 {
1210   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1211                                                  (__v4si) __B,
1212                                                  (__v16si) __W,
1213                                                  (__mmask16) __U);
1214 }
1215
/* Zero-masked pslld512 shift: the _mm512_setzero_si512 () vector is the
   pass-through operand, so lanes deselected by __U are presumably zero.  */
1216 extern __inline __m512i
1217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1219 {
1220   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1221                                                  (__v4si) __B,
1222                                                  (__v16si)
1223                                                  _mm512_setzero_si512 (),
1224                                                  (__mmask16) __U);
1225 }
1226
/* Right shift of the 32-bit lanes (__v16si) by a single scalar count via
   the psrldi512 builtin (logical, judging by the "psrl" name), with a
   16-bit lane mask.  Unmasked, merge-masked and zero-masked flavours exist
   as inline functions under __OPTIMIZE__ and as macros otherwise --
   presumably so the count is a constant expression at the builtin call
   (TODO confirm).  */
1227 #ifdef __OPTIMIZE__
/* Unmasked: undefined pass-through, all-ones mask.  */
1228 extern __inline __m512i
1229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1230 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1231 {
1232   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1233                                                   (__v16si)
1234                                                   _mm512_undefined_si512 (),
1235                                                   (__mmask16) -1);
1236 }
1237
/* Merge-masked: __W is the pass-through operand, __U the mask.  */
1238 extern __inline __m512i
1239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1240 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1241                         __m512i __A, unsigned int __B)
1242 {
1243   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1244                                                   (__v16si) __W,
1245                                                   (__mmask16) __U);
1246 }
1247
/* Zero-masked: the pass-through operand is the setzero vector.  */
1248 extern __inline __m512i
1249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1251 {
1252   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1253                                                   (__v16si)
1254                                                   _mm512_setzero_si512 (),
1255                                                   (__mmask16) __U);
1256 }
1257 #else
/* Macro fallbacks mirroring the functions above.  NOTE(review): the count
   is cast with (int)(C) here but declared unsigned int in the inline
   versions -- confirm this is intentional.  */
1258 #define _mm512_srli_epi32(X, C)                                             \
1259   ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1260     (__v16si)(__m512i)_mm512_undefined_si512 (),\
1261     (__mmask16)-1))
1262
1263 #define _mm512_mask_srli_epi32(W, U, X, C)                                  \
1264   ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1265     (__v16si)(__m512i)(W),\
1266     (__mmask16)(U)))
1267
1268 #define _mm512_maskz_srli_epi32(U, X, C)                                    \
1269   ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1270     (__v16si)(__m512i)_mm512_setzero_si512 (),\
1271     (__mmask16)(U)))
1272 #endif
1273
/* Unmasked right shift of the 32-bit lanes of __A through the psrld512
   builtin (logical, judging by the name); the count is carried in the
   128-bit vector __B viewed as __v4si.  Pass-through is undefined and the
   16-bit mask is all ones.  */
1274 extern __inline __m512i
1275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1276 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1277 {
1278   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1279                                                  (__v4si) __B,
1280                                                  (__v16si)
1281                                                  _mm512_undefined_si512 (),
1282                                                  (__mmask16) -1);
1283 }
1284
/* Merge-masked psrld512 shift: count vector __B, pass-through __W and
   16-bit mask __U.  Lanes with a clear mask bit presumably keep the value
   from __W.  */
1285 extern __inline __m512i
1286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1287 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1288 {
1289   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1290                                                  (__v4si) __B,
1291                                                  (__v16si) __W,
1292                                                  (__mmask16) __U);
1293 }
1294
/* Zero-masked psrld512 shift: the _mm512_setzero_si512 () vector is the
   pass-through operand, so lanes deselected by __U are presumably zero.  */
1295 extern __inline __m512i
1296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1298 {
1299   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1300                                                  (__v4si) __B,
1301                                                  (__v16si)
1302                                                  _mm512_setzero_si512 (),
1303                                                  (__mmask16) __U);
1304 }
1305
/* Right shift of the 32-bit lanes (__v16si) by a single scalar count via
   the psradi512 builtin (arithmetic, judging by the "psra" name), with a
   16-bit lane mask.  Unmasked, merge-masked and zero-masked flavours exist
   as inline functions under __OPTIMIZE__ and as macros otherwise --
   presumably so the count is a constant expression at the builtin call
   (TODO confirm).  */
1306 #ifdef __OPTIMIZE__
/* Unmasked: undefined pass-through, all-ones mask.  */
1307 extern __inline __m512i
1308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1309 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1310 {
1311   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1312                                                   (__v16si)
1313                                                   _mm512_undefined_si512 (),
1314                                                   (__mmask16) -1);
1315 }
1316
/* Merge-masked: __W is the pass-through operand, __U the mask.  */
1317 extern __inline __m512i
1318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1319 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1320                         unsigned int __B)
1321 {
1322   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1323                                                   (__v16si) __W,
1324                                                   (__mmask16) __U);
1325 }
1326
/* Zero-masked: the pass-through operand is the setzero vector.  */
1327 extern __inline __m512i
1328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1329 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1330 {
1331   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1332                                                   (__v16si)
1333                                                   _mm512_setzero_si512 (),
1334                                                   (__mmask16) __U);
1335 }
1336 #else
/* Macro fallbacks mirroring the functions above.  NOTE(review): the count
   is cast with (int)(C) here but declared unsigned int in the inline
   versions -- confirm this is intentional.  */
1337 #define _mm512_srai_epi32(X, C)                                             \
1338   ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1339     (__v16si)(__m512i)_mm512_undefined_si512 (),\
1340     (__mmask16)-1))
1341
1342 #define _mm512_mask_srai_epi32(W, U, X, C)                                  \
1343   ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1344     (__v16si)(__m512i)(W),\
1345     (__mmask16)(U)))
1346
1347 #define _mm512_maskz_srai_epi32(U, X, C)                                    \
1348   ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1349     (__v16si)(__m512i)_mm512_setzero_si512 (),\
1350     (__mmask16)(U)))
1351 #endif
1352
/* Unmasked right shift of the 32-bit lanes of __A through the psrad512
   builtin (arithmetic, judging by the name); the count is carried in the
   128-bit vector __B viewed as __v4si.  Pass-through is undefined and the
   16-bit mask is all ones.  */
1353 extern __inline __m512i
1354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1356 {
1357   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1358                                                  (__v4si) __B,
1359                                                  (__v16si)
1360                                                  _mm512_undefined_si512 (),
1361                                                  (__mmask16) -1);
1362 }
1363
/* Merge-masked psrad512 shift: count vector __B, pass-through __W and
   16-bit mask __U.  Lanes with a clear mask bit presumably keep the value
   from __W.  */
1364 extern __inline __m512i
1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1367 {
1368   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1369                                                  (__v4si) __B,
1370                                                  (__v16si) __W,
1371                                                  (__mmask16) __U);
1372 }
1373
/* Zero-masked psrad512 shift: the _mm512_setzero_si512 () vector is the
   pass-through operand, so lanes deselected by __U are presumably zero.  */
1374 extern __inline __m512i
1375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1377 {
1378   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1379                                                  (__v4si) __B,
1380                                                  (__v16si)
1381                                                  _mm512_setzero_si512 (),
1382                                                  (__mmask16) __U);
1383 }
1384
1385 #ifdef __OPTIMIZE__
/* Scalar-double add with an explicit third control argument: forwards __A
   and __B (as __v2df) and __R unchanged to the addsd_round builtin.  __R is
   presumably a rounding-mode selector that must be a compile-time constant
   -- TODO confirm; nothing here validates it.  Only defined under
   __OPTIMIZE__; a macro of the same name is used otherwise.  */
1386 extern __inline __m128d
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1389 {
1390   return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1391                                                (__v2df) __B,
1392                                                __R);
1393 }
1394
/* Scalar-float counterpart of the rounded add: forwards __A and __B (as
   __v4sf) and __R unchanged to the addss_round builtin.  __R is presumably
   a constant rounding-mode selector -- TODO confirm.  Only defined under
   __OPTIMIZE__; a macro of the same name is used otherwise.  */
1395 extern __inline __m128
1396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1398 {
1399   return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1400                                               (__v4sf) __B,
1401                                               __R);
1402 }
1403
/* Scalar-double subtract with an explicit third control argument: forwards
   __A and __B (as __v2df, in that order) and __R to the subsd_round
   builtin.  __R is presumably a constant rounding-mode selector -- TODO
   confirm.  Only defined under __OPTIMIZE__; a macro of the same name is
   used otherwise.  */
1404 extern __inline __m128d
1405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1406 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1407 {
1408   return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1409                                                (__v2df) __B,
1410                                                __R);
1411 }
1412
/* Scalar-float counterpart of the rounded subtract: forwards __A and __B
   (as __v4sf, in that order) and __R to the subss_round builtin.  __R is
   presumably a constant rounding-mode selector -- TODO confirm.  Only
   defined under __OPTIMIZE__; a macro of the same name is used
   otherwise.  */
1413 extern __inline __m128
1414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1415 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1416 {
1417   return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1418                                               (__v4sf) __B,
1419                                               __R);
1420 }
1421
1422 #else
/* Non-__OPTIMIZE__ fallback for _mm_add_round_sd: passes A, B and C straight
   to the addsd_round builtin.  The whole expansion is parenthesized so the
   (__m128d) cast always applies to the call's result, even when the macro
   is followed by a postfix operator or embedded in a larger expression;
   this matches the other fallback macros in this header.  */
1423 #define _mm_add_round_sd(A, B, C)            \
1424     ((__m128d)__builtin_ia32_addsd_round(A, B, C))
1425
/* Non-__OPTIMIZE__ fallback for _mm_add_round_ss: passes A, B and C straight
   to the addss_round builtin.  The expansion is wrapped in parentheses so
   the (__m128) cast binds to the call's result regardless of what follows
   the macro invocation.  */
1426 #define _mm_add_round_ss(A, B, C)            \
1427     ((__m128)__builtin_ia32_addss_round(A, B, C))
1428
/* Non-__OPTIMIZE__ fallback for _mm_sub_round_sd: passes A, B and C straight
   to the subsd_round builtin.  The expansion is wrapped in parentheses so
   the (__m128d) cast binds to the call's result regardless of what follows
   the macro invocation.  */
1429 #define _mm_sub_round_sd(A, B, C)            \
1430     ((__m128d)__builtin_ia32_subsd_round(A, B, C))
1431
/* Non-__OPTIMIZE__ fallback for _mm_sub_round_ss: passes A, B and C straight
   to the subss_round builtin.  The expansion is wrapped in parentheses so
   the (__m128) cast binds to the call's result regardless of what follows
   the macro invocation.  */
1432 #define _mm_sub_round_ss(A, B, C)            \
1433     ((__m128)__builtin_ia32_subss_round(A, B, C))
1434 #endif
1435
1436 #ifdef __OPTIMIZE__
/* Unmasked three-input operation on eight 64-bit lanes via the pternlogq512
   builtin: __A, __B and __C are the sources and __imm is forwarded as the
   immediate operand (presumably the truth table selecting the bitwise
   function -- TODO confirm).  The mask is all ones.
   The immediate parameter is spelled __imm, a reserved identifier like
   every other parameter in this header, so a user macro named "imm"
   defined before inclusion cannot break the declaration.  */
1437 extern __inline __m512i
1438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1439 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int __imm)
1440 {
1441   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1442                                                      (__v8di) __B,
1443                                                      (__v8di) __C, __imm,
1444                                                      (__mmask8) -1);
1445 }
1446
/* Masked form of the 64-bit ternary-logic operation: same pternlogq512_mask
   builtin as the unmasked intrinsic, with __U as the 8-bit mask.  There is
   no separate pass-through argument; __A is both the first source and,
   presumably, the value kept in lanes whose mask bit is clear -- TODO
   confirm.
   The immediate parameter is spelled __imm (reserved identifier) so a user
   macro named "imm" cannot interfere with this header.  */
1447 extern __inline __m512i
1448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1449 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1450                                 __m512i __C, const int __imm)
1451 {
1452   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1453                                                      (__v8di) __B,
1454                                                      (__v8di) __C, __imm,
1455                                                      (__mmask8) __U);
1456 }
1457
1458 extern __inline __m512i
1459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1461                                  __m512i __C, const int imm)
1462 {
1463   return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1464                                                       (__v8di) __B,
1465                                                       (__v8di) __C,
1466                                                       imm, (__mmask8) __U);
1467 }
1468
1469 extern __inline __m512i
1470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1472 {
1473   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1474                                                      (__v16si) __B,
1475                                                      (__v16si) __C,
1476                                                      imm, (__mmask16) -1);
1477 }
1478
1479 extern __inline __m512i
1480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1482                                 __m512i __C, const int imm)
1483 {
1484   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1485                                                      (__v16si) __B,
1486                                                      (__v16si) __C,
1487                                                      imm, (__mmask16) __U);
1488 }
1489
1490 extern __inline __m512i
1491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1492 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1493                                  __m512i __C, const int imm)
1494 {
1495   return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1496                                                       (__v16si) __B,
1497                                                       (__v16si) __C,
1498                                                       imm, (__mmask16) __U);
1499 }
1500 #else
/* Macro forms of the ternary-logic intrinsics for the branch taken when
   __OPTIMIZE__ is not defined.  Each operand is cast to the vector, int
   or mask type expected by the builtin and each expansion is enclosed in
   parentheses.  */
#define _mm512_ternarylogic_epi64(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
    (__v8di)(__m512i)(A),                                               \
    (__v8di)(__m512i)(B),                                               \
    (__v8di)(__m512i)(C),                                               \
    (int)(I),                                                           \
    (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
    (__v8di)(__m512i)(A),                                               \
    (__v8di)(__m512i)(B),                                               \
    (__v8di)(__m512i)(C),                                               \
    (int)(I),                                                           \
    (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogq512_maskz (                        \
    (__v8di)(__m512i)(A),                                               \
    (__v8di)(__m512i)(B),                                               \
    (__v8di)(__m512i)(C),                                               \
    (int)(I),                                                           \
    (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
    (__v16si)(__m512i)(A),                                              \
    (__v16si)(__m512i)(B),                                              \
    (__v16si)(__m512i)(C),                                              \
    (int)(I),                                                           \
    (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
    (__v16si)(__m512i)(A),                                              \
    (__v16si)(__m512i)(B),                                              \
    (__v16si)(__m512i)(C),                                              \
    (int)(I),                                                           \
    (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogd512_maskz (                        \
    (__v16si)(__m512i)(A),                                              \
    (__v16si)(__m512i)(B),                                              \
    (__v16si)(__m512i)(C),                                              \
    (int)(I),                                                           \
    (__mmask16)(U)))
1522 #endif
1523
1524 extern __inline __m512d
1525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1526 _mm512_rcp14_pd (__m512d __A)
1527 {
1528   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1529                                                    (__v8df)
1530                                                    _mm512_undefined_pd (),
1531                                                    (__mmask8) -1);
1532 }
1533
1534 extern __inline __m512d
1535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1537 {
1538   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1539                                                    (__v8df) __W,
1540                                                    (__mmask8) __U);
1541 }
1542
1543 extern __inline __m512d
1544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1546 {
1547   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1548                                                    (__v8df)
1549                                                    _mm512_setzero_pd (),
1550                                                    (__mmask8) __U);
1551 }
1552
1553 extern __inline __m512
1554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1555 _mm512_rcp14_ps (__m512 __A)
1556 {
1557   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1558                                                   (__v16sf)
1559                                                   _mm512_undefined_ps (),
1560                                                   (__mmask16) -1);
1561 }
1562
1563 extern __inline __m512
1564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1565 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1566 {
1567   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1568                                                   (__v16sf) __W,
1569                                                   (__mmask16) __U);
1570 }
1571
1572 extern __inline __m512
1573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1575 {
1576   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1577                                                   (__v16sf)
1578                                                   _mm512_setzero_ps (),
1579                                                   (__mmask16) __U);
1580 }
1581
1582 extern __inline __m128d
1583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584 _mm_rcp14_sd (__m128d __A, __m128d __B)
1585 {
1586   return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1587                                            (__v2df) __A);
1588 }
1589
1590 extern __inline __m128
1591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1592 _mm_rcp14_ss (__m128 __A, __m128 __B)
1593 {
1594   return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1595                                           (__v4sf) __A);
1596 }
1597
1598 extern __inline __m512d
1599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600 _mm512_rsqrt14_pd (__m512d __A)
1601 {
1602   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1603                                                      (__v8df)
1604                                                      _mm512_undefined_pd (),
1605                                                      (__mmask8) -1);
1606 }
1607
1608 extern __inline __m512d
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1611 {
1612   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1613                                                      (__v8df) __W,
1614                                                      (__mmask8) __U);
1615 }
1616
1617 extern __inline __m512d
1618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1620 {
1621   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1622                                                      (__v8df)
1623                                                      _mm512_setzero_pd (),
1624                                                      (__mmask8) __U);
1625 }
1626
1627 extern __inline __m512
1628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1629 _mm512_rsqrt14_ps (__m512 __A)
1630 {
1631   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1632                                                     (__v16sf)
1633                                                     _mm512_undefined_ps (),
1634                                                     (__mmask16) -1);
1635 }
1636
1637 extern __inline __m512
1638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1639 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1640 {
1641   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1642                                                     (__v16sf) __W,
1643                                                     (__mmask16) __U);
1644 }
1645
1646 extern __inline __m512
1647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1649 {
1650   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1651                                                     (__v16sf)
1652                                                     _mm512_setzero_ps (),
1653                                                     (__mmask16) __U);
1654 }
1655
1656 extern __inline __m128d
1657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1658 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1659 {
1660   return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1661                                              (__v2df) __A);
1662 }
1663
1664 extern __inline __m128
1665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1666 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1667 {
1668   return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1669                                             (__v4sf) __A);
1670 }
1671
1672 #ifdef __OPTIMIZE__
1673 extern __inline __m512d
1674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1676 {
1677   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1678                                                   (__v8df)
1679                                                   _mm512_undefined_pd (),
1680                                                   (__mmask8) -1, __R);
1681 }
1682
1683 extern __inline __m512d
1684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1686                            const int __R)
1687 {
1688   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1689                                                   (__v8df) __W,
1690                                                   (__mmask8) __U, __R);
1691 }
1692
1693 extern __inline __m512d
1694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1696 {
1697   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1698                                                   (__v8df)
1699                                                   _mm512_setzero_pd (),
1700                                                   (__mmask8) __U, __R);
1701 }
1702
1703 extern __inline __m512
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1706 {
1707   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1708                                                  (__v16sf)
1709                                                  _mm512_undefined_ps (),
1710                                                  (__mmask16) -1, __R);
1711 }
1712
1713 extern __inline __m512
1714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1716 {
1717   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1718                                                  (__v16sf) __W,
1719                                                  (__mmask16) __U, __R);
1720 }
1721
1722 extern __inline __m512
1723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1724 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1725 {
1726   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1727                                                  (__v16sf)
1728                                                  _mm512_setzero_ps (),
1729                                                  (__mmask16) __U, __R);
1730 }
1731
1732 extern __inline __m128d
1733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1735 {
1736   return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1737                                                 (__v2df) __A,
1738                                                 __R);
1739 }
1740
1741 extern __inline __m128
1742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1744 {
1745   return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1746                                                (__v4sf) __A,
1747                                                __R);
1748 }
1749 #else
/* Macro forms of the sqrt rounding intrinsics for the branch taken when
   __OPTIMIZE__ is not defined.  Each expansion is enclosed in
   parentheses and each operand is parenthesized and cast the same way
   the inline definitions in the __OPTIMIZE__ branch cast theirs, so both
   branches hand identical arguments to the builtins.  */
#define _mm512_sqrt_round_pd(A, C)                                      \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df) _mm512_undefined_pd (), (__mmask8)-1, (int)(C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C)                           \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df)(__m512d)(W), (__mmask8)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C)                             \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df) _mm512_setzero_pd (), (__mmask8)(U), (int)(C)))

#define _mm512_sqrt_round_ps(A, C)                                      \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf) _mm512_undefined_ps (), (__mmask16)-1, (int)(C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C)                           \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf)(__m512)(W), (__mmask16)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C)                             \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (int)(C)))

/* The scalar builtins receive the intrinsic's second argument first: the
   inline definitions pass (__B, __A), so the macros must pass (B, A) as
   well or the two build modes would compute different results.  */
#define _mm_sqrt_round_sd(A, B, C)                                      \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df)(__m128d)(B),         \
    (__v2df)(__m128d)(A), (int)(C)))

#define _mm_sqrt_round_ss(A, B, C)                                      \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf)(__m128)(B),           \
    (__v4sf)(__m128)(A), (int)(C)))
1773 #endif
1774
1775 extern __inline __m512i
1776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1777 _mm512_cvtepi8_epi32 (__m128i __A)
1778 {
1779   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1780                                                     (__v16si)
1781                                                     _mm512_undefined_si512 (),
1782                                                     (__mmask16) -1);
1783 }
1784
1785 extern __inline __m512i
1786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1788 {
1789   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1790                                                     (__v16si) __W,
1791                                                     (__mmask16) __U);
1792 }
1793
1794 extern __inline __m512i
1795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1797 {
1798   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1799                                                     (__v16si)
1800                                                     _mm512_setzero_si512 (),
1801                                                     (__mmask16) __U);
1802 }
1803
1804 extern __inline __m512i
1805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1806 _mm512_cvtepi8_epi64 (__m128i __A)
1807 {
1808   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1809                                                     (__v8di)
1810                                                     _mm512_undefined_si512 (),
1811                                                     (__mmask8) -1);
1812 }
1813
1814 extern __inline __m512i
1815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1816 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1817 {
1818   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1819                                                     (__v8di) __W,
1820                                                     (__mmask8) __U);
1821 }
1822
1823 extern __inline __m512i
1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1826 {
1827   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1828                                                     (__v8di)
1829                                                     _mm512_setzero_si512 (),
1830                                                     (__mmask8) __U);
1831 }
1832
1833 extern __inline __m512i
1834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1835 _mm512_cvtepi16_epi32 (__m256i __A)
1836 {
1837   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1838                                                     (__v16si)
1839                                                     _mm512_undefined_si512 (),
1840                                                     (__mmask16) -1);
1841 }
1842
1843 extern __inline __m512i
1844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1845 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1846 {
1847   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1848                                                     (__v16si) __W,
1849                                                     (__mmask16) __U);
1850 }
1851
1852 extern __inline __m512i
1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1855 {
1856   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1857                                                     (__v16si)
1858                                                     _mm512_setzero_si512 (),
1859                                                     (__mmask16) __U);
1860 }
1861
1862 extern __inline __m512i
1863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1864 _mm512_cvtepi16_epi64 (__m128i __A)
1865 {
1866   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1867                                                     (__v8di)
1868                                                     _mm512_undefined_si512 (),
1869                                                     (__mmask8) -1);
1870 }
1871
1872 extern __inline __m512i
1873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1875 {
1876   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1877                                                     (__v8di) __W,
1878                                                     (__mmask8) __U);
1879 }
1880
1881 extern __inline __m512i
1882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1884 {
1885   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1886                                                     (__v8di)
1887                                                     _mm512_setzero_si512 (),
1888                                                     (__mmask8) __U);
1889 }
1890
1891 extern __inline __m512i
1892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1893 _mm512_cvtepi32_epi64 (__m256i __X)
1894 {
1895   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1896                                                     (__v8di)
1897                                                     _mm512_undefined_si512 (),
1898                                                     (__mmask8) -1);
1899 }
1900
1901 extern __inline __m512i
1902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1903 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1904 {
1905   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1906                                                     (__v8di) __W,
1907                                                     (__mmask8) __U);
1908 }
1909
1910 extern __inline __m512i
1911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1912 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1913 {
1914   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1915                                                     (__v8di)
1916                                                     _mm512_setzero_si512 (),
1917                                                     (__mmask8) __U);
1918 }
1919
1920 extern __inline __m512i
1921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1922 _mm512_cvtepu8_epi32 (__m128i __A)
1923 {
1924   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1925                                                     (__v16si)
1926                                                     _mm512_undefined_si512 (),
1927                                                     (__mmask16) -1);
1928 }
1929
1930 extern __inline __m512i
1931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1932 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1933 {
1934   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1935                                                     (__v16si) __W,
1936                                                     (__mmask16) __U);
1937 }
1938
1939 extern __inline __m512i
1940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1942 {
1943   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1944                                                     (__v16si)
1945                                                     _mm512_setzero_si512 (),
1946                                                     (__mmask16) __U);
1947 }
1948
1949 extern __inline __m512i
1950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951 _mm512_cvtepu8_epi64 (__m128i __A)
1952 {
1953   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1954                                                     (__v8di)
1955                                                     _mm512_undefined_si512 (),
1956                                                     (__mmask8) -1);
1957 }
1958
1959 extern __inline __m512i
1960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1962 {
1963   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1964                                                     (__v8di) __W,
1965                                                     (__mmask8) __U);
1966 }
1967
1968 extern __inline __m512i
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1971 {
1972   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973                                                     (__v8di)
1974                                                     _mm512_setzero_si512 (),
1975                                                     (__mmask8) __U);
1976 }
1977
1978 extern __inline __m512i
1979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1980 _mm512_cvtepu16_epi32 (__m256i __A)
1981 {
1982   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1983                                                     (__v16si)
1984                                                     _mm512_undefined_si512 (),
1985                                                     (__mmask16) -1);
1986 }
1987
1988 extern __inline __m512i
1989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1990 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1991 {
1992   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1993                                                     (__v16si) __W,
1994                                                     (__mmask16) __U);
1995 }
1996
1997 extern __inline __m512i
1998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2000 {
2001   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002                                                     (__v16si)
2003                                                     _mm512_setzero_si512 (),
2004                                                     (__mmask16) __U);
2005 }
2006
2007 extern __inline __m512i
2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009 _mm512_cvtepu16_epi64 (__m128i __A)
2010 {
2011   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2012                                                     (__v8di)
2013                                                     _mm512_undefined_si512 (),
2014                                                     (__mmask8) -1);
2015 }
2016
2017 extern __inline __m512i
2018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2020 {
2021   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2022                                                     (__v8di) __W,
2023                                                     (__mmask8) __U);
2024 }
2025
2026 extern __inline __m512i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2029 {
2030   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031                                                     (__v8di)
2032                                                     _mm512_setzero_si512 (),
2033                                                     (__mmask8) __U);
2034 }
2035
2036 extern __inline __m512i
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm512_cvtepu32_epi64 (__m256i __X)
2039 {
2040   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2041                                                     (__v8di)
2042                                                     _mm512_undefined_si512 (),
2043                                                     (__mmask8) -1);
2044 }
2045
2046 extern __inline __m512i
2047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2048 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2049 {
2050   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2051                                                     (__v8di) __W,
2052                                                     (__mmask8) __U);
2053 }
2054
2055 extern __inline __m512i
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2058 {
2059   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060                                                     (__v8di)
2061                                                     _mm512_setzero_si512 (),
2062                                                     (__mmask8) __U);
2063 }
2064
2065 #ifdef __OPTIMIZE__
2066 extern __inline __m512d
2067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2068 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2069 {
2070   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2071                                                  (__v8df) __B,
2072                                                  (__v8df)
2073                                                  _mm512_undefined_pd (),
2074                                                  (__mmask8) -1, __R);
2075 }
2076
2077 extern __inline __m512d
2078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2079 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2080                           __m512d __B, const int __R)
2081 {
2082   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2083                                                  (__v8df) __B,
2084                                                  (__v8df) __W,
2085                                                  (__mmask8) __U, __R);
2086 }
2087
2088 extern __inline __m512d
2089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2090 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2091                            const int __R)
2092 {
2093   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2094                                                  (__v8df) __B,
2095                                                  (__v8df)
2096                                                  _mm512_setzero_pd (),
2097                                                  (__mmask8) __U, __R);
2098 }
2099
2100 extern __inline __m512
2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2103 {
2104   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2105                                                 (__v16sf) __B,
2106                                                 (__v16sf)
2107                                                 _mm512_undefined_ps (),
2108                                                 (__mmask16) -1, __R);
2109 }
2110
2111 extern __inline __m512
2112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2114                           __m512 __B, const int __R)
2115 {
2116   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2117                                                 (__v16sf) __B,
2118                                                 (__v16sf) __W,
2119                                                 (__mmask16) __U, __R);
2120 }
2121
2122 extern __inline __m512
2123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2124 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2125 {
2126   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2127                                                 (__v16sf) __B,
2128                                                 (__v16sf)
2129                                                 _mm512_setzero_ps (),
2130                                                 (__mmask16) __U, __R);
2131 }
2132
/* Unmasked form: forwards __A and __B to __builtin_ia32_subpd512_mask
   with _mm512_undefined_pd () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged (presumably the rounding
   control -- confirm against the builtin's documentation).  */
2133 extern __inline __m512d
2134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2136 {
2137   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2138                                                  (__v8df) __B,
2139                                                  (__v8df)
2140                                                  _mm512_undefined_pd (),
2141                                                  (__mmask8) -1, __R);
2142 }
2143
/* Masked form: forwards __A and __B to __builtin_ia32_subpd512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2144 extern __inline __m512d
2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2147                           __m512d __B, const int __R)
2148 {
2149   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2150                                                  (__v8df) __B,
2151                                                  (__v8df) __W,
2152                                                  (__mmask8) __U, __R);
2153 }
2154
/* Zero-masked form: forwards __A and __B to __builtin_ia32_subpd512_mask
   with _mm512_setzero_pd () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2155 extern __inline __m512d
2156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2158                            const int __R)
2159 {
2160   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2161                                                  (__v8df) __B,
2162                                                  (__v8df)
2163                                                  _mm512_setzero_pd (),
2164                                                  (__mmask8) __U, __R);
2165 }
2166
/* Unmasked form: forwards __A and __B to __builtin_ia32_subps512_mask
   with _mm512_undefined_ps () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged (presumably the rounding
   control -- confirm against the builtin's documentation).  */
2167 extern __inline __m512
2168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2170 {
2171   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2172                                                 (__v16sf) __B,
2173                                                 (__v16sf)
2174                                                 _mm512_undefined_ps (),
2175                                                 (__mmask16) -1, __R);
2176 }
2177
/* Masked form: forwards __A and __B to __builtin_ia32_subps512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2178 extern __inline __m512
2179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2181                           __m512 __B, const int __R)
2182 {
2183   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2184                                                 (__v16sf) __B,
2185                                                 (__v16sf) __W,
2186                                                 (__mmask16) __U, __R);
2187 }
2188
/* Zero-masked form: forwards __A and __B to __builtin_ia32_subps512_mask
   with _mm512_setzero_ps () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2189 extern __inline __m512
2190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2192 {
2193   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2194                                                 (__v16sf) __B,
2195                                                 (__v16sf)
2196                                                 _mm512_setzero_ps (),
2197                                                 (__mmask16) __U, __R);
2198 }
2199 #else
/* Macro forms of the packed add/sub rounding intrinsics, used in the
   #else branch of the surrounding conditional.  Unmasked variants pass
   an undefined vector and a mask of -1; maskz variants pass a zero
   vector; mask variants pass the caller's W and U.  C is forwarded as
   the builtin's last argument.  Every expansion is fully parenthesized
   so that a postfix operator applied to the macro result (for example
   a vector subscript) acts on the cast value instead of binding to the
   builtin call ahead of the cast.  */
2200 #define _mm512_add_round_pd(A, B, C)            \
2201     ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2202
2203 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
2204     ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))
2205
2206 #define _mm512_maskz_add_round_pd(U, A, B, C)   \
2207     ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2208
2209 #define _mm512_add_round_ps(A, B, C)            \
2210     ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2211
2212 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
2213     ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))
2214
2215 #define _mm512_maskz_add_round_ps(U, A, B, C)   \
2216     ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2217
2218 #define _mm512_sub_round_pd(A, B, C)            \
2219     ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2220
2221 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
2222     ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))
2223
2224 #define _mm512_maskz_sub_round_pd(U, A, B, C)   \
2225     ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2226
2227 #define _mm512_sub_round_ps(A, B, C)            \
2228     ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2229
2230 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
2231     ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))
2232
2233 #define _mm512_maskz_sub_round_ps(U, A, B, C)   \
2234     ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2235 #endif
2236
2237 #ifdef __OPTIMIZE__
/* Unmasked form: forwards __A and __B to __builtin_ia32_mulpd512_mask
   with _mm512_undefined_pd () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged (presumably the rounding
   control -- confirm against the builtin's documentation).  */
2238 extern __inline __m512d
2239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2241 {
2242   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2243                                                  (__v8df) __B,
2244                                                  (__v8df)
2245                                                  _mm512_undefined_pd (),
2246                                                  (__mmask8) -1, __R);
2247 }
2248
/* Masked form: forwards __A and __B to __builtin_ia32_mulpd512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2249 extern __inline __m512d
2250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2252                           __m512d __B, const int __R)
2253 {
2254   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2255                                                  (__v8df) __B,
2256                                                  (__v8df) __W,
2257                                                  (__mmask8) __U, __R);
2258 }
2259
/* Zero-masked form: forwards __A and __B to __builtin_ia32_mulpd512_mask
   with _mm512_setzero_pd () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2260 extern __inline __m512d
2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2263                            const int __R)
2264 {
2265   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2266                                                  (__v8df) __B,
2267                                                  (__v8df)
2268                                                  _mm512_setzero_pd (),
2269                                                  (__mmask8) __U, __R);
2270 }
2271
/* Unmasked form: forwards __A and __B to __builtin_ia32_mulps512_mask
   with _mm512_undefined_ps () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged (presumably the rounding
   control -- confirm against the builtin's documentation).  */
2272 extern __inline __m512
2273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2274 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2275 {
2276   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2277                                                 (__v16sf) __B,
2278                                                 (__v16sf)
2279                                                 _mm512_undefined_ps (),
2280                                                 (__mmask16) -1, __R);
2281 }
2282
/* Masked form: forwards __A and __B to __builtin_ia32_mulps512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2283 extern __inline __m512
2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2286                           __m512 __B, const int __R)
2287 {
2288   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2289                                                 (__v16sf) __B,
2290                                                 (__v16sf) __W,
2291                                                 (__mmask16) __U, __R);
2292 }
2293
/* Zero-masked form: forwards __A and __B to __builtin_ia32_mulps512_mask
   with _mm512_setzero_ps () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2294 extern __inline __m512
2295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2297 {
2298   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2299                                                 (__v16sf) __B,
2300                                                 (__v16sf)
2301                                                 _mm512_setzero_ps (),
2302                                                 (__mmask16) __U, __R);
2303 }
2304
/* Unmasked form: forwards __M and __V (in that order) to
   __builtin_ia32_divpd512_mask with _mm512_undefined_pd () as the third
   operand and an all-ones mask; __R is handed to the builtin unchanged
   (presumably the rounding control -- confirm against the builtin).  */
2305 extern __inline __m512d
2306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2307 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2308 {
2309   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2310                                                  (__v8df) __V,
2311                                                  (__v8df)
2312                                                  _mm512_undefined_pd (),
2313                                                  (__mmask8) -1, __R);
2314 }
2315
/* Masked form: forwards __M and __V (in that order) to
   __builtin_ia32_divpd512_mask with __W as the third operand and __U as
   the mask; __R is handed to the builtin unchanged.  Presumably lanes
   not selected by __U are taken from __W -- confirm against the
   builtin's documentation.  */
2316 extern __inline __m512d
2317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2319                           __m512d __V, const int __R)
2320 {
2321   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2322                                                  (__v8df) __V,
2323                                                  (__v8df) __W,
2324                                                  (__mmask8) __U, __R);
2325 }
2326
/* Zero-masked form: forwards __M and __V (in that order) to
   __builtin_ia32_divpd512_mask with _mm512_setzero_pd () as the third
   operand and __U as the mask; __R is handed to the builtin unchanged.
   Presumably lanes not selected by __U come out as zero -- confirm
   against the builtin's documentation.  */
2327 extern __inline __m512d
2328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2330                            const int __R)
2331 {
2332   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2333                                                  (__v8df) __V,
2334                                                  (__v8df)
2335                                                  _mm512_setzero_pd (),
2336                                                  (__mmask8) __U, __R);
2337 }
2338
/* Unmasked form: forwards __A and __B to __builtin_ia32_divps512_mask
   with _mm512_undefined_ps () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged (presumably the rounding
   control -- confirm against the builtin's documentation).  */
2339 extern __inline __m512
2340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2341 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2342 {
2343   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2344                                                 (__v16sf) __B,
2345                                                 (__v16sf)
2346                                                 _mm512_undefined_ps (),
2347                                                 (__mmask16) -1, __R);
2348 }
2349
/* Masked form: forwards __A and __B to __builtin_ia32_divps512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2350 extern __inline __m512
2351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2352 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2353                           __m512 __B, const int __R)
2354 {
2355   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2356                                                 (__v16sf) __B,
2357                                                 (__v16sf) __W,
2358                                                 (__mmask16) __U, __R);
2359 }
2360
/* Zero-masked form: forwards __A and __B to __builtin_ia32_divps512_mask
   with _mm512_setzero_ps () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2361 extern __inline __m512
2362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2364 {
2365   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2366                                                 (__v16sf) __B,
2367                                                 (__v16sf)
2368                                                 _mm512_setzero_ps (),
2369                                                 (__mmask16) __U, __R);
2370 }
2371
/* 128-bit wrapper: passes __A and __B (as __v2df) plus __R straight to
   __builtin_ia32_mulsd_round and casts the result to __m128d.  No mask
   or pass-through operand is involved.  */
2372 extern __inline __m128d
2373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2375 {
2376   return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2377                                                (__v2df) __B,
2378                                                __R);
2379 }
2380
/* 128-bit wrapper: passes __A and __B (as __v4sf) plus __R straight to
   __builtin_ia32_mulss_round and casts the result to __m128.  No mask
   or pass-through operand is involved.  */
2381 extern __inline __m128
2382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2384 {
2385   return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2386                                               (__v4sf) __B,
2387                                               __R);
2388 }
2389
/* 128-bit wrapper: passes __A and __B (as __v2df) plus __R straight to
   __builtin_ia32_divsd_round and casts the result to __m128d.  No mask
   or pass-through operand is involved.  */
2390 extern __inline __m128d
2391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2393 {
2394   return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2395                                                (__v2df) __B,
2396                                                __R);
2397 }
2398
/* 128-bit wrapper: passes __A and __B (as __v4sf) plus __R straight to
   __builtin_ia32_divss_round and casts the result to __m128.  No mask
   or pass-through operand is involved.  */
2399 extern __inline __m128
2400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2402 {
2403   return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2404                                               (__v4sf) __B,
2405                                               __R);
2406 }
2407
2408 #else
/* Macro forms of the mul/div rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  The 512-bit unmasked variants pass an
   undefined vector and a mask of -1, the maskz variants pass a zero
   vector, and the mask variants pass the caller's W and U; the 128-bit
   _sd/_ss variants forward A, B and C only.  Every expansion is fully
   parenthesized so that a postfix operator applied to the macro result
   (for example a vector subscript) acts on the cast value instead of
   binding to the builtin call ahead of the cast.  */
2409 #define _mm512_mul_round_pd(A, B, C)            \
2410     ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2411
2412 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2413     ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))
2414
2415 #define _mm512_maskz_mul_round_pd(U, A, B, C)   \
2416     ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2417
2418 #define _mm512_mul_round_ps(A, B, C)            \
2419     ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2420
2421 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2422     ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))
2423
2424 #define _mm512_maskz_mul_round_ps(U, A, B, C)   \
2425     ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2426
2427 #define _mm512_div_round_pd(A, B, C)            \
2428     ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2429
2430 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
2431     ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))
2432
2433 #define _mm512_maskz_div_round_pd(U, A, B, C)   \
2434     ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2435
2436 #define _mm512_div_round_ps(A, B, C)            \
2437     ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2438
2439 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2440     ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))
2441
2442 #define _mm512_maskz_div_round_ps(U, A, B, C)   \
2443     ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2444
2445 #define _mm_mul_round_sd(A, B, C)            \
2446     ((__m128d)__builtin_ia32_mulsd_round(A, B, C))
2447
2448 #define _mm_mul_round_ss(A, B, C)            \
2449     ((__m128)__builtin_ia32_mulss_round(A, B, C))
2450
2451 #define _mm_div_round_sd(A, B, C)            \
2452     ((__m128d)__builtin_ia32_divsd_round(A, B, C))
2453
2454 #define _mm_div_round_ss(A, B, C)            \
2455     ((__m128)__builtin_ia32_divss_round(A, B, C))
2456 #endif
2457
2458 #ifdef __OPTIMIZE__
/* Unmasked form: forwards __A and __B to __builtin_ia32_maxpd512_mask
   with _mm512_undefined_pd () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged.
   NOTE(review): what __R controls for max is not visible here -- confirm
   against the builtin's documentation.  */
2459 extern __inline __m512d
2460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2462 {
2463   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2464                                                  (__v8df) __B,
2465                                                  (__v8df)
2466                                                  _mm512_undefined_pd (),
2467                                                  (__mmask8) -1, __R);
2468 }
2469
/* Masked form: forwards __A and __B to __builtin_ia32_maxpd512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2470 extern __inline __m512d
2471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2472 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2473                           __m512d __B, const int __R)
2474 {
2475   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2476                                                  (__v8df) __B,
2477                                                  (__v8df) __W,
2478                                                  (__mmask8) __U, __R);
2479 }
2480
/* Zero-masked form: forwards __A and __B to __builtin_ia32_maxpd512_mask
   with _mm512_setzero_pd () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2481 extern __inline __m512d
2482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2483 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2484                            const int __R)
2485 {
2486   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2487                                                  (__v8df) __B,
2488                                                  (__v8df)
2489                                                  _mm512_setzero_pd (),
2490                                                  (__mmask8) __U, __R);
2491 }
2492
/* Unmasked form: forwards __A and __B to __builtin_ia32_maxps512_mask
   with _mm512_undefined_ps () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged.
   NOTE(review): what __R controls for max is not visible here -- confirm
   against the builtin's documentation.  */
2493 extern __inline __m512
2494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2496 {
2497   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2498                                                 (__v16sf) __B,
2499                                                 (__v16sf)
2500                                                 _mm512_undefined_ps (),
2501                                                 (__mmask16) -1, __R);
2502 }
2503
/* Masked form: forwards __A and __B to __builtin_ia32_maxps512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2504 extern __inline __m512
2505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2507                           __m512 __B, const int __R)
2508 {
2509   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2510                                                 (__v16sf) __B,
2511                                                 (__v16sf) __W,
2512                                                 (__mmask16) __U, __R);
2513 }
2514
/* Zero-masked form: forwards __A and __B to __builtin_ia32_maxps512_mask
   with _mm512_setzero_ps () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2515 extern __inline __m512
2516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2517 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2518 {
2519   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2520                                                 (__v16sf) __B,
2521                                                 (__v16sf)
2522                                                 _mm512_setzero_ps (),
2523                                                 (__mmask16) __U, __R);
2524 }
2525
/* Unmasked form: forwards __A and __B to __builtin_ia32_minpd512_mask
   with _mm512_undefined_pd () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged.
   NOTE(review): what __R controls for min is not visible here -- confirm
   against the builtin's documentation.  */
2526 extern __inline __m512d
2527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2528 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2529 {
2530   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2531                                                  (__v8df) __B,
2532                                                  (__v8df)
2533                                                  _mm512_undefined_pd (),
2534                                                  (__mmask8) -1, __R);
2535 }
2536
/* Masked form: forwards __A and __B to __builtin_ia32_minpd512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2537 extern __inline __m512d
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2540                           __m512d __B, const int __R)
2541 {
2542   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2543                                                  (__v8df) __B,
2544                                                  (__v8df) __W,
2545                                                  (__mmask8) __U, __R);
2546 }
2547
/* Zero-masked form: forwards __A and __B to __builtin_ia32_minpd512_mask
   with _mm512_setzero_pd () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2548 extern __inline __m512d
2549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2551                            const int __R)
2552 {
2553   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2554                                                  (__v8df) __B,
2555                                                  (__v8df)
2556                                                  _mm512_setzero_pd (),
2557                                                  (__mmask8) __U, __R);
2558 }
2559
/* Unmasked form: forwards __A and __B to __builtin_ia32_minps512_mask
   with _mm512_undefined_ps () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged.
   NOTE(review): what __R controls for min is not visible here -- confirm
   against the builtin's documentation.  */
2560 extern __inline __m512
2561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2563 {
2564   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2565                                                 (__v16sf) __B,
2566                                                 (__v16sf)
2567                                                 _mm512_undefined_ps (),
2568                                                 (__mmask16) -1, __R);
2569 }
2570
/* Masked form: forwards __A and __B to __builtin_ia32_minps512_mask with
   __W as the third operand and __U as the mask; __R is handed to the
   builtin unchanged.  Presumably lanes not selected by __U are taken
   from __W -- confirm against the builtin's documentation.  */
2571 extern __inline __m512
2572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2574                           __m512 __B, const int __R)
2575 {
2576   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2577                                                 (__v16sf) __B,
2578                                                 (__v16sf) __W,
2579                                                 (__mmask16) __U, __R);
2580 }
2581
/* Zero-masked form: forwards __A and __B to __builtin_ia32_minps512_mask
   with _mm512_setzero_ps () as the third operand and __U as the mask;
   __R is handed to the builtin unchanged.  Presumably lanes not
   selected by __U come out as zero -- confirm against the builtin.  */
2582 extern __inline __m512
2583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2585 {
2586   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2587                                                 (__v16sf) __B,
2588                                                 (__v16sf)
2589                                                 _mm512_setzero_ps (),
2590                                                 (__mmask16) __U, __R);
2591 }
2592 #else
/* Macro forms of the double-precision max rounding intrinsics, used
   when __OPTIMIZE__ is not defined.  The unmasked variant passes an
   undefined vector and a mask of -1, the maskz variant passes a zero
   vector, and the mask variant passes the caller's W and U; R is
   forwarded as the builtin's last argument.  Each expansion is fully
   parenthesized so that a postfix operator applied to the macro result
   acts on the cast value instead of binding to the builtin call ahead
   of the cast.  */
2593 #define _mm512_max_round_pd(A, B,  R) \
2594     ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))
2595
2596 #define _mm512_mask_max_round_pd(W, U,  A, B, R) \
2597     ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))
2598
2599 #define _mm512_maskz_max_round_pd(U, A,  B, R) \
2600     ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2601
/* Macro form of the unmasked single-precision max rounding intrinsic.
   The undefined third operand is now built with _mm512_undefined_ps (),
   matching the __v16sf cast, the inline-function form of this intrinsic
   and the _mm512_min_round_ps macro; the previous text obtained it from
   _mm512_undefined_pd () and reinterpreted the double vector.  The
   expansion is also fully parenthesized so that a postfix operator on
   the macro result acts on the cast value.  */
2602 #define _mm512_max_round_ps(A, B,  R) \
2603     ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
2604
/* Macro forms of the masked single-precision max and of all min
   rounding intrinsics, used when __OPTIMIZE__ is not defined.  Unmasked
   variants pass an undefined vector and a mask of -1, maskz variants
   pass a zero vector, and mask variants pass the caller's W and U; R is
   forwarded as the builtin's last argument.  Each expansion is fully
   parenthesized so that a postfix operator applied to the macro result
   acts on the cast value instead of binding to the builtin call ahead
   of the cast.  */
2605 #define _mm512_mask_max_round_ps(W, U,  A, B, R) \
2606     ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))
2607
2608 #define _mm512_maskz_max_round_ps(U, A,  B, R) \
2609     ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2610
2611 #define _mm512_min_round_pd(A, B,  R) \
2612     ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))
2613
2614 #define _mm512_mask_min_round_pd(W, U,  A, B, R) \
2615     ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))
2616
2617 #define _mm512_maskz_min_round_pd(U, A,  B, R) \
2618     ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2619
2620 #define _mm512_min_round_ps(A, B, R) \
2621     ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
2622
2623 #define _mm512_mask_min_round_ps(W, U,  A, B, R) \
2624     ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))
2625
2626 #define _mm512_maskz_min_round_ps(U, A,  B, R) \
2627     ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2628 #endif
2629
2630 #ifdef __OPTIMIZE__
/* Unmasked form: forwards __A and __B to __builtin_ia32_scalefpd512_mask
   with _mm512_undefined_pd () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged (presumably the rounding
   control -- confirm against the builtin's documentation).  */
2631 extern __inline __m512d
2632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2633 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2634 {
2635   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2636                                                     (__v8df) __B,
2637                                                     (__v8df)
2638                                                     _mm512_undefined_pd (),
2639                                                     (__mmask8) -1, __R);
2640 }
2641
/* Masked form: forwards __A and __B to __builtin_ia32_scalefpd512_mask
   with __W as the third operand and __U as the mask; __R is handed to
   the builtin unchanged.  Presumably lanes not selected by __U are
   taken from __W -- confirm against the builtin's documentation.  */
2642 extern __inline __m512d
2643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2644 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2645                              __m512d __B, const int __R)
2646 {
2647   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2648                                                     (__v8df) __B,
2649                                                     (__v8df) __W,
2650                                                     (__mmask8) __U, __R);
2651 }
2652
/* Zero-masked form: forwards __A and __B to
   __builtin_ia32_scalefpd512_mask with _mm512_setzero_pd () as the
   third operand and __U as the mask; __R is handed to the builtin
   unchanged.  Presumably lanes not selected by __U come out as zero --
   confirm against the builtin's documentation.  */
2653 extern __inline __m512d
2654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2656                               const int __R)
2657 {
2658   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2659                                                     (__v8df) __B,
2660                                                     (__v8df)
2661                                                     _mm512_setzero_pd (),
2662                                                     (__mmask8) __U, __R);
2663 }
2664
/* Unmasked form: forwards __A and __B to __builtin_ia32_scalefps512_mask
   with _mm512_undefined_ps () as the third operand and an all-ones
   mask; __R is handed to the builtin unchanged (presumably the rounding
   control -- confirm against the builtin's documentation).  */
2665 extern __inline __m512
2666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2667 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2668 {
2669   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2670                                                    (__v16sf) __B,
2671                                                    (__v16sf)
2672                                                    _mm512_undefined_ps (),
2673                                                    (__mmask16) -1, __R);
2674 }
2675
/* Masked form: forwards __A and __B to __builtin_ia32_scalefps512_mask
   with __W as the third operand and __U as the mask; __R is handed to
   the builtin unchanged.  Presumably lanes not selected by __U are
   taken from __W -- confirm against the builtin's documentation.  */
2676 extern __inline __m512
2677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2678 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2679                              __m512 __B, const int __R)
2680 {
2681   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2682                                                    (__v16sf) __B,
2683                                                    (__v16sf) __W,
2684                                                    (__mmask16) __U, __R);
2685 }
2686
/* Zero-masked form: forwards __A and __B to
   __builtin_ia32_scalefps512_mask with _mm512_setzero_ps () as the
   third operand and __U as the mask; __R is handed to the builtin
   unchanged.  Presumably lanes not selected by __U come out as zero --
   confirm against the builtin's documentation.  */
2687 extern __inline __m512
2688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2689 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2690                               const int __R)
2691 {
2692   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2693                                                    (__v16sf) __B,
2694                                                    (__v16sf)
2695                                                    _mm512_setzero_ps (),
2696                                                    (__mmask16) __U, __R);
2697 }
2698
/* 128-bit wrapper: passes __A and __B (as __v2df) plus __R straight to
   __builtin_ia32_scalefsd_round and casts the result to __m128d.  No
   mask or pass-through operand is involved.  */
2699 extern __inline __m128d
2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2702 {
2703   return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2704                                                   (__v2df) __B,
2705                                                   __R);
2706 }
2707
/* 128-bit wrapper: passes __A and __B (as __v4sf) plus __R straight to
   __builtin_ia32_scalefss_round and casts the result to __m128.  No
   mask or pass-through operand is involved.  */
2708 extern __inline __m128
2709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2711 {
2712   return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2713                                                  (__v4sf) __B,
2714                                                  __R);
2715 }
2716 #else
/* Macro forms of the scalef rounding intrinsics, used when __OPTIMIZE__
   is not defined.  The 512-bit unmasked variants pass an undefined
   vector and a mask of -1, the maskz variants pass a zero vector, and
   the mask variants pass the caller's W and U; the 128-bit _sd/_ss
   variants forward A, B and C only.  Every expansion is fully
   parenthesized so that a postfix operator applied to the macro result
   (for example a vector subscript) acts on the cast value instead of
   binding to the builtin call ahead of the cast.  */
2717 #define _mm512_scalef_round_pd(A, B, C)            \
2718     ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2719
2720 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
2721     ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))
2722
2723 #define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
2724     ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2725
2726 #define _mm512_scalef_round_ps(A, B, C)            \
2727     ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2728
2729 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
2730     ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))
2731
2732 #define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
2733     ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2734
2735 #define _mm_scalef_round_sd(A, B, C)            \
2736     ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))
2737
2738 #define _mm_scalef_round_ss(A, B, C)            \
2739     ((__m128)__builtin_ia32_scalefss_round(A, B, C))
2740 #endif
2741
2742 #ifdef __OPTIMIZE__
/* Unmasked form: forwards __A, __B and __C to
   __builtin_ia32_vfmaddpd512_mask with an all-ones mask; __R is handed
   to the builtin unchanged (presumably the rounding control -- confirm
   against the builtin's documentation).  */
2743 extern __inline __m512d
2744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2745 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2746 {
2747   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2748                                                     (__v8df) __B,
2749                                                     (__v8df) __C,
2750                                                     (__mmask8) -1, __R);
2751 }
2752
/* Masked form: forwards __A, __B and __C to
   __builtin_ia32_vfmaddpd512_mask with __U as the mask.  Note the
   parameter order: the mask sits between __A and __B, and there is no
   separate pass-through vector.  __R is handed to the builtin
   unchanged.  */
2753 extern __inline __m512d
2754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2755 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2756                             __m512d __C, const int __R)
2757 {
2758   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2759                                                     (__v8df) __B,
2760                                                     (__v8df) __C,
2761                                                     (__mmask8) __U, __R);
2762 }
2763
/* "mask3" form: forwards __A, __B and __C to the separate builtin
   __builtin_ia32_vfmaddpd512_mask3 with __U as the mask.  Here the mask
   is the fourth parameter, after the three vector operands.  __R is
   handed to the builtin unchanged.  */
2764 extern __inline __m512d
2765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2766 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2767                              __mmask8 __U, const int __R)
2768 {
2769   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2770                                                      (__v8df) __B,
2771                                                      (__v8df) __C,
2772                                                      (__mmask8) __U, __R);
2773 }
2774
/* "maskz" form: forwards __A, __B and __C to the separate builtin
   __builtin_ia32_vfmaddpd512_maskz with __U as the mask; the mask is
   the first parameter.  __R is handed to the builtin unchanged.  */
2775 extern __inline __m512d
2776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2778                              __m512d __C, const int __R)
2779 {
2780   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2781                                                      (__v8df) __B,
2782                                                      (__v8df) __C,
2783                                                      (__mmask8) __U, __R);
2784 }
2785
/* Unmasked form: forwards __A, __B and __C to
   __builtin_ia32_vfmaddps512_mask with an all-ones mask; __R is handed
   to the builtin unchanged (presumably the rounding control -- confirm
   against the builtin's documentation).  */
2786 extern __inline __m512
2787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2789 {
2790   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2791                                                    (__v16sf) __B,
2792                                                    (__v16sf) __C,
2793                                                    (__mmask16) -1, __R);
2794 }
2795
/* Masked form: forwards __A, __B and __C to
   __builtin_ia32_vfmaddps512_mask with __U as the mask.  Note the
   parameter order: the mask sits between __A and __B, and there is no
   separate pass-through vector.  __R is handed to the builtin
   unchanged.  */
2796 extern __inline __m512
2797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2798 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2799                             __m512 __C, const int __R)
2800 {
2801   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2802                                                    (__v16sf) __B,
2803                                                    (__v16sf) __C,
2804                                                    (__mmask16) __U, __R);
2805 }
2806
/* "mask3" form: forwards __A, __B and __C to the separate builtin
   __builtin_ia32_vfmaddps512_mask3 with __U as the mask.  Here the mask
   is the fourth parameter, after the three vector operands.  __R is
   handed to the builtin unchanged.  */
2807 extern __inline __m512
2808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2809 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2810                              __mmask16 __U, const int __R)
2811 {
2812   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2813                                                     (__v16sf) __B,
2814                                                     (__v16sf) __C,
2815                                                     (__mmask16) __U, __R);
2816 }
2817
/* maskz form: the mask __U is the first parameter here but is passed
   after the three vectors to __builtin_ia32_vfmaddps512_maskz; __R is
   forwarded as the trailing operand.  */
2818 extern __inline __m512
2819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2820 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2821                              __m512 __C, const int __R)
2822 {
2823   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2824                                                     (__v16sf) __B,
2825                                                     (__v16sf) __C,
2826                                                     (__mmask16) __U, __R);
2827 }
2828
/* Unmasked fmsub: built on the fmadd builtin
   (__builtin_ia32_vfmaddpd512_mask) by negating __C before the call.
   The mask operand is all-ones, (__mmask8) -1, and __R is forwarded as
   the trailing operand.  */
2829 extern __inline __m512d
2830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2831 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2832 {
2833   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2834                                                     (__v8df) __B,
2835                                                     -(__v8df) __C,
2836                                                     (__mmask8) -1, __R);
2837 }
2838
/* Masked fmsub: negates __C and calls the fmadd builtin
   __builtin_ia32_vfmaddpd512_mask with the caller's mask __U (second
   parameter, fourth builtin operand) and __R.  */
2839 extern __inline __m512d
2840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2841 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2842                             __m512d __C, const int __R)
2843 {
2844   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2845                                                     (__v8df) __B,
2846                                                     -(__v8df) __C,
2847                                                     (__mmask8) __U, __R);
2848 }
2849
/* mask3 fmsub: unlike the other fmsub variants this does not negate
   __C; it calls the dedicated __builtin_ia32_vfmsubpd512_mask3 with
   __A, __B, __C, __U and __R unchanged.
   NOTE(review): presumably because the mask3 form keeps __C for
   masked-off elements, so a pre-negated __C would show up in the
   result -- confirm against the builtin's definition.  */
2850 extern __inline __m512d
2851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2852 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2853                              __mmask8 __U, const int __R)
2854 {
2855   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2856                                                      (__v8df) __B,
2857                                                      (__v8df) __C,
2858                                                      (__mmask8) __U, __R);
2859 }
2860
/* maskz fmsub: negates __C and calls the fmadd builtin
   __builtin_ia32_vfmaddpd512_maskz with mask __U (first parameter,
   fourth builtin operand) and __R.  */
2861 extern __inline __m512d
2862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2863 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2864                              __m512d __C, const int __R)
2865 {
2866   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2867                                                      (__v8df) __B,
2868                                                      -(__v8df) __C,
2869                                                      (__mmask8) __U, __R);
2870 }
2871
/* Unmasked fmsub, single precision: negates __C and calls
   __builtin_ia32_vfmaddps512_mask with an all-ones mask,
   (__mmask16) -1, and __R as the trailing operand.  */
2872 extern __inline __m512
2873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2874 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2875 {
2876   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2877                                                    (__v16sf) __B,
2878                                                    -(__v16sf) __C,
2879                                                    (__mmask16) -1, __R);
2880 }
2881
/* Masked fmsub, single precision: negates __C and calls
   __builtin_ia32_vfmaddps512_mask with the caller's mask __U (second
   parameter, fourth builtin operand) and __R.  */
2882 extern __inline __m512
2883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2884 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2885                             __m512 __C, const int __R)
2886 {
2887   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2888                                                    (__v16sf) __B,
2889                                                    -(__v16sf) __C,
2890                                                    (__mmask16) __U, __R);
2891 }
2892
/* mask3 fmsub, single precision: calls the dedicated
   __builtin_ia32_vfmsubps512_mask3 with __A, __B, __C, __U and __R
   unchanged (no negation of __C, unlike the other fmsub variants).
   NOTE(review): presumably so that __C reaches masked-off elements
   un-negated -- confirm against the builtin's definition.  */
2893 extern __inline __m512
2894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2895 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2896                              __mmask16 __U, const int __R)
2897 {
2898   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2899                                                     (__v16sf) __B,
2900                                                     (__v16sf) __C,
2901                                                     (__mmask16) __U, __R);
2902 }
2903
/* maskz fmsub, single precision: negates __C and calls
   __builtin_ia32_vfmaddps512_maskz with mask __U (first parameter,
   fourth builtin operand) and __R.  */
2904 extern __inline __m512
2905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2906 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2907                              __m512 __C, const int __R)
2908 {
2909   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2910                                                     (__v16sf) __B,
2911                                                     -(__v16sf) __C,
2912                                                     (__mmask16) __U, __R);
2913 }
2914
/* Unmasked fmaddsub: forwards __A, __B and __C (viewed as __v8df) to
   __builtin_ia32_vfmaddsubpd512_mask with an all-ones mask,
   (__mmask8) -1, and __R as the trailing operand.  */
2915 extern __inline __m512d
2916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2917 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2918 {
2919   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2920                                                        (__v8df) __B,
2921                                                        (__v8df) __C,
2922                                                        (__mmask8) -1, __R);
2923 }
2924
/* Masked fmaddsub: forwards __A, __B and __C to
   __builtin_ia32_vfmaddsubpd512_mask with the caller's mask __U (second
   parameter, fourth builtin operand) and __R.  */
2925 extern __inline __m512d
2926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2928                                __m512d __C, const int __R)
2929 {
2930   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2931                                                        (__v8df) __B,
2932                                                        (__v8df) __C,
2933                                                        (__mmask8) __U, __R);
2934 }
2935
/* mask3 fmaddsub: forwards __A, __B, __C, __U and __R, in that order,
   to __builtin_ia32_vfmaddsubpd512_mask3.  */
2936 extern __inline __m512d
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2939                                 __mmask8 __U, const int __R)
2940 {
2941   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2942                                                         (__v8df) __B,
2943                                                         (__v8df) __C,
2944                                                         (__mmask8) __U, __R);
2945 }
2946
/* maskz fmaddsub: forwards __A, __B and __C to
   __builtin_ia32_vfmaddsubpd512_maskz with mask __U (first parameter,
   fourth builtin operand) and __R.  */
2947 extern __inline __m512d
2948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2950                                 __m512d __C, const int __R)
2951 {
2952   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2953                                                         (__v8df) __B,
2954                                                         (__v8df) __C,
2955                                                         (__mmask8) __U, __R);
2956 }
2957
/* Unmasked fmaddsub, single precision: forwards __A, __B and __C
   (viewed as __v16sf) to __builtin_ia32_vfmaddsubps512_mask with an
   all-ones mask, (__mmask16) -1, and __R as the trailing operand.  */
2958 extern __inline __m512
2959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2960 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2961 {
2962   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2963                                                       (__v16sf) __B,
2964                                                       (__v16sf) __C,
2965                                                       (__mmask16) -1, __R);
2966 }
2967
/* Masked fmaddsub, single precision: forwards __A, __B and __C to
   __builtin_ia32_vfmaddsubps512_mask with the caller's mask __U (second
   parameter, fourth builtin operand) and __R.  */
2968 extern __inline __m512
2969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2970 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2971                                __m512 __C, const int __R)
2972 {
2973   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2974                                                       (__v16sf) __B,
2975                                                       (__v16sf) __C,
2976                                                       (__mmask16) __U, __R);
2977 }
2978
/* mask3 fmaddsub, single precision: forwards __A, __B, __C, __U and
   __R, in that order, to __builtin_ia32_vfmaddsubps512_mask3.  */
2979 extern __inline __m512
2980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2981 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2982                                 __mmask16 __U, const int __R)
2983 {
2984   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2985                                                        (__v16sf) __B,
2986                                                        (__v16sf) __C,
2987                                                        (__mmask16) __U, __R);
2988 }
2989
/* maskz fmaddsub, single precision: forwards __A, __B and __C to
   __builtin_ia32_vfmaddsubps512_maskz with mask __U (first parameter,
   fourth builtin operand) and __R.  */
2990 extern __inline __m512
2991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2993                                 __m512 __C, const int __R)
2994 {
2995   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2996                                                        (__v16sf) __B,
2997                                                        (__v16sf) __C,
2998                                                        (__mmask16) __U, __R);
2999 }
3000
/* Unmasked fmsubadd: built on the fmaddsub builtin
   (__builtin_ia32_vfmaddsubpd512_mask) by negating __C before the call.
   The mask operand is all-ones, (__mmask8) -1, and __R is forwarded as
   the trailing operand.  */
3001 extern __inline __m512d
3002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3004 {
3005   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3006                                                        (__v8df) __B,
3007                                                        -(__v8df) __C,
3008                                                        (__mmask8) -1, __R);
3009 }
3010
/* Masked fmsubadd: negates __C and calls the fmaddsub builtin
   __builtin_ia32_vfmaddsubpd512_mask with the caller's mask __U (second
   parameter, fourth builtin operand) and __R.  */
3011 extern __inline __m512d
3012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3014                                __m512d __C, const int __R)
3015 {
3016   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3017                                                        (__v8df) __B,
3018                                                        -(__v8df) __C,
3019                                                        (__mmask8) __U, __R);
3020 }
3021
/* mask3 fmsubadd: calls the dedicated
   __builtin_ia32_vfmsubaddpd512_mask3 with __A, __B, __C, __U and __R
   unchanged (no negation of __C, unlike the other fmsubadd variants).
   NOTE(review): presumably so that __C reaches masked-off elements
   un-negated -- confirm against the builtin's definition.  */
3022 extern __inline __m512d
3023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3025                                 __mmask8 __U, const int __R)
3026 {
3027   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3028                                                         (__v8df) __B,
3029                                                         (__v8df) __C,
3030                                                         (__mmask8) __U, __R);
3031 }
3032
/* maskz fmsubadd: negates __C and calls the fmaddsub builtin
   __builtin_ia32_vfmaddsubpd512_maskz with mask __U (first parameter,
   fourth builtin operand) and __R.  */
3033 extern __inline __m512d
3034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3036                                 __m512d __C, const int __R)
3037 {
3038   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3039                                                         (__v8df) __B,
3040                                                         -(__v8df) __C,
3041                                                         (__mmask8) __U, __R);
3042 }
3043
/* Unmasked fmsubadd, single precision: negates __C and calls
   __builtin_ia32_vfmaddsubps512_mask with an all-ones mask,
   (__mmask16) -1, and __R as the trailing operand.  */
3044 extern __inline __m512
3045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3046 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3047 {
3048   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3049                                                       (__v16sf) __B,
3050                                                       -(__v16sf) __C,
3051                                                       (__mmask16) -1, __R);
3052 }
3053
/* Masked fmsubadd, single precision: negates __C and calls
   __builtin_ia32_vfmaddsubps512_mask with the caller's mask __U (second
   parameter, fourth builtin operand) and __R.  */
3054 extern __inline __m512
3055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3056 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3057                                __m512 __C, const int __R)
3058 {
3059   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3060                                                       (__v16sf) __B,
3061                                                       -(__v16sf) __C,
3062                                                       (__mmask16) __U, __R);
3063 }
3064
/* mask3 fmsubadd, single precision: calls the dedicated
   __builtin_ia32_vfmsubaddps512_mask3 with __A, __B, __C, __U and __R
   unchanged (no negation of __C, unlike the other fmsubadd variants).
   NOTE(review): presumably so that __C reaches masked-off elements
   un-negated -- confirm against the builtin's definition.  */
3065 extern __inline __m512
3066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3067 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3068                                 __mmask16 __U, const int __R)
3069 {
3070   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3071                                                        (__v16sf) __B,
3072                                                        (__v16sf) __C,
3073                                                        (__mmask16) __U, __R);
3074 }
3075
/* maskz fmsubadd, single precision: negates __C and calls
   __builtin_ia32_vfmaddsubps512_maskz with mask __U (first parameter,
   fourth builtin operand) and __R.  */
3076 extern __inline __m512
3077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3078 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3079                                 __m512 __C, const int __R)
3080 {
3081   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3082                                                        (__v16sf) __B,
3083                                                        -(__v16sf) __C,
3084                                                        (__mmask16) __U, __R);
3085 }
3086
/* Unmasked fnmadd: built on the fmadd builtin
   (__builtin_ia32_vfmaddpd512_mask) by negating __A before the call.
   The mask operand is all-ones, (__mmask8) -1, and __R is forwarded as
   the trailing operand.  */
3087 extern __inline __m512d
3088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3089 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3090 {
3091   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3092                                                     (__v8df) __B,
3093                                                     (__v8df) __C,
3094                                                     (__mmask8) -1, __R);
3095 }
3096
/* Masked fnmadd: unlike the unmasked variant this does not negate __A;
   it calls the dedicated __builtin_ia32_vfnmaddpd512_mask with __A, __B,
   __C unchanged, the caller's mask __U and __R.
   NOTE(review): presumably because the _mask form keeps __A for
   masked-off elements, so a pre-negated __A would show up in the
   result -- confirm against the builtin's definition.  */
3097 extern __inline __m512d
3098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3099 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3100                              __m512d __C, const int __R)
3101 {
3102   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3103                                                      (__v8df) __B,
3104                                                      (__v8df) __C,
3105                                                      (__mmask8) __U, __R);
3106 }
3107
/* mask3 fnmadd: negates __A and calls the fmadd builtin
   __builtin_ia32_vfmaddpd512_mask3 with __B, __C, mask __U and __R.  */
3108 extern __inline __m512d
3109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3110 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3111                               __mmask8 __U, const int __R)
3112 {
3113   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3114                                                      (__v8df) __B,
3115                                                      (__v8df) __C,
3116                                                      (__mmask8) __U, __R);
3117 }
3118
/* maskz fnmadd: negates __A and calls the fmadd builtin
   __builtin_ia32_vfmaddpd512_maskz with __B, __C, mask __U (first
   parameter, fourth builtin operand) and __R.  */
3119 extern __inline __m512d
3120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3121 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3122                               __m512d __C, const int __R)
3123 {
3124   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3125                                                      (__v8df) __B,
3126                                                      (__v8df) __C,
3127                                                      (__mmask8) __U, __R);
3128 }
3129
/* Unmasked fnmadd, single precision: negates __A and calls
   __builtin_ia32_vfmaddps512_mask with an all-ones mask,
   (__mmask16) -1, and __R as the trailing operand.  */
3130 extern __inline __m512
3131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3133 {
3134   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3135                                                    (__v16sf) __B,
3136                                                    (__v16sf) __C,
3137                                                    (__mmask16) -1, __R);
3138 }
3139
/* Masked fnmadd, single precision: calls the dedicated
   __builtin_ia32_vfnmaddps512_mask with __A, __B, __C unchanged (no
   negation of __A, unlike the unmasked variant), the caller's mask __U
   and __R.
   NOTE(review): presumably so that __A reaches masked-off elements
   un-negated -- confirm against the builtin's definition.  */
3140 extern __inline __m512
3141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3142 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3143                              __m512 __C, const int __R)
3144 {
3145   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3146                                                     (__v16sf) __B,
3147                                                     (__v16sf) __C,
3148                                                     (__mmask16) __U, __R);
3149 }
3150
/* mask3 fnmadd, single precision: negates __A and calls the fmadd
   builtin __builtin_ia32_vfmaddps512_mask3 with __B, __C, mask __U and
   __R.  */
3151 extern __inline __m512
3152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3153 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3154                               __mmask16 __U, const int __R)
3155 {
3156   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3157                                                     (__v16sf) __B,
3158                                                     (__v16sf) __C,
3159                                                     (__mmask16) __U, __R);
3160 }
3161
/* maskz fnmadd, single precision: negates __A and calls the fmadd
   builtin __builtin_ia32_vfmaddps512_maskz with __B, __C, mask __U
   (first parameter, fourth builtin operand) and __R.  */
3162 extern __inline __m512
3163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3165                               __m512 __C, const int __R)
3166 {
3167   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3168                                                     (__v16sf) __B,
3169                                                     (__v16sf) __C,
3170                                                     (__mmask16) __U, __R);
3171 }
3172
/* Unmasked fnmsub: built on the fmadd builtin
   (__builtin_ia32_vfmaddpd512_mask) by negating both __A and __C before
   the call.  The mask operand is all-ones, (__mmask8) -1, and __R is
   forwarded as the trailing operand.  */
3173 extern __inline __m512d
3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3176 {
3177   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3178                                                     (__v8df) __B,
3179                                                     -(__v8df) __C,
3180                                                     (__mmask8) -1, __R);
3181 }
3182
/* Masked fnmsub: calls the dedicated __builtin_ia32_vfnmsubpd512_mask
   with __A, __B, __C unchanged (no negation, unlike the unmasked
   variant), the caller's mask __U (second parameter, fourth builtin
   operand) and __R.  */
3183 extern __inline __m512d
3184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3185 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3186                              __m512d __C, const int __R)
3187 {
3188   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3189                                                      (__v8df) __B,
3190                                                      (__v8df) __C,
3191                                                      (__mmask8) __U, __R);
3192 }
3193
/* mask3 fnmsub: calls the dedicated __builtin_ia32_vfnmsubpd512_mask3
   with __A, __B, __C, __U and __R unchanged and in that order.  */
3194 extern __inline __m512d
3195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3197                               __mmask8 __U, const int __R)
3198 {
3199   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3200                                                       (__v8df) __B,
3201                                                       (__v8df) __C,
3202                                                       (__mmask8) __U, __R);
3203 }
3204
/* maskz fnmsub: negates both __A and __C and calls the fmadd builtin
   __builtin_ia32_vfmaddpd512_maskz with mask __U (first parameter,
   fourth builtin operand) and __R.  */
3205 extern __inline __m512d
3206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3208                               __m512d __C, const int __R)
3209 {
3210   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3211                                                      (__v8df) __B,
3212                                                      -(__v8df) __C,
3213                                                      (__mmask8) __U, __R);
3214 }
3215
/* Unmasked fnmsub, single precision: negates both __A and __C and
   calls __builtin_ia32_vfmaddps512_mask with an all-ones mask,
   (__mmask16) -1, and __R as the trailing operand.  */
3216 extern __inline __m512
3217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3218 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3219 {
3220   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3221                                                    (__v16sf) __B,
3222                                                    -(__v16sf) __C,
3223                                                    (__mmask16) -1, __R);
3224 }
3225
/* Masked fnmsub, single precision: calls the dedicated
   __builtin_ia32_vfnmsubps512_mask with __A, __B, __C unchanged (no
   negation, unlike the unmasked variant), the caller's mask __U (second
   parameter, fourth builtin operand) and __R.  */
3226 extern __inline __m512
3227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3228 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3229                              __m512 __C, const int __R)
3230 {
3231   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3232                                                     (__v16sf) __B,
3233                                                     (__v16sf) __C,
3234                                                     (__mmask16) __U, __R);
3235 }
3236
/* mask3 fnmsub, single precision: calls the dedicated
   __builtin_ia32_vfnmsubps512_mask3 with __A, __B, __C, __U and __R
   unchanged and in that order.  */
3237 extern __inline __m512
3238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3239 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3240                               __mmask16 __U, const int __R)
3241 {
3242   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3243                                                      (__v16sf) __B,
3244                                                      (__v16sf) __C,
3245                                                      (__mmask16) __U, __R);
3246 }
3247
/* maskz fnmsub, single precision: negates both __A and __C and calls
   the fmadd builtin __builtin_ia32_vfmaddps512_maskz with mask __U
   (first parameter, fourth builtin operand) and __R.  */
3248 extern __inline __m512
3249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3251                               __m512 __C, const int __R)
3252 {
3253   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3254                                                     (__v16sf) __B,
3255                                                     -(__v16sf) __C,
3256                                                     (__mmask16) __U, __R);
3257 }
3258 #else
/* Macro forms of the fmadd _round intrinsics (pd, then ps); they sit
   after the #else that follows the inline-function definitions.  Each
   expands to a cast applied to the matching fmadd builtin call.  A, B
   and C are the vector operands, U the mask and R the trailing
   (rounding) operand; the unmasked forms pass -1 as the mask.  Whatever
   the macro's parameter order, the builtin always receives
   (A, B, C, mask, R).
   Note: the expansions are not wrapped in outer parentheses and the
   arguments are passed without the vector casts used by the inline
   definitions.  */
3259 #define _mm512_fmadd_round_pd(A, B, C, R)            \
3260     (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3261
3262 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R)    \
3263     (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3264
3265 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R)   \
3266     (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3267
3268 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R)   \
3269     (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3270
3271 #define _mm512_fmadd_round_ps(A, B, C, R)            \
3272     (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3273
3274 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R)    \
3275     (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3276
3277 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R)   \
3278     (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3279
3280 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R)   \
3281     (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3282
/* Macro forms of the fmsub _round intrinsics (pd, then ps).  The
   unmasked, mask and maskz forms reuse the fmadd builtins and pass
   -(C); the mask3 forms call the dedicated vfmsub..._mask3 builtins
   with C un-negated.  A, B, C are the vector operands, U the mask and R
   the trailing (rounding) operand; unmasked forms pass -1 as the
   mask.  */
3283 #define _mm512_fmsub_round_pd(A, B, C, R)            \
3284     (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3285
3286 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R)    \
3287     (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3288
3289 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)   \
3290     (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3291
3292 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)   \
3293     (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3294
3295 #define _mm512_fmsub_round_ps(A, B, C, R)            \
3296     (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3297
3298 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R)    \
3299     (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3300
3301 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)   \
3302     (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3303
3304 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)   \
3305     (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3306
/* Macro form of the unmasked fmaddsub _round intrinsic: expands to
   __builtin_ia32_vfmaddsubpd512_mask with -1 as the mask and R as the
   trailing operand, cast to __m512d.  */
3307 #define _mm512_fmaddsub_round_pd(A, B, C, R)            \
3308     (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3309
/* Macro form of the masked fmaddsub _round intrinsic.  A, B, C are the
   vector operands, U the mask and R the trailing (rounding) operand.
   This must expand to the vfmaddsub builtin, as the inline definition of
   _mm512_mask_fmaddsub_round_pd and the _ps macro do; expanding to
   __builtin_ia32_vfmaddpd512_mask would perform a plain fmadd and give
   different results depending on which definition is compiled in.  */
3310 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
3311     (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3312
/* Remaining macro forms of the fmaddsub _round intrinsics: mask3 and
   maskz for pd, then all four ps variants.  Each expands to a cast
   applied to the matching vfmaddsub builtin, always called as
   (A, B, C, mask, R); the unmasked ps form passes -1 as the mask.  */
3313 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R)   \
3314     (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3315
3316 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R)   \
3317     (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3318
3319 #define _mm512_fmaddsub_round_ps(A, B, C, R)            \
3320     (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3321
3322 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R)    \
3323     (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3324
3325 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R)   \
3326     (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3327
3328 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R)   \
3329     (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3330
/* Macro forms of the fmsubadd _round intrinsics (pd, then ps).  The
   unmasked, mask and maskz forms reuse the vfmaddsub builtins and pass
   -(C); the mask3 forms call the dedicated vfmsubadd..._mask3 builtins
   with C un-negated.  A, B, C are the vector operands, U the mask and R
   the trailing (rounding) operand; unmasked forms pass -1 as the
   mask.  */
3331 #define _mm512_fmsubadd_round_pd(A, B, C, R)            \
3332     (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3333
3334 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R)    \
3335     (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3336
3337 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R)   \
3338     (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3339
3340 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R)   \
3341     (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3342
3343 #define _mm512_fmsubadd_round_ps(A, B, C, R)            \
3344     (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3345
3346 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R)    \
3347     (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3348
3349 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R)   \
3350     (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3351
3352 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R)   \
3353     (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3354
/* Macro form of the unmasked fnmadd _round intrinsic: reuses the fmadd
   builtin __builtin_ia32_vfmaddpd512_mask with -(A), -1 as the mask and
   R as the trailing operand, cast to __m512d.  */
3355 #define _mm512_fnmadd_round_pd(A, B, C, R)            \
3356     (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3357
/* Macro form of the masked fnmadd _round intrinsic.  A, B, C are the
   vector operands, U the mask and R the trailing (rounding) operand.
   A is passed un-negated because the vfnmadd builtin is used, exactly
   as in the inline definition of _mm512_mask_fnmadd_round_pd; passing
   -(A) to the fnmadd builtin would negate A twice and hand the builtin
   a different first operand than the inline definition does.  */
3358 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
3359     (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3360
/* Macro forms of mask3/maskz fnmadd (pd) and unmasked fnmadd (ps).  All
   three reuse the fmadd builtins and pass -(A); the builtin is always
   called as (-(A), B, C, mask, R), with -1 as the mask in the unmasked
   ps form.  */
3361 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R)   \
3362     (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3363
3364 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R)   \
3365     (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3366
3367 #define _mm512_fnmadd_round_ps(A, B, C, R)            \
3368     (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3369
/* Macro form of the masked fnmadd _round intrinsic, single precision.
   A, B, C are the vector operands, U the mask and R the trailing
   (rounding) operand.  A is passed un-negated because the vfnmadd
   builtin is used, exactly as in the inline definition of
   _mm512_mask_fnmadd_round_ps; passing -(A) to the fnmadd builtin would
   negate A twice and hand the builtin a different first operand than
   the inline definition does.  */
3370 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
3371     (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3372
/* Macro forms of mask3/maskz fnmadd, single precision: both reuse the
   fmadd builtins and pass -(A), calling the builtin as
   (-(A), B, C, U, R).  */
3373 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R)   \
3374     (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3375
3376 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R)   \
3377     (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3378
/* Macro forms of the fnmsub _round intrinsics (pd, then ps).  The
   unmasked and maskz forms reuse the fmadd builtins and pass both -(A)
   and -(C); the mask and mask3 forms call the dedicated vfnmsub
   builtins with A and C un-negated.  A, B, C are the vector operands, U
   the mask and R the trailing (rounding) operand; unmasked forms pass
   -1 as the mask.  */
3379 #define _mm512_fnmsub_round_pd(A, B, C, R)            \
3380     (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3381
3382 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)    \
3383     (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3384
3385 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R)   \
3386     (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3387
3388 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)   \
3389     (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3390
3391 #define _mm512_fnmsub_round_ps(A, B, C, R)            \
3392     (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3393
3394 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)    \
3395     (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3396
3397 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R)   \
3398     (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3399
3400 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)   \
3401     (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3402 #endif
3403
/* Unmasked form: calls __builtin_ia32_pabsq512_mask on __A (viewed as
   __v8di) with _mm512_undefined_si512 () as the second operand and an
   all-ones mask, (__mmask8) -1.
   NOTE(review): the second operand presumably supplies the value for
   masked-off elements, which is why an undefined vector is acceptable
   here -- confirm.  */
3404 extern __inline __m512i
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm512_abs_epi64 (__m512i __A)
3407 {
3408   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3409                                                  (__v8di)
3410                                                  _mm512_undefined_si512 (),
3411                                                  (__mmask8) -1);
3412 }
3413
/* Masked form: calls __builtin_ia32_pabsq512_mask on __A with __W as
   the second operand and the caller's mask __U.  Note the reordering:
   __W is the first parameter but the second builtin operand.  */
3414 extern __inline __m512i
3415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3416 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3417 {
3418   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3419                                                  (__v8di) __W,
3420                                                  (__mmask8) __U);
3421 }
3422
/* maskz form: calls __builtin_ia32_pabsq512_mask on __A with
   _mm512_setzero_si512 () as the second operand and the caller's mask
   __U.  */
3423 extern __inline __m512i
3424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3426 {
3427   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428                                                  (__v8di)
3429                                                  _mm512_setzero_si512 (),
3430                                                  (__mmask8) __U);
3431 }
3432
/* Unmasked form: calls __builtin_ia32_pabsd512_mask on __A (viewed as
   __v16si) with _mm512_undefined_si512 () as the second operand and an
   all-ones mask, (__mmask16) -1.  */
3433 extern __inline __m512i
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm512_abs_epi32 (__m512i __A)
3436 {
3437   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3438                                                  (__v16si)
3439                                                  _mm512_undefined_si512 (),
3440                                                  (__mmask16) -1);
3441 }
3442
/* Masked form: calls __builtin_ia32_pabsd512_mask on __A with __W as
   the second operand and the caller's mask __U.  Note the reordering:
   __W is the first parameter but the second builtin operand.  */
3443 extern __inline __m512i
3444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3446 {
3447   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3448                                                  (__v16si) __W,
3449                                                  (__mmask16) __U);
3450 }
3451
/* maskz form: calls __builtin_ia32_pabsd512_mask on __A with
   _mm512_setzero_si512 () as the second operand and the caller's mask
   __U.  */
3452 extern __inline __m512i
3453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3455 {
3456   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457                                                  (__v16si)
3458                                                  _mm512_setzero_si512 (),
3459                                                  (__mmask16) __U);
3460 }
3461
/* Unmasked form: calls __builtin_ia32_broadcastss512 on the 128-bit
   input __A (viewed as __v4sf) with _mm512_undefined_ps () as the
   second operand and an all-ones mask, (__mmask16) -1; the result is
   cast to __m512.  */
3462 extern __inline __m512
3463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464 _mm512_broadcastss_ps (__m128 __A)
3465 {
3466   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3467                                                  (__v16sf)
3468                                                  _mm512_undefined_ps (),
3469                                                  (__mmask16) -1);
3470 }
3471
/* Masked form: calls __builtin_ia32_broadcastss512 on __A with __O as
   the second operand and the caller's mask __M (passed without a
   cast).  __O is the first parameter but the second builtin operand.  */
3472 extern __inline __m512
3473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3475 {
3476   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3477                                                  (__v16sf) __O, __M);
3478 }
3479
3480 extern __inline __m512
3481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3483 {
3484   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3485                                                  (__v16sf)
3486                                                  _mm512_setzero_ps (),
3487                                                  __M);
3488 }
3489
3490 extern __inline __m512d
3491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492 _mm512_broadcastsd_pd (__m128d __A)
3493 {
3494   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3495                                                   (__v8df)
3496                                                   _mm512_undefined_pd (),
3497                                                   (__mmask8) -1);
3498 }
3499
3500 extern __inline __m512d
3501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3502 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3503 {
3504   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3505                                                   (__v8df) __O, __M);
3506 }
3507
3508 extern __inline __m512d
3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3511 {
3512   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3513                                                   (__v8df)
3514                                                   _mm512_setzero_pd (),
3515                                                   __M);
3516 }
3517
3518 extern __inline __m512i
3519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3520 _mm512_broadcastd_epi32 (__m128i __A)
3521 {
3522   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3523                                                   (__v16si)
3524                                                   _mm512_undefined_si512 (),
3525                                                   (__mmask16) -1);
3526 }
3527
3528 extern __inline __m512i
3529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3530 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3531 {
3532   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3533                                                   (__v16si) __O, __M);
3534 }
3535
3536 extern __inline __m512i
3537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3539 {
3540   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3541                                                   (__v16si)
3542                                                   _mm512_setzero_si512 (),
3543                                                   __M);
3544 }
3545
3546 extern __inline __m512i
3547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3548 _mm512_set1_epi32 (int __A)
3549 {
3550   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3551                                                            (__v16si)
3552                                                            _mm512_undefined_si512 (),
3553                                                            (__mmask16)(-1));
3554 }
3555
3556 extern __inline __m512i
3557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3558 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3559 {
3560   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3561                                                            __M);
3562 }
3563
3564 extern __inline __m512i
3565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3567 {
3568   return (__m512i)
3569          __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3570                                                  (__v16si) _mm512_setzero_si512 (),
3571                                                  __M);
3572 }
3573
3574 extern __inline __m512i
3575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576 _mm512_broadcastq_epi64 (__m128i __A)
3577 {
3578   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3579                                                   (__v8di)
3580                                                   _mm512_undefined_si512 (),
3581                                                   (__mmask8) -1);
3582 }
3583
3584 extern __inline __m512i
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3587 {
3588   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3589                                                   (__v8di) __O, __M);
3590 }
3591
3592 extern __inline __m512i
3593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3594 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3595 {
3596   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3597                                                   (__v8di)
3598                                                   _mm512_setzero_si512 (),
3599                                                   __M);
3600 }
3601
3602 extern __inline __m512i
3603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604 _mm512_set1_epi64 (long long __A)
3605 {
3606   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3607                                                            (__v8di)
3608                                                            _mm512_undefined_si512 (),
3609                                                            (__mmask8)(-1));
3610 }
3611
3612 extern __inline __m512i
3613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3615 {
3616   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3617                                                            __M);
3618 }
3619
3620 extern __inline __m512i
3621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3623 {
3624   return (__m512i)
3625          __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3626                                                  (__v8di) _mm512_setzero_si512 (),
3627                                                  __M);
3628 }
3629
3630 extern __inline __m512
3631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632 _mm512_broadcast_f32x4 (__m128 __A)
3633 {
3634   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3635                                                      (__v16sf)
3636                                                      _mm512_undefined_ps (),
3637                                                      (__mmask16) -1);
3638 }
3639
3640 extern __inline __m512
3641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3642 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3643 {
3644   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3645                                                      (__v16sf) __O,
3646                                                      __M);
3647 }
3648
3649 extern __inline __m512
3650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3652 {
3653   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654                                                      (__v16sf)
3655                                                      _mm512_setzero_ps (),
3656                                                      __M);
3657 }
3658
3659 extern __inline __m512i
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm512_broadcast_i32x4 (__m128i __A)
3662 {
3663   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3664                                                       (__v16si)
3665                                                       _mm512_undefined_si512 (),
3666                                                       (__mmask16) -1);
3667 }
3668
3669 extern __inline __m512i
3670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3672 {
3673   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3674                                                       (__v16si) __O,
3675                                                       __M);
3676 }
3677
3678 extern __inline __m512i
3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3681 {
3682   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683                                                       (__v16si)
3684                                                       _mm512_setzero_si512 (),
3685                                                       __M);
3686 }
3687
3688 extern __inline __m512d
3689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690 _mm512_broadcast_f64x4 (__m256d __A)
3691 {
3692   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3693                                                       (__v8df)
3694                                                       _mm512_undefined_pd (),
3695                                                       (__mmask8) -1);
3696 }
3697
3698 extern __inline __m512d
3699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3701 {
3702   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3703                                                       (__v8df) __O,
3704                                                       __M);
3705 }
3706
3707 extern __inline __m512d
3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3710 {
3711   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712                                                       (__v8df)
3713                                                       _mm512_setzero_pd (),
3714                                                       __M);
3715 }
3716
3717 extern __inline __m512i
3718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719 _mm512_broadcast_i64x4 (__m256i __A)
3720 {
3721   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3722                                                       (__v8di)
3723                                                       _mm512_undefined_si512 (),
3724                                                       (__mmask8) -1);
3725 }
3726
3727 extern __inline __m512i
3728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3730 {
3731   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3732                                                       (__v8di) __O,
3733                                                       __M);
3734 }
3735
3736 extern __inline __m512i
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3739 {
3740   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741                                                       (__v8di)
3742                                                       _mm512_setzero_si512 (),
3743                                                       __M);
3744 }
3745
/* Named 8-bit selector constants, one for every value 0x00..0xFF.  Each of
   the four letters after _MM_PERM_ stands for a 2-bit field (A = 0, B = 1,
   C = 2, D = 3); the leftmost letter fills bits 7:6 and the rightmost
   bits 1:0, e.g. _MM_PERM_ABCD == 0x1B and _MM_PERM_DCBA == 0xE4.  This is
   the selector parameter type of the inline _mm512_shuffle_epi32
   variants.  */
3746 typedef enum
3747 {
3748   _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3749   _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3750   _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3751   _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3752   _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3753   _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3754   _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3755   _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3756   _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3757   _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3758   _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3759   _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3760   _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3761   _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3762   _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3763   _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3764   _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3765   _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3766   _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3767   _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3768   _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3769   _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3770   _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3771   _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3772   _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3773   _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3774   _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3775   _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3776   _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3777   _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3778   _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3779   _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3780   _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3781   _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3782   _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3783   _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3784   _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3785   _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3786   _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3787   _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3788   _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3789   _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3790   _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3791   _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3792   _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3793   _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3794   _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3795   _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3796   _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3797   _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3798   _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3799   _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3800   _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3801   _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3802   _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3803   _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3804   _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3805   _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3806   _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3807   _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3808   _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3809   _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3810   _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3811   _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3812   _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3813   _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3814   _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3815   _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3816   _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3817   _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3818   _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3819   _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3820   _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3821   _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3822   _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3823   _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3824   _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3825   _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3826   _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3827   _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3828   _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3829   _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3830   _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3831   _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3832   _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3833   _MM_PERM_DDDD = 0xFF
3834 } _MM_PERM_ENUM;
3835
3836 #ifdef __OPTIMIZE__
3837 extern __inline __m512i
3838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3839 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3840 {
3841   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3842                                                   __mask,
3843                                                   (__v16si)
3844                                                   _mm512_undefined_si512 (),
3845                                                   (__mmask16) -1);
3846 }
3847
3848 extern __inline __m512i
3849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3850 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3851                            _MM_PERM_ENUM __mask)
3852 {
3853   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3854                                                   __mask,
3855                                                   (__v16si) __W,
3856                                                   (__mmask16) __U);
3857 }
3858
3859 extern __inline __m512i
3860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3861 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3862 {
3863   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3864                                                   __mask,
3865                                                   (__v16si)
3866                                                   _mm512_setzero_si512 (),
3867                                                   (__mmask16) __U);
3868 }
3869
3870 extern __inline __m512i
3871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3872 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3873 {
3874   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3875                                                    (__v8di) __B, __imm,
3876                                                    (__v8di)
3877                                                    _mm512_undefined_si512 (),
3878                                                    (__mmask8) -1);
3879 }
3880
3881 extern __inline __m512i
3882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3883 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3884                            __m512i __B, const int __imm)
3885 {
3886   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3887                                                    (__v8di) __B, __imm,
3888                                                    (__v8di) __W,
3889                                                    (__mmask8) __U);
3890 }
3891
3892 extern __inline __m512i
3893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3894 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3895                             const int __imm)
3896 {
3897   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3898                                                    (__v8di) __B, __imm,
3899                                                    (__v8di)
3900                                                    _mm512_setzero_si512 (),
3901                                                    (__mmask8) __U);
3902 }
3903
3904 extern __inline __m512i
3905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3906 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3907 {
3908   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3909                                                    (__v16si) __B,
3910                                                    __imm,
3911                                                    (__v16si)
3912                                                    _mm512_undefined_si512 (),
3913                                                    (__mmask16) -1);
3914 }
3915
3916 extern __inline __m512i
3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3919                            __m512i __B, const int __imm)
3920 {
3921   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3922                                                    (__v16si) __B,
3923                                                    __imm,
3924                                                    (__v16si) __W,
3925                                                    (__mmask16) __U);
3926 }
3927
3928 extern __inline __m512i
3929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3930 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3931                             const int __imm)
3932 {
3933   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3934                                                    (__v16si) __B,
3935                                                    __imm,
3936                                                    (__v16si)
3937                                                    _mm512_setzero_si512 (),
3938                                                    (__mmask16) __U);
3939 }
3940
3941 extern __inline __m512d
3942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3944 {
3945   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3946                                                    (__v8df) __B, __imm,
3947                                                    (__v8df)
3948                                                    _mm512_undefined_pd (),
3949                                                    (__mmask8) -1);
3950 }
3951
3952 extern __inline __m512d
3953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3954 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3955                            __m512d __B, const int __imm)
3956 {
3957   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3958                                                    (__v8df) __B, __imm,
3959                                                    (__v8df) __W,
3960                                                    (__mmask8) __U);
3961 }
3962
3963 extern __inline __m512d
3964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3965 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3966                             const int __imm)
3967 {
3968   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3969                                                    (__v8df) __B, __imm,
3970                                                    (__v8df)
3971                                                    _mm512_setzero_pd (),
3972                                                    (__mmask8) __U);
3973 }
3974
3975 extern __inline __m512
3976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3977 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3978 {
3979   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3980                                                   (__v16sf) __B, __imm,
3981                                                   (__v16sf)
3982                                                   _mm512_undefined_ps (),
3983                                                   (__mmask16) -1);
3984 }
3985
3986 extern __inline __m512
3987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3988 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3989                            __m512 __B, const int __imm)
3990 {
3991   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3992                                                   (__v16sf) __B, __imm,
3993                                                   (__v16sf) __W,
3994                                                   (__mmask16) __U);
3995 }
3996
3997 extern __inline __m512
3998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4000                             const int __imm)
4001 {
4002   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4003                                                   (__v16sf) __B, __imm,
4004                                                   (__v16sf)
4005                                                   _mm512_setzero_ps (),
4006                                                   (__mmask16) __U);
4007 }
4008
4009 #else
/* Macro form of _mm512_shuffle_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_epi32(X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_si512 (), \
     (__mmask16) -1))
4014
/* Macro form of _mm512_mask_shuffle_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
4019
/* Macro form of _mm512_maskz_shuffle_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
4024
/* Macro form of _mm512_shuffle_i64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_i64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) _mm512_undefined_si512 (), \
     (__mmask8) -1))
4030
/* Macro form of _mm512_mask_shuffle_i64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
4036
/* Macro form of _mm512_maskz_shuffle_i64x2, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
4042
/* Macro form of _mm512_shuffle_i32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_i32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_si512 (), \
     (__mmask16) -1))
4048
/* Macro form of _mm512_mask_shuffle_i32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
4054
/* Macro form of _mm512_maskz_shuffle_i32x4, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
4060
/* Macro form of _mm512_shuffle_f64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_f64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))
4066
/* Macro form of _mm512_mask_shuffle_f64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U)))
4072
/* Macro form of _mm512_maskz_shuffle_f64x2, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
4078
/* Macro form of _mm512_shuffle_f32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_f32x4(X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_undefined_ps (), \
     (__mmask16) -1))
4084
/* Macro form of _mm512_mask_shuffle_f32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U)))
4090
/* Macro form of _mm512_maskz_shuffle_f32x4, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_setzero_ps (), \
     (__mmask16) (U)))
4096 #endif
4097
4098 extern __inline __m512i
4099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4100 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4101 {
4102   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4103                                                   (__v16si) __B,
4104                                                   (__v16si)
4105                                                   _mm512_undefined_si512 (),
4106                                                   (__mmask16) -1);
4107 }
4108
4109 extern __inline __m512i
4110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4112 {
4113   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4114                                                   (__v16si) __B,
4115                                                   (__v16si) __W,
4116                                                   (__mmask16) __U);
4117 }
4118
4119 extern __inline __m512i
4120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4121 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4122 {
4123   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4124                                                   (__v16si) __B,
4125                                                   (__v16si)
4126                                                   _mm512_setzero_si512 (),
4127                                                   (__mmask16) __U);
4128 }
4129
4130 extern __inline __m512i
4131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4132 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4133 {
4134   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4135                                                   (__v16si) __B,
4136                                                   (__v16si)
4137                                                   _mm512_undefined_si512 (),
4138                                                   (__mmask16) -1);
4139 }
4140
4141 extern __inline __m512i
4142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4143 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4144 {
4145   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4146                                                   (__v16si) __B,
4147                                                   (__v16si) __W,
4148                                                   (__mmask16) __U);
4149 }
4150
4151 extern __inline __m512i
4152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4153 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4154 {
4155   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4156                                                   (__v16si) __B,
4157                                                   (__v16si)
4158                                                   _mm512_setzero_si512 (),
4159                                                   (__mmask16) __U);
4160 }
4161
4162 extern __inline __m512i
4163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4165 {
4166   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4167                                                   (__v8di) __B,
4168                                                   (__v8di)
4169                                                   _mm512_undefined_si512 (),
4170                                                   (__mmask8) -1);
4171 }
4172
4173 extern __inline __m512i
4174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4175 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4176 {
4177   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4178                                                   (__v8di) __B,
4179                                                   (__v8di) __W,
4180                                                   (__mmask8) __U);
4181 }
4182
4183 extern __inline __m512i
4184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4185 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4186 {
4187   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4188                                                   (__v8di) __B,
4189                                                   (__v8di)
4190                                                   _mm512_setzero_si512 (),
4191                                                   (__mmask8) __U);
4192 }
4193
4194 extern __inline __m512i
4195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4197 {
4198   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4199                                                   (__v8di) __B,
4200                                                   (__v8di)
4201                                                   _mm512_undefined_si512 (),
4202                                                   (__mmask8) -1);
4203 }
4204
4205 extern __inline __m512i
4206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4207 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4208 {
4209   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4210                                                   (__v8di) __B,
4211                                                   (__v8di) __W,
4212                                                   (__mmask8) __U);
4213 }
4214
4215 extern __inline __m512i
4216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4218 {
4219   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4220                                                   (__v8di) __B,
4221                                                   (__v8di)
4222                                                   _mm512_setzero_si512 (),
4223                                                   (__mmask8) __U);
4224 }
4225
4226 #ifdef __OPTIMIZE__
4227 extern __inline __m256i
4228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4229 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4230 {
4231   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4232                                                      (__v8si)
4233                                                      _mm256_undefined_si256 (),
4234                                                      (__mmask8) -1, __R);
4235 }
4236
4237 extern __inline __m256i
4238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4239 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4240                                 const int __R)
4241 {
4242   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4243                                                      (__v8si) __W,
4244                                                      (__mmask8) __U, __R);
4245 }
4246
4247 extern __inline __m256i
4248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4249 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4250 {
4251   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4252                                                      (__v8si)
4253                                                      _mm256_setzero_si256 (),
4254                                                      (__mmask8) __U, __R);
4255 }
4256
4257 extern __inline __m256i
4258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4259 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4260 {
4261   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4262                                                       (__v8si)
4263                                                       _mm256_undefined_si256 (),
4264                                                       (__mmask8) -1, __R);
4265 }
4266
4267 extern __inline __m256i
4268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4270                                 const int __R)
4271 {
4272   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4273                                                       (__v8si) __W,
4274                                                       (__mmask8) __U, __R);
4275 }
4276
4277 extern __inline __m256i
4278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4280 {
4281   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4282                                                       (__v8si)
4283                                                       _mm256_setzero_si256 (),
4284                                                       (__mmask8) __U, __R);
4285 }
4286 #else
/* Macro forms of the _mm512_{,mask_,maskz_}cvtt_roundpd_ep{i,u}32
   intrinsics, used when __OPTIMIZE__ is not defined.  Each expands to a
   single call of the builtin used by the same-named inline function;
   A, U and B are forwarded as written and W is cast to __v8si.  */
#define _mm512_cvtt_roundpd_epi32(A, B)                                 \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ((A),                     \
     (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B)                      \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ((A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B)                        \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ((A),                     \
     (__v8si) _mm256_setzero_si256 (), (U), (B)))

#define _mm512_cvtt_roundpd_epu32(A, B)                                 \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ((A),                    \
     (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B)                      \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ((A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B)                        \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ((A),                    \
     (__v8si) _mm256_setzero_si256 (), (U), (B)))
4304 #endif
4305
4306 #ifdef __OPTIMIZE__
4307 extern __inline __m256i
4308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4309 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4310 {
4311   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4312                                                     (__v8si)
4313                                                     _mm256_undefined_si256 (),
4314                                                     (__mmask8) -1, __R);
4315 }
4316
4317 extern __inline __m256i
4318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4319 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4320                                const int __R)
4321 {
4322   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4323                                                     (__v8si) __W,
4324                                                     (__mmask8) __U, __R);
4325 }
4326
4327 extern __inline __m256i
4328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4329 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4330 {
4331   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4332                                                     (__v8si)
4333                                                     _mm256_setzero_si256 (),
4334                                                     (__mmask8) __U, __R);
4335 }
4336
4337 extern __inline __m256i
4338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4339 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4340 {
4341   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4342                                                      (__v8si)
4343                                                      _mm256_undefined_si256 (),
4344                                                      (__mmask8) -1, __R);
4345 }
4346
4347 extern __inline __m256i
4348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4349 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4350                                const int __R)
4351 {
4352   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4353                                                      (__v8si) __W,
4354                                                      (__mmask8) __U, __R);
4355 }
4356
4357 extern __inline __m256i
4358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4359 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4360 {
4361   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362                                                      (__v8si)
4363                                                      _mm256_setzero_si256 (),
4364                                                      (__mmask8) __U, __R);
4365 }
4366 #else
/* Macro forms of the _mm512_{,mask_,maskz_}cvt_roundpd_ep{i,u}32
   intrinsics, used when __OPTIMIZE__ is not defined.  Each expands to a
   single call of the builtin used by the same-named inline function;
   A, U and B are forwarded as written and W is cast to __v8si.  */
#define _mm512_cvt_roundpd_epi32(A, B)                                  \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((A),                      \
     (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B)                       \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B)                         \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((A),                      \
     (__v8si) _mm256_setzero_si256 (), (U), (B)))

#define _mm512_cvt_roundpd_epu32(A, B)                                  \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((A),                     \
     (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B)                       \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B)                         \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((A),                     \
     (__v8si) _mm256_setzero_si256 (), (U), (B)))
4384 #endif
4385
4386 #ifdef __OPTIMIZE__
4387 extern __inline __m512i
4388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4389 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4390 {
4391   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4392                                                      (__v16si)
4393                                                      _mm512_undefined_si512 (),
4394                                                      (__mmask16) -1, __R);
4395 }
4396
4397 extern __inline __m512i
4398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4399 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4400                                 const int __R)
4401 {
4402   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4403                                                      (__v16si) __W,
4404                                                      (__mmask16) __U, __R);
4405 }
4406
4407 extern __inline __m512i
4408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4409 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4410 {
4411   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4412                                                      (__v16si)
4413                                                      _mm512_setzero_si512 (),
4414                                                      (__mmask16) __U, __R);
4415 }
4416
4417 extern __inline __m512i
4418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4419 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4420 {
4421   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4422                                                       (__v16si)
4423                                                       _mm512_undefined_si512 (),
4424                                                       (__mmask16) -1, __R);
4425 }
4426
4427 extern __inline __m512i
4428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4429 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4430                                 const int __R)
4431 {
4432   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4433                                                       (__v16si) __W,
4434                                                       (__mmask16) __U, __R);
4435 }
4436
4437 extern __inline __m512i
4438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4440 {
4441   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4442                                                       (__v16si)
4443                                                       _mm512_setzero_si512 (),
4444                                                       (__mmask16) __U, __R);
4445 }
4446 #else
/* Macro forms of the _mm512_{,mask_,maskz_}cvtt_roundps_ep{i,u}32
   intrinsics, used when __OPTIMIZE__ is not defined.  Each expands to a
   single call of the builtin used by the same-named inline function;
   A, U and B are forwarded as written and W is cast to __v16si.  */
#define _mm512_cvtt_roundps_epi32(A, B)                                 \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((A),                     \
     (__v16si) _mm512_undefined_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B)                      \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((A),                     \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))

#define _mm512_cvtt_roundps_epu32(A, B)                                 \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((A),                    \
     (__v16si) _mm512_undefined_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B)                      \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((A),                    \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))
4464 #endif
4465
4466 #ifdef __OPTIMIZE__
4467 extern __inline __m512i
4468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4469 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4470 {
4471   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4472                                                     (__v16si)
4473                                                     _mm512_undefined_si512 (),
4474                                                     (__mmask16) -1, __R);
4475 }
4476
4477 extern __inline __m512i
4478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4479 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4480                                const int __R)
4481 {
4482   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4483                                                     (__v16si) __W,
4484                                                     (__mmask16) __U, __R);
4485 }
4486
4487 extern __inline __m512i
4488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4489 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4490 {
4491   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4492                                                     (__v16si)
4493                                                     _mm512_setzero_si512 (),
4494                                                     (__mmask16) __U, __R);
4495 }
4496
4497 extern __inline __m512i
4498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4499 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4500 {
4501   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4502                                                      (__v16si)
4503                                                      _mm512_undefined_si512 (),
4504                                                      (__mmask16) -1, __R);
4505 }
4506
4507 extern __inline __m512i
4508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4509 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4510                                const int __R)
4511 {
4512   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4513                                                      (__v16si) __W,
4514                                                      (__mmask16) __U, __R);
4515 }
4516
4517 extern __inline __m512i
4518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4519 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4520 {
4521   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4522                                                      (__v16si)
4523                                                      _mm512_setzero_si512 (),
4524                                                      (__mmask16) __U, __R);
4525 }
4526 #else
/* Macro forms of the _mm512_{,mask_,maskz_}cvt_roundps_ep{i,u}32
   intrinsics, used when __OPTIMIZE__ is not defined.  Each expands to a
   single call of the builtin used by the same-named inline function;
   A, U and B are forwarded as written and W is cast to __v16si.  */
#define _mm512_cvt_roundps_epi32(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A),                      \
     (__v16si) _mm512_undefined_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A),                      \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))

#define _mm512_cvt_roundps_epu32(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A),                     \
     (__v16si) _mm512_undefined_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A),                     \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))
4544 #endif
4545
4546 extern __inline __m128d
4547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4548 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4549 {
4550   return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4551 }
4552
4553 #ifdef __x86_64__
4554 #ifdef __OPTIMIZE__
4555 extern __inline __m128d
4556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4557 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4558 {
4559   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4560 }
4561
4562 extern __inline __m128d
4563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4565 {
4566   return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4567 }
4568
4569 extern __inline __m128d
4570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4572 {
4573   return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4574 }
4575 #else
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to the
   builtin called by the same-named inline function.  The whole expansion
   is parenthesized so that the cast and the call form one primary
   expression: with a bare `(__m128d) call (...)', `sizeof MACRO (...)'
   or a postfix operator applied to the invocation would bind differently
   than with the inline-function form.  */
#define _mm_cvt_roundu64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
4584 #endif
4585
4586 #endif
4587
4588 #ifdef __OPTIMIZE__
4589 extern __inline __m128
4590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4592 {
4593   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4594 }
4595
4596 extern __inline __m128
4597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4599 {
4600   return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4601 }
4602
4603 extern __inline __m128
4604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4606 {
4607   return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4608 }
4609 #else
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to the
   builtin called by the same-named inline function.  The whole expansion
   is parenthesized so that the cast and the call form one primary
   expression: with a bare `(__m128) call (...)', `sizeof MACRO (...)'
   or a postfix operator applied to the invocation would bind differently
   than with the inline-function form.  */
#define _mm_cvt_roundu32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
4618 #endif
4619
4620 #ifdef __x86_64__
4621 #ifdef __OPTIMIZE__
4622 extern __inline __m128
4623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4624 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4625 {
4626   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4627 }
4628
4629 extern __inline __m128
4630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4631 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4632 {
4633   return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4634 }
4635
4636 extern __inline __m128
4637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4638 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4639 {
4640   return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4641 }
4642 #else
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to the
   builtin called by the same-named inline function.  The whole expansion
   is parenthesized so that the cast and the call form one primary
   expression: with a bare `(__m128) call (...)', `sizeof MACRO (...)'
   or a postfix operator applied to the invocation would bind differently
   than with the inline-function form.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
4651 #endif
4652
4653 #endif
4654
4655 extern __inline __m128i
4656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657 _mm512_cvtepi32_epi8 (__m512i __A)
4658 {
4659   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4660                                                   (__v16qi)
4661                                                   _mm_undefined_si128 (),
4662                                                   (__mmask16) -1);
4663 }
4664
4665 extern __inline void
4666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4667 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4668 {
4669   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4670 }
4671
4672 extern __inline __m128i
4673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4674 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4675 {
4676   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4677                                                   (__v16qi) __O, __M);
4678 }
4679
4680 extern __inline __m128i
4681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4683 {
4684   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4685                                                   (__v16qi)
4686                                                   _mm_setzero_si128 (),
4687                                                   __M);
4688 }
4689
4690 extern __inline __m128i
4691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692 _mm512_cvtsepi32_epi8 (__m512i __A)
4693 {
4694   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4695                                                    (__v16qi)
4696                                                    _mm_undefined_si128 (),
4697                                                    (__mmask16) -1);
4698 }
4699
4700 extern __inline void
4701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4702 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4703 {
4704   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4705 }
4706
4707 extern __inline __m128i
4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4710 {
4711   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4712                                                    (__v16qi) __O, __M);
4713 }
4714
4715 extern __inline __m128i
4716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4717 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4718 {
4719   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4720                                                    (__v16qi)
4721                                                    _mm_setzero_si128 (),
4722                                                    __M);
4723 }
4724
4725 extern __inline __m128i
4726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4727 _mm512_cvtusepi32_epi8 (__m512i __A)
4728 {
4729   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4730                                                     (__v16qi)
4731                                                     _mm_undefined_si128 (),
4732                                                     (__mmask16) -1);
4733 }
4734
4735 extern __inline void
4736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4738 {
4739   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4740 }
4741
4742 extern __inline __m128i
4743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4744 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4745 {
4746   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4747                                                     (__v16qi) __O,
4748                                                     __M);
4749 }
4750
4751 extern __inline __m128i
4752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4754 {
4755   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4756                                                     (__v16qi)
4757                                                     _mm_setzero_si128 (),
4758                                                     __M);
4759 }
4760
4761 extern __inline __m256i
4762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4763 _mm512_cvtepi32_epi16 (__m512i __A)
4764 {
4765   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4766                                                   (__v16hi)
4767                                                   _mm256_undefined_si256 (),
4768                                                   (__mmask16) -1);
4769 }
4770
4771 extern __inline void
4772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4774 {
4775   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4776 }
4777
4778 extern __inline __m256i
4779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4780 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4781 {
4782   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4783                                                   (__v16hi) __O, __M);
4784 }
4785
4786 extern __inline __m256i
4787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4789 {
4790   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4791                                                   (__v16hi)
4792                                                   _mm256_setzero_si256 (),
4793                                                   __M);
4794 }
4795
4796 extern __inline __m256i
4797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798 _mm512_cvtsepi32_epi16 (__m512i __A)
4799 {
4800   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4801                                                    (__v16hi)
4802                                                    _mm256_undefined_si256 (),
4803                                                    (__mmask16) -1);
4804 }
4805
4806 extern __inline void
4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4809 {
4810   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4811 }
4812
4813 extern __inline __m256i
4814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4815 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4816 {
4817   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4818                                                    (__v16hi) __O, __M);
4819 }
4820
4821 extern __inline __m256i
4822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4824 {
4825   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4826                                                    (__v16hi)
4827                                                    _mm256_setzero_si256 (),
4828                                                    __M);
4829 }
4830
4831 extern __inline __m256i
4832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833 _mm512_cvtusepi32_epi16 (__m512i __A)
4834 {
4835   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4836                                                     (__v16hi)
4837                                                     _mm256_undefined_si256 (),
4838                                                     (__mmask16) -1);
4839 }
4840
4841 extern __inline void
4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4844 {
4845   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4846 }
4847
4848 extern __inline __m256i
4849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4850 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4851 {
4852   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4853                                                     (__v16hi) __O,
4854                                                     __M);
4855 }
4856
4857 extern __inline __m256i
4858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4860 {
4861   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4862                                                     (__v16hi)
4863                                                     _mm256_setzero_si256 (),
4864                                                     __M);
4865 }
4866
4867 extern __inline __m256i
4868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4869 _mm512_cvtepi64_epi32 (__m512i __A)
4870 {
4871   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4872                                                   (__v8si)
4873                                                   _mm256_undefined_si256 (),
4874                                                   (__mmask8) -1);
4875 }
4876
4877 extern __inline void
4878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4879 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4880 {
4881   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4882 }
4883
4884 extern __inline __m256i
4885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4886 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4887 {
4888   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4889                                                   (__v8si) __O, __M);
4890 }
4891
4892 extern __inline __m256i
4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4895 {
4896   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4897                                                   (__v8si)
4898                                                   _mm256_setzero_si256 (),
4899                                                   __M);
4900 }
4901
4902 extern __inline __m256i
4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904 _mm512_cvtsepi64_epi32 (__m512i __A)
4905 {
4906   __v8si __O;
4907   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4908                                                    (__v8si)
4909                                                    _mm256_undefined_si256 (),
4910                                                    (__mmask8) -1);
4911 }
4912
4913 extern __inline void
4914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4915 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4916 {
4917   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4918 }
4919
4920 extern __inline __m256i
4921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4922 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4923 {
4924   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4925                                                    (__v8si) __O, __M);
4926 }
4927
4928 extern __inline __m256i
4929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4931 {
4932   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4933                                                    (__v8si)
4934                                                    _mm256_setzero_si256 (),
4935                                                    __M);
4936 }
4937
4938 extern __inline __m256i
4939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4940 _mm512_cvtusepi64_epi32 (__m512i __A)
4941 {
4942   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4943                                                     (__v8si)
4944                                                     _mm256_undefined_si256 (),
4945                                                     (__mmask8) -1);
4946 }
4947
4948 extern __inline void
4949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4950 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4951 {
4952   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4953 }
4954
4955 extern __inline __m256i
4956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4957 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4958 {
4959   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4960                                                     (__v8si) __O, __M);
4961 }
4962
4963 extern __inline __m256i
4964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4965 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4966 {
4967   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4968                                                     (__v8si)
4969                                                     _mm256_setzero_si256 (),
4970                                                     __M);
4971 }
4972
4973 extern __inline __m128i
4974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4975 _mm512_cvtepi64_epi16 (__m512i __A)
4976 {
4977   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4978                                                   (__v8hi)
4979                                                   _mm_undefined_si128 (),
4980                                                   (__mmask8) -1);
4981 }
4982
4983 extern __inline void
4984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4985 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4986 {
4987   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4988 }
4989
4990 extern __inline __m128i
4991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4992 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4993 {
4994   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4995                                                   (__v8hi) __O, __M);
4996 }
4997
4998 extern __inline __m128i
4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5001 {
5002   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5003                                                   (__v8hi)
5004                                                   _mm_setzero_si128 (),
5005                                                   __M);
5006 }
5007
5008 extern __inline __m128i
5009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5010 _mm512_cvtsepi64_epi16 (__m512i __A)
5011 {
5012   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5013                                                    (__v8hi)
5014                                                    _mm_undefined_si128 (),
5015                                                    (__mmask8) -1);
5016 }
5017
5018 extern __inline void
5019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5020 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5021 {
5022   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5023 }
5024
5025 extern __inline __m128i
5026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5027 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5028 {
5029   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5030                                                    (__v8hi) __O, __M);
5031 }
5032
5033 extern __inline __m128i
5034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5036 {
5037   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5038                                                    (__v8hi)
5039                                                    _mm_setzero_si128 (),
5040                                                    __M);
5041 }
5042
5043 extern __inline __m128i
5044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045 _mm512_cvtusepi64_epi16 (__m512i __A)
5046 {
5047   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5048                                                     (__v8hi)
5049                                                     _mm_undefined_si128 (),
5050                                                     (__mmask8) -1);
5051 }
5052
5053 extern __inline void
5054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5055 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5056 {
5057   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5058 }
5059
5060 extern __inline __m128i
5061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5062 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5063 {
5064   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5065                                                     (__v8hi) __O, __M);
5066 }
5067
5068 extern __inline __m128i
5069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5070 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5071 {
5072   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5073                                                     (__v8hi)
5074                                                     _mm_setzero_si128 (),
5075                                                     __M);
5076 }
5077
5078 extern __inline __m128i
5079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5080 _mm512_cvtepi64_epi8 (__m512i __A)
5081 {
5082   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5083                                                   (__v16qi)
5084                                                   _mm_undefined_si128 (),
5085                                                   (__mmask8) -1);
5086 }
5087
5088 extern __inline void
5089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5090 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5091 {
5092   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5093 }
5094
5095 extern __inline __m128i
5096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5097 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5098 {
5099   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5100                                                   (__v16qi) __O, __M);
5101 }
5102
5103 extern __inline __m128i
5104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5106 {
5107   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5108                                                   (__v16qi)
5109                                                   _mm_setzero_si128 (),
5110                                                   __M);
5111 }
5112
5113 extern __inline __m128i
5114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5115 _mm512_cvtsepi64_epi8 (__m512i __A)
5116 {
5117   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5118                                                    (__v16qi)
5119                                                    _mm_undefined_si128 (),
5120                                                    (__mmask8) -1);
5121 }
5122
5123 extern __inline void
5124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5125 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5126 {
5127   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5128 }
5129
5130 extern __inline __m128i
5131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5132 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5133 {
5134   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5135                                                    (__v16qi) __O, __M);
5136 }
5137
5138 extern __inline __m128i
5139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5141 {
5142   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5143                                                    (__v16qi)
5144                                                    _mm_setzero_si128 (),
5145                                                    __M);
5146 }
5147
5148 extern __inline __m128i
5149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5150 _mm512_cvtusepi64_epi8 (__m512i __A)
5151 {
5152   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5153                                                     (__v16qi)
5154                                                     _mm_undefined_si128 (),
5155                                                     (__mmask8) -1);
5156 }
5157
5158 extern __inline void
5159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5160 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5161 {
5162   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5163 }
5164
5165 extern __inline __m128i
5166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5167 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5168 {
5169   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5170                                                     (__v16qi) __O,
5171                                                     __M);
5172 }
5173
5174 extern __inline __m128i
5175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5176 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5177 {
5178   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5179                                                     (__v16qi)
5180                                                     _mm_setzero_si128 (),
5181                                                     __M);
5182 }
5183
5184 extern __inline __m512d
5185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5186 _mm512_cvtepi32_pd (__m256i __A)
5187 {
5188   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5189                                                     (__v8df)
5190                                                     _mm512_undefined_pd (),
5191                                                     (__mmask8) -1);
5192 }
5193
5194 extern __inline __m512d
5195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5196 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5197 {
5198   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5199                                                     (__v8df) __W,
5200                                                     (__mmask8) __U);
5201 }
5202
5203 extern __inline __m512d
5204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5205 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5206 {
5207   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5208                                                     (__v8df)
5209                                                     _mm512_setzero_pd (),
5210                                                     (__mmask8) __U);
5211 }
5212
5213 extern __inline __m512d
5214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215 _mm512_cvtepu32_pd (__m256i __A)
5216 {
5217   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5218                                                      (__v8df)
5219                                                      _mm512_undefined_pd (),
5220                                                      (__mmask8) -1);
5221 }
5222
5223 extern __inline __m512d
5224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5226 {
5227   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5228                                                      (__v8df) __W,
5229                                                      (__mmask8) __U);
5230 }
5231
5232 extern __inline __m512d
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5235 {
5236   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5237                                                      (__v8df)
5238                                                      _mm512_setzero_pd (),
5239                                                      (__mmask8) __U);
5240 }
5241
5242 #ifdef __OPTIMIZE__
5243 extern __inline __m512
5244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5245 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5246 {
5247   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5248                                                    (__v16sf)
5249                                                    _mm512_undefined_ps (),
5250                                                    (__mmask16) -1, __R);
5251 }
5252
5253 extern __inline __m512
5254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5255 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5256                                const int __R)
5257 {
5258   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5259                                                    (__v16sf) __W,
5260                                                    (__mmask16) __U, __R);
5261 }
5262
5263 extern __inline __m512
5264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5265 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5266 {
5267   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5268                                                    (__v16sf)
5269                                                    _mm512_setzero_ps (),
5270                                                    (__mmask16) __U, __R);
5271 }
5272
5273 extern __inline __m512
5274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5275 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5276 {
5277   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5278                                                     (__v16sf)
5279                                                     _mm512_undefined_ps (),
5280                                                     (__mmask16) -1, __R);
5281 }
5282
5283 extern __inline __m512
5284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5286                                const int __R)
5287 {
5288   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5289                                                     (__v16sf) __W,
5290                                                     (__mmask16) __U, __R);
5291 }
5292
5293 extern __inline __m512
5294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5295 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5296 {
5297   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5298                                                     (__v16sf)
5299                                                     _mm512_setzero_ps (),
5300                                                     (__mmask16) __U, __R);
5301 }
5302
5303 #else
/* Macro form of _mm512_cvt_roundepi32_ps, used when __OPTIMIZE__ is not
   defined.  A is viewed as __v16si and B becomes the builtin's last
   argument; the second operand is _mm512_undefined_ps () and the mask is
   an all-ones __mmask16.  The expansion and B are parenthesized so the
   macro behaves as a single primary expression wherever it is used.  */
#define _mm512_cvt_roundepi32_ps(A, B)                                  \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),             \
    (__v16sf) _mm512_undefined_ps (),                                   \
    (__mmask16) -1, (B)))
5306
/* Macro form of _mm512_mask_cvt_roundepi32_ps, used when __OPTIMIZE__ is
   not defined.  A is viewed as __v16si, W as __v16sf and U as __mmask16
   (the same casts the inline form applies); B becomes the builtin's last
   argument.  Every argument and the whole expansion are parenthesized so
   the macro behaves as a single primary expression wherever it is used.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)                       \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),             \
    (__v16sf) (W),                                                      \
    (__mmask16) (U), (B)))
5309
/* Macro form of _mm512_maskz_cvt_roundepi32_ps, used when __OPTIMIZE__ is
   not defined.  A is viewed as __v16si, the second operand is
   _mm512_setzero_ps (), U is cast to __mmask16 (as in the inline form) and
   B becomes the builtin's last argument.  Every argument and the whole
   expansion are parenthesized so the macro behaves as a single primary
   expression wherever it is used.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)                         \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),             \
    (__v16sf) _mm512_setzero_ps (),                                     \
    (__mmask16) (U), (B)))
5312
/* Macro form of _mm512_cvt_roundepu32_ps, used when __OPTIMIZE__ is not
   defined.  A is viewed as __v16si and B becomes the builtin's last
   argument; the second operand is _mm512_undefined_ps () and the mask is
   an all-ones __mmask16.  The expansion and B are parenthesized so the
   macro behaves as a single primary expression wherever it is used.  */
#define _mm512_cvt_roundepu32_ps(A, B)                                  \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),            \
    (__v16sf) _mm512_undefined_ps (),                                   \
    (__mmask16) -1, (B)))
5315
/* Macro form of _mm512_mask_cvt_roundepu32_ps, used when __OPTIMIZE__ is
   not defined.  A is viewed as __v16si, W as __v16sf and U as __mmask16
   (the same casts the inline form applies); B becomes the builtin's last
   argument.  Every argument and the whole expansion are parenthesized so
   the macro behaves as a single primary expression wherever it is used.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)                       \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),            \
    (__v16sf) (W),                                                      \
    (__mmask16) (U), (B)))
5318
/* Macro form of _mm512_maskz_cvt_roundepu32_ps, used when __OPTIMIZE__ is
   not defined.  A is viewed as __v16si, the second operand is
   _mm512_setzero_ps (), U is cast to __mmask16 (as in the inline form) and
   B becomes the builtin's last argument.  Every argument and the whole
   expansion are parenthesized so the macro behaves as a single primary
   expression wherever it is used.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)                         \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),            \
    (__v16sf) _mm512_setzero_ps (),                                     \
    (__mmask16) (U), (B)))
5321 #endif
5322
5323 #ifdef __OPTIMIZE__
5324 extern __inline __m256d
5325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5326 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5327 {
5328   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5329                                                      __imm,
5330                                                      (__v4df)
5331                                                      _mm256_undefined_pd (),
5332                                                      (__mmask8) -1);
5333 }
5334
5335 extern __inline __m256d
5336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5337 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5338                              const int __imm)
5339 {
5340   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5341                                                      __imm,
5342                                                      (__v4df) __W,
5343                                                      (__mmask8) __U);
5344 }
5345
5346 extern __inline __m256d
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5349 {
5350   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5351                                                      __imm,
5352                                                      (__v4df)
5353                                                      _mm256_setzero_pd (),
5354                                                      (__mmask8) __U);
5355 }
5356
5357 extern __inline __m128
5358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5359 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5360 {
5361   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5362                                                     __imm,
5363                                                     (__v4sf)
5364                                                     _mm_undefined_ps (),
5365                                                     (__mmask8) -1);
5366 }
5367
5368 extern __inline __m128
5369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5370 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5371                              const int __imm)
5372 {
5373   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5374                                                     __imm,
5375                                                     (__v4sf) __W,
5376                                                     (__mmask8) __U);
5377 }
5378
5379 extern __inline __m128
5380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5382 {
5383   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5384                                                     __imm,
5385                                                     (__v4sf)
5386                                                     _mm_setzero_ps (),
5387                                                     (__mmask8) __U);
5388 }
5389
5390 extern __inline __m256i
5391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5392 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5393 {
5394   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5395                                                      __imm,
5396                                                      (__v4di)
5397                                                      _mm256_undefined_si256 (),
5398                                                      (__mmask8) -1);
5399 }
5400
5401 extern __inline __m256i
5402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5403 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5404                                 const int __imm)
5405 {
5406   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5407                                                      __imm,
5408                                                      (__v4di) __W,
5409                                                      (__mmask8) __U);
5410 }
5411
5412 extern __inline __m256i
5413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5414 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5415 {
5416   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5417                                                      __imm,
5418                                                      (__v4di)
5419                                                      _mm256_setzero_si256 (),
5420                                                      (__mmask8) __U);
5421 }
5422
5423 extern __inline __m128i
5424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5426 {
5427   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5428                                                      __imm,
5429                                                      (__v4si)
5430                                                      _mm_undefined_si128 (),
5431                                                      (__mmask8) -1);
5432 }
5433
5434 extern __inline __m128i
5435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5436 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5437                                 const int __imm)
5438 {
5439   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5440                                                      __imm,
5441                                                      (__v4si) __W,
5442                                                      (__mmask8) __U);
5443 }
5444
5445 extern __inline __m128i
5446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5447 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5448 {
5449   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5450                                                      __imm,
5451                                                      (__v4si)
5452                                                      _mm_setzero_si128 (),
5453                                                      (__mmask8) __U);
5454 }
5455 #else
5456
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extractf64x4_mask with SRC as __v8df, (int) IMM, the
   value of _mm256_undefined_pd () and an all-ones __mmask8.  */
#define _mm512_extractf64x4_pd(SRC, IMM)                                \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4df) (__m256d)                 \
                                     _mm256_undefined_pd (),            \
                                     (__mmask8) -1))
5462
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extractf64x4_mask with SRC as __v8df, (int) IMM, OLD as
   __v4df and K as __mmask8.  */
#define _mm512_mask_extractf64x4_pd(OLD, K, SRC, IMM)                   \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4df) (__m256d) (OLD),          \
                                     (__mmask8) (K)))
5468
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extractf64x4_mask with SRC as __v8df, (int) IMM, the
   vector returned by _mm256_setzero_pd () and K as __mmask8.  */
#define _mm512_maskz_extractf64x4_pd(K, SRC, IMM)                       \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4df) (__m256d)                 \
                                     _mm256_setzero_pd (),              \
                                     (__mmask8) (K)))
5474
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extractf32x4_mask with SRC as __v16sf, (int) IMM, the
   value of _mm_undefined_ps () and an all-ones __mmask8.  */
#define _mm512_extractf32x4_ps(SRC, IMM)                                \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4sf) (__m128)                  \
                                     _mm_undefined_ps (),               \
                                     (__mmask8) -1))
5480
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extractf32x4_mask with SRC as __v16sf, (int) IMM, OLD as
   __v4sf and K as __mmask8.  */
#define _mm512_mask_extractf32x4_ps(OLD, K, SRC, IMM)                   \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4sf) (__m128) (OLD),           \
                                     (__mmask8) (K)))
5486
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extractf32x4_mask with SRC as __v16sf, (int) IMM, the
   vector returned by _mm_setzero_ps () and K as __mmask8.  */
#define _mm512_maskz_extractf32x4_ps(K, SRC, IMM)                       \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4sf) (__m128)                  \
                                     _mm_setzero_ps (),                 \
                                     (__mmask8) (K)))
5492
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extracti64x4_mask with SRC as __v8di, (int) IMM, the
   value of _mm256_undefined_si256 () and an all-ones __mmask8.  */
#define _mm512_extracti64x4_epi64(SRC, IMM)                             \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4di) (__m256i)                 \
                                     _mm256_undefined_si256 (),         \
                                     (__mmask8) -1))
5498
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extracti64x4_mask with SRC as __v8di, (int) IMM, OLD as
   __v4di and K as __mmask8.  */
#define _mm512_mask_extracti64x4_epi64(OLD, K, SRC, IMM)                \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4di) (__m256i) (OLD),          \
                                     (__mmask8) (K)))
5504
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extracti64x4_mask with SRC as __v8di, (int) IMM, the
   vector returned by _mm256_setzero_si256 () and K as __mmask8.  */
#define _mm512_maskz_extracti64x4_epi64(K, SRC, IMM)                    \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (SRC),          \
                                     (int) (IMM),                       \
                                     (__v4di) (__m256i)                 \
                                     _mm256_setzero_si256 (),           \
                                     (__mmask8) (K)))
5510
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extracti32x4_mask with SRC as __v16si, (int) IMM, the
   value of _mm_undefined_si128 () and an all-ones __mmask8.  */
#define _mm512_extracti32x4_epi32(SRC, IMM)                             \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (SRC),         \
                                     (int) (IMM),                       \
                                     (__v4si) (__m128i)                 \
                                     _mm_undefined_si128 (),            \
                                     (__mmask8) -1))
5516
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extracti32x4_mask with SRC as __v16si, (int) IMM, OLD as
   __v4si and K as __mmask8.  */
#define _mm512_mask_extracti32x4_epi32(OLD, K, SRC, IMM)                \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (SRC),         \
                                     (int) (IMM),                       \
                                     (__v4si) (__m128i) (OLD),          \
                                     (__mmask8) (K)))
5522
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a call of
   __builtin_ia32_extracti32x4_mask with SRC as __v16si, (int) IMM, the
   vector returned by _mm_setzero_si128 () and K as __mmask8.  */
#define _mm512_maskz_extracti32x4_epi32(K, SRC, IMM)                    \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (SRC),         \
                                     (int) (IMM),                       \
                                     (__v4si) (__m128i)                 \
                                     _mm_setzero_si128 (),              \
                                     (__mmask8) (K)))
5528 #endif
5529
5530 #ifdef __OPTIMIZE__
5531 extern __inline __m512i
5532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5534 {
5535   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5536                                                     (__v4si) __B,
5537                                                     __imm,
5538                                                     (__v16si) __A, -1);
5539 }
5540
5541 extern __inline __m512
5542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5544 {
5545   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5546                                                    (__v4sf) __B,
5547                                                    __imm,
5548                                                    (__v16sf) __A, -1);
5549 }
5550
5551 extern __inline __m512i
5552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5554 {
5555   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5556                                                     (__v4di) __B,
5557                                                     __imm,
5558                                                     (__v8di)
5559                                                     _mm512_undefined_si512 (),
5560                                                     (__mmask8) -1);
5561 }
5562
5563 extern __inline __m512i
5564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5565 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5566                          __m256i __B, const int __imm)
5567 {
5568   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5569                                                     (__v4di) __B,
5570                                                     __imm,
5571                                                     (__v8di) __W,
5572                                                     (__mmask8) __U);
5573 }
5574
5575 extern __inline __m512i
5576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5578                           const int __imm)
5579 {
5580   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5581                                                     (__v4di) __B,
5582                                                     __imm,
5583                                                     (__v8di)
5584                                                     _mm512_setzero_si512 (),
5585                                                     (__mmask8) __U);
5586 }
5587
5588 extern __inline __m512d
5589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5590 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5591 {
5592   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5593                                                     (__v4df) __B,
5594                                                     __imm,
5595                                                     (__v8df)
5596                                                     _mm512_undefined_pd (),
5597                                                     (__mmask8) -1);
5598 }
5599
5600 extern __inline __m512d
5601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5603                          __m256d __B, const int __imm)
5604 {
5605   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5606                                                     (__v4df) __B,
5607                                                     __imm,
5608                                                     (__v8df) __W,
5609                                                     (__mmask8) __U);
5610 }
5611
5612 extern __inline __m512d
5613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5615                           const int __imm)
5616 {
5617   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5618                                                     (__v4df) __B,
5619                                                     __imm,
5620                                                     (__v8df)
5621                                                     _mm512_setzero_pd (),
5622                                                     (__mmask8) __U);
5623 }
5624 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The mask is all-ones,
   so the fourth vector operand is given as _mm512_undefined_ps () -- the
   same arrangement the unmasked 64x4 insert macros in this file use --
   rather than a second copy of X.  X is therefore expanded, and any side
   effects in it evaluated, only once.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C),                                    \
    (__v16sf)(__m512)_mm512_undefined_ps(),                             \
    (__mmask16)(-1)))
5628
/* Macro form used when __OPTIMIZE__ is not defined.  The mask is all-ones,
   so the fourth vector operand is given as _mm512_undefined_si512 () --
   the same arrangement the unmasked 64x4 insert macros in this file use --
   rather than a second copy of X.  X is therefore expanded, and any side
   effects in it evaluated, only once.  */
#define _mm512_inserti32x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C),                                   \
    (__v16si)(__m512i)_mm512_undefined_si512 (),                        \
    (__mmask16)(-1)))
5632
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call of
   __builtin_ia32_insertf64x4_mask with _mm512_undefined_pd () as the
   fourth vector operand and an all-ones __mmask8.  */
#define _mm512_insertf64x4(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ( \
    (__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v8df)(__m512d) _mm512_undefined_pd (), \
    (__mmask8) -1))
5638
/* Macro form used when __OPTIMIZE__ is not defined: W and U are the
   fourth vector operand and the mask of __builtin_ia32_insertf64x4_mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ( \
    (__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v8df)(__m512d) (W), \
    (__mmask8) (U)))
5644
/* Macro form used when __OPTIMIZE__ is not defined: masked by U, with
   _mm512_setzero_pd () as the fourth vector operand.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ( \
    (__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v8df)(__m512d) _mm512_setzero_pd (), \
    (__mmask8) (U)))
5650
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call of
   __builtin_ia32_inserti64x4_mask with _mm512_undefined_si512 () as the
   fourth vector operand and an all-ones __mmask8.  */
#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
    (__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v8di)(__m512i) _mm512_undefined_si512 (), \
    (__mmask8) -1))
5656
/* Macro form used when __OPTIMIZE__ is not defined: W and U are the
   fourth vector operand and the mask of __builtin_ia32_inserti64x4_mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
    (__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v8di)(__m512i) (W), \
    (__mmask8) (U)))
5662
/* Macro form used when __OPTIMIZE__ is not defined: masked by U, with
   _mm512_setzero_si512 () as the fourth vector operand.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
    (__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v8di)(__m512i) _mm512_setzero_si512 (), \
    (__mmask8) (U)))
5668 #endif
5669
5670 extern __inline __m512d
5671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5672 _mm512_loadu_pd (void const *__P)
5673 {
5674   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5675                                                    (__v8df)
5676                                                    _mm512_undefined_pd (),
5677                                                    (__mmask8) -1);
5678 }
5679
5680 extern __inline __m512d
5681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5682 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5683 {
5684   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5685                                                    (__v8df) __W,
5686                                                    (__mmask8) __U);
5687 }
5688
5689 extern __inline __m512d
5690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5691 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5692 {
5693   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5694                                                    (__v8df)
5695                                                    _mm512_setzero_pd (),
5696                                                    (__mmask8) __U);
5697 }
5698
5699 extern __inline void
5700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5701 _mm512_storeu_pd (void *__P, __m512d __A)
5702 {
5703   __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5704                                    (__mmask8) -1);
5705 }
5706
5707 extern __inline void
5708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5709 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5710 {
5711   __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5712                                    (__mmask8) __U);
5713 }
5714
5715 extern __inline __m512
5716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5717 _mm512_loadu_ps (void const *__P)
5718 {
5719   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5720                                                   (__v16sf)
5721                                                   _mm512_undefined_ps (),
5722                                                   (__mmask16) -1);
5723 }
5724
5725 extern __inline __m512
5726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5727 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5728 {
5729   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5730                                                   (__v16sf) __W,
5731                                                   (__mmask16) __U);
5732 }
5733
5734 extern __inline __m512
5735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5737 {
5738   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5739                                                   (__v16sf)
5740                                                   _mm512_setzero_ps (),
5741                                                   (__mmask16) __U);
5742 }
5743
5744 extern __inline void
5745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746 _mm512_storeu_ps (void *__P, __m512 __A)
5747 {
5748   __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5749                                    (__mmask16) -1);
5750 }
5751
5752 extern __inline void
5753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5754 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5755 {
5756   __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5757                                    (__mmask16) __U);
5758 }
5759
5760 extern __inline __m512i
5761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5762 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5763 {
5764   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5765                                                      (__v8di) __W,
5766                                                      (__mmask8) __U);
5767 }
5768
5769 extern __inline __m512i
5770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5772 {
5773   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5774                                                      (__v8di)
5775                                                      _mm512_setzero_si512 (),
5776                                                      (__mmask8) __U);
5777 }
5778
5779 extern __inline void
5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5782 {
5783   __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5784                                      (__mmask8) __U);
5785 }
5786
5787 extern __inline __m512i
5788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5789 _mm512_loadu_si512 (void const *__P)
5790 {
5791   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5792                                                      (__v16si)
5793                                                      _mm512_setzero_si512 (),
5794                                                      (__mmask16) -1);
5795 }
5796
5797 extern __inline __m512i
5798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5799 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5800 {
5801   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5802                                                      (__v16si) __W,
5803                                                      (__mmask16) __U);
5804 }
5805
5806 extern __inline __m512i
5807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5808 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5809 {
5810   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5811                                                      (__v16si)
5812                                                      _mm512_setzero_si512 (),
5813                                                      (__mmask16) __U);
5814 }
5815
5816 extern __inline void
5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 _mm512_storeu_si512 (void *__P, __m512i __A)
5819 {
5820   __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5821                                      (__mmask16) -1);
5822 }
5823
5824 extern __inline void
5825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5826 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5827 {
5828   __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5829                                      (__mmask16) __U);
5830 }
5831
5832 extern __inline __m512d
5833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5835 {
5836   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5837                                                         (__v8di) __C,
5838                                                         (__v8df)
5839                                                         _mm512_undefined_pd (),
5840                                                         (__mmask8) -1);
5841 }
5842
5843 extern __inline __m512d
5844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5846 {
5847   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5848                                                         (__v8di) __C,
5849                                                         (__v8df) __W,
5850                                                         (__mmask8) __U);
5851 }
5852
5853 extern __inline __m512d
5854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5855 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5856 {
5857   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5858                                                         (__v8di) __C,
5859                                                         (__v8df)
5860                                                         _mm512_setzero_pd (),
5861                                                         (__mmask8) __U);
5862 }
5863
5864 extern __inline __m512
5865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5866 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5867 {
5868   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5869                                                        (__v16si) __C,
5870                                                        (__v16sf)
5871                                                        _mm512_undefined_ps (),
5872                                                        (__mmask16) -1);
5873 }
5874
5875 extern __inline __m512
5876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5878 {
5879   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5880                                                        (__v16si) __C,
5881                                                        (__v16sf) __W,
5882                                                        (__mmask16) __U);
5883 }
5884
5885 extern __inline __m512
5886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5887 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5888 {
5889   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5890                                                        (__v16si) __C,
5891                                                        (__v16sf)
5892                                                        _mm512_setzero_ps (),
5893                                                        (__mmask16) __U);
5894 }
5895
5896 extern __inline __m512i
5897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5898 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5899 {
5900   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5901                                                        /* idx */ ,
5902                                                        (__v8di) __A,
5903                                                        (__v8di) __B,
5904                                                        (__mmask8) -1);
5905 }
5906
5907 extern __inline __m512i
5908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5909 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5910                                 __m512i __B)
5911 {
5912   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5913                                                        /* idx */ ,
5914                                                        (__v8di) __A,
5915                                                        (__v8di) __B,
5916                                                        (__mmask8) __U);
5917 }
5918
5919 extern __inline __m512i
5920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5922                                  __mmask8 __U, __m512i __B)
5923 {
5924   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5925                                                        (__v8di) __I
5926                                                        /* idx */ ,
5927                                                        (__v8di) __B,
5928                                                        (__mmask8) __U);
5929 }
5930
5931 extern __inline __m512i
5932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5934                                  __m512i __I, __m512i __B)
5935 {
5936   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5937                                                         /* idx */ ,
5938                                                         (__v8di) __A,
5939                                                         (__v8di) __B,
5940                                                         (__mmask8) __U);
5941 }
5942
5943 extern __inline __m512i
5944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5945 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5946 {
5947   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5948                                                        /* idx */ ,
5949                                                        (__v16si) __A,
5950                                                        (__v16si) __B,
5951                                                        (__mmask16) -1);
5952 }
5953
5954 extern __inline __m512i
5955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5956 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5957                                 __m512i __I, __m512i __B)
5958 {
5959   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5960                                                        /* idx */ ,
5961                                                        (__v16si) __A,
5962                                                        (__v16si) __B,
5963                                                        (__mmask16) __U);
5964 }
5965
5966 extern __inline __m512i
5967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5969                                  __mmask16 __U, __m512i __B)
5970 {
5971   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5972                                                        (__v16si) __I
5973                                                        /* idx */ ,
5974                                                        (__v16si) __B,
5975                                                        (__mmask16) __U);
5976 }
5977
5978 extern __inline __m512i
5979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5980 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5981                                  __m512i __I, __m512i __B)
5982 {
5983   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5984                                                         /* idx */ ,
5985                                                         (__v16si) __A,
5986                                                         (__v16si) __B,
5987                                                         (__mmask16) __U);
5988 }
5989
5990 extern __inline __m512d
5991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5992 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5993 {
5994   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5995                                                         /* idx */ ,
5996                                                         (__v8df) __A,
5997                                                         (__v8df) __B,
5998                                                         (__mmask8) -1);
5999 }
6000
6001 extern __inline __m512d
6002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6003 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6004                              __m512d __B)
6005 {
6006   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6007                                                         /* idx */ ,
6008                                                         (__v8df) __A,
6009                                                         (__v8df) __B,
6010                                                         (__mmask8) __U);
6011 }
6012
6013 extern __inline __m512d
6014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6016                               __m512d __B)
6017 {
6018   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6019                                                         (__v8di) __I
6020                                                         /* idx */ ,
6021                                                         (__v8df) __B,
6022                                                         (__mmask8) __U);
6023 }
6024
6025 extern __inline __m512d
6026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6028                               __m512d __B)
6029 {
6030   return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6031                                                          /* idx */ ,
6032                                                          (__v8df) __A,
6033                                                          (__v8df) __B,
6034                                                          (__mmask8) __U);
6035 }
6036
6037 extern __inline __m512
6038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6040 {
6041   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6042                                                        /* idx */ ,
6043                                                        (__v16sf) __A,
6044                                                        (__v16sf) __B,
6045                                                        (__mmask16) -1);
6046 }
6047
6048 extern __inline __m512
6049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6051 {
6052   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6053                                                        /* idx */ ,
6054                                                        (__v16sf) __A,
6055                                                        (__v16sf) __B,
6056                                                        (__mmask16) __U);
6057 }
6058
6059 extern __inline __m512
6060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6061 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6062                               __m512 __B)
6063 {
6064   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6065                                                        (__v16si) __I
6066                                                        /* idx */ ,
6067                                                        (__v16sf) __B,
6068                                                        (__mmask16) __U);
6069 }
6070
6071 extern __inline __m512
6072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6074                               __m512 __B)
6075 {
6076   return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6077                                                         /* idx */ ,
6078                                                         (__v16sf) __A,
6079                                                         (__v16sf) __B,
6080                                                         (__mmask16) __U);
6081 }
6082
6083 #ifdef __OPTIMIZE__
6084 extern __inline __m512d
6085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6086 _mm512_permute_pd (__m512d __X, const int __C)
6087 {
6088   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6089                                                      (__v8df)
6090                                                      _mm512_undefined_pd (),
6091                                                      (__mmask8) -1);
6092 }
6093
6094 extern __inline __m512d
6095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6096 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6097 {
6098   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6099                                                      (__v8df) __W,
6100                                                      (__mmask8) __U);
6101 }
6102
6103 extern __inline __m512d
6104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6105 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6106 {
6107   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6108                                                      (__v8df)
6109                                                      _mm512_setzero_pd (),
6110                                                      (__mmask8) __U);
6111 }
6112
6113 extern __inline __m512
6114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6115 _mm512_permute_ps (__m512 __X, const int __C)
6116 {
6117   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6118                                                     (__v16sf)
6119                                                     _mm512_undefined_ps (),
6120                                                     (__mmask16) -1);
6121 }
6122
6123 extern __inline __m512
6124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6125 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6126 {
6127   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6128                                                     (__v16sf) __W,
6129                                                     (__mmask16) __U);
6130 }
6131
6132 extern __inline __m512
6133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6134 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6135 {
6136   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6137                                                     (__v16sf)
6138                                                     _mm512_setzero_ps (),
6139                                                     (__mmask16) __U);
6140 }
6141 #else
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call of
   __builtin_ia32_vpermilpd512_mask with _mm512_undefined_pd () as the
   vector operand after the immediate and an all-ones __mmask8.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d) (X), \
    (int) (C), \
    (__v8df)(__m512d) _mm512_undefined_pd (), \
    (__mmask8) (-1)))
6146
/* Macro form used when __OPTIMIZE__ is not defined: W and U are the
   trailing vector operand and the mask of
   __builtin_ia32_vpermilpd512_mask.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d) (X), \
    (int) (C), \
    (__v8df)(__m512d) (W), \
    (__mmask8) (U)))
6151
/* Macro form used when __OPTIMIZE__ is not defined: masked by U, with
   _mm512_setzero_pd () as the trailing vector operand.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d) (X), \
    (int) (C), \
    (__v8df)(__m512d) _mm512_setzero_pd (), \
    (__mmask8) (U)))
6156
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call of
   __builtin_ia32_vpermilps512_mask with _mm512_undefined_ps () as the
   vector operand after the immediate and an all-ones __mmask16.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512) (X), \
    (int) (C), \
    (__v16sf)(__m512) _mm512_undefined_ps (), \
    (__mmask16) (-1)))
6161
/* Macro form used when __OPTIMIZE__ is not defined: W and U are the
   trailing vector operand and the mask of
   __builtin_ia32_vpermilps512_mask.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512) (X), \
    (int) (C), \
    (__v16sf)(__m512) (W), \
    (__mmask16) (U)))
6166
/* Macro form used when __OPTIMIZE__ is not defined: masked by U, with
   _mm512_setzero_ps () as the trailing vector operand.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512) (X), \
    (int) (C), \
    (__v16sf)(__m512) _mm512_setzero_ps (), \
    (__mmask16) (U)))
6171 #endif
6172
6173 #ifdef __OPTIMIZE__
6174 extern __inline __m512i
6175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6176 _mm512_permutex_epi64 (__m512i __X, const int __I)
6177 {
6178   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6179                                                   (__v8di)
6180                                                   _mm512_undefined_si512 (),
6181                                                   (__mmask8) (-1));
6182 }
6183
6184 extern __inline __m512i
6185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6186 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6187                             __m512i __X, const int __I)
6188 {
6189   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6190                                                   (__v8di) __W,
6191                                                   (__mmask8) __M);
6192 }
6193
6194 extern __inline __m512i
6195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6197 {
6198   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6199                                                   (__v8di)
6200                                                   _mm512_setzero_si512 (),
6201                                                   (__mmask8) __M);
6202 }
6203
6204 extern __inline __m512d
6205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6206 _mm512_permutex_pd (__m512d __X, const int __M)
6207 {
6208   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6209                                                   (__v8df)
6210                                                   _mm512_undefined_pd (),
6211                                                   (__mmask8) -1);
6212 }
6213
6214 extern __inline __m512d
6215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6217 {
6218   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6219                                                   (__v8df) __W,
6220                                                   (__mmask8) __U);
6221 }
6222
6223 extern __inline __m512d
6224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6225 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6226 {
6227   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6228                                                   (__v8df)
6229                                                   _mm512_setzero_pd (),
6230                                                   (__mmask8) __U);
6231 }
6232 #else
/* Macro form used when __OPTIMIZE__ is not defined: passes X and the
   immediate M to __builtin_ia32_permdf512_mask with an undefined
   pass-through vector and an all-ones mask.  */
#define _mm512_permutex_pd(X, M)                                \
  ((__m512d) __builtin_ia32_permdf512_mask (                    \
     (__v8df)(__m512d)(X),                                      \
     (int)(M),                                                  \
     (__v8df)(__m512d)_mm512_undefined_pd(),                    \
     (__mmask8)-1))
6237
/* Macro form used when __OPTIMIZE__ is not defined: passes X and the
   immediate M to __builtin_ia32_permdf512_mask, with W as the
   pass-through vector and U as the mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M)                     \
  ((__m512d) __builtin_ia32_permdf512_mask (                    \
     (__v8df)(__m512d)(X),                                      \
     (int)(M),                                                  \
     (__v8df)(__m512d)(W),                                      \
     (__mmask8)(U)))
6241
/* Macro form used when __OPTIMIZE__ is not defined: passes X and the
   immediate M to __builtin_ia32_permdf512_mask, with a zero vector as
   the pass-through operand and U as the mask.  */
#define _mm512_maskz_permutex_pd(U, X, M)                       \
  ((__m512d) __builtin_ia32_permdf512_mask (                    \
     (__v8df)(__m512d)(X),                                      \
     (int)(M),                                                  \
     (__v8df)(__m512d)_mm512_setzero_pd(),                      \
     (__mmask8)(U)))
6246
/* Macro form used when __OPTIMIZE__ is not defined: passes X and the
   immediate I to __builtin_ia32_permdi512_mask with an undefined
   pass-through vector and an all-ones mask.  */
#define _mm512_permutex_epi64(X, I)                             \
  ((__m512i) __builtin_ia32_permdi512_mask (                    \
     (__v8di)(__m512i)(X),                                      \
     (int)(I),                                                  \
     (__v8di)(__m512i) (_mm512_undefined_si512 ()),             \
     (__mmask8)(-1)))
6253
/* Macro form used when __OPTIMIZE__ is not defined: passes X and the
   immediate I to __builtin_ia32_permdi512_mask, with a zero vector as
   the pass-through operand and M as the mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I)                    \
  ((__m512i) __builtin_ia32_permdi512_mask (                    \
     (__v8di)(__m512i)(X),                                      \
     (int)(I),                                                  \
     (__v8di)(__m512i) (_mm512_setzero_si512 ()),               \
     (__mmask8)(M)))
6260
/* Macro form used when __OPTIMIZE__ is not defined: passes X and the
   immediate I to __builtin_ia32_permdi512_mask, with W as the
   pass-through vector and M as the mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I)                  \
  ((__m512i) __builtin_ia32_permdi512_mask (                    \
     (__v8di)(__m512i)(X),                                      \
     (int)(I),                                                  \
     (__v8di)(__m512i)(W),                                      \
     (__mmask8)(M)))
6266 #endif
6267
6268 extern __inline __m512i
6269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6271 {
6272   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6273                                                      (__v8di) __X,
6274                                                      (__v8di)
6275                                                      _mm512_setzero_si512 (),
6276                                                      __M);
6277 }
6278
6279 extern __inline __m512i
6280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6281 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6282 {
6283   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6284                                                      (__v8di) __X,
6285                                                      (__v8di)
6286                                                      _mm512_undefined_si512 (),
6287                                                      (__mmask8) -1);
6288 }
6289
6290 extern __inline __m512i
6291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6292 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6293                                __m512i __Y)
6294 {
6295   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6296                                                      (__v8di) __X,
6297                                                      (__v8di) __W,
6298                                                      __M);
6299 }
6300
6301 extern __inline __m512i
6302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6303 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6304 {
6305   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6306                                                      (__v16si) __X,
6307                                                      (__v16si)
6308                                                      _mm512_setzero_si512 (),
6309                                                      __M);
6310 }
6311
6312 extern __inline __m512i
6313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6315 {
6316   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6317                                                      (__v16si) __X,
6318                                                      (__v16si)
6319                                                      _mm512_undefined_si512 (),
6320                                                      (__mmask16) -1);
6321 }
6322
6323 extern __inline __m512i
6324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6326                                __m512i __Y)
6327 {
6328   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6329                                                      (__v16si) __X,
6330                                                      (__v16si) __W,
6331                                                      __M);
6332 }
6333
6334 extern __inline __m512d
6335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6337 {
6338   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6339                                                      (__v8di) __X,
6340                                                      (__v8df)
6341                                                      _mm512_undefined_pd (),
6342                                                      (__mmask8) -1);
6343 }
6344
6345 extern __inline __m512d
6346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6347 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6348 {
6349   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6350                                                      (__v8di) __X,
6351                                                      (__v8df) __W,
6352                                                      (__mmask8) __U);
6353 }
6354
6355 extern __inline __m512d
6356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6358 {
6359   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6360                                                      (__v8di) __X,
6361                                                      (__v8df)
6362                                                      _mm512_setzero_pd (),
6363                                                      (__mmask8) __U);
6364 }
6365
6366 extern __inline __m512
6367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6368 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6369 {
6370   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6371                                                     (__v16si) __X,
6372                                                     (__v16sf)
6373                                                     _mm512_undefined_ps (),
6374                                                     (__mmask16) -1);
6375 }
6376
6377 extern __inline __m512
6378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6379 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6380 {
6381   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6382                                                     (__v16si) __X,
6383                                                     (__v16sf) __W,
6384                                                     (__mmask16) __U);
6385 }
6386
6387 extern __inline __m512
6388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6390 {
6391   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6392                                                     (__v16si) __X,
6393                                                     (__v16sf)
6394                                                     _mm512_setzero_ps (),
6395                                                     (__mmask16) __U);
6396 }
6397
6398 #ifdef __OPTIMIZE__
6399 extern __inline __m512
6400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6401 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6402 {
6403   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6404                                                  (__v16sf) __V, __imm,
6405                                                  (__v16sf)
6406                                                  _mm512_undefined_ps (),
6407                                                  (__mmask16) -1);
6408 }
6409
6410 extern __inline __m512
6411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6413                         __m512 __V, const int __imm)
6414 {
6415   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6416                                                  (__v16sf) __V, __imm,
6417                                                  (__v16sf) __W,
6418                                                  (__mmask16) __U);
6419 }
6420
6421 extern __inline __m512
6422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6424 {
6425   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6426                                                  (__v16sf) __V, __imm,
6427                                                  (__v16sf)
6428                                                  _mm512_setzero_ps (),
6429                                                  (__mmask16) __U);
6430 }
6431
6432 extern __inline __m512d
6433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6435 {
6436   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6437                                                   (__v8df) __V, __imm,
6438                                                   (__v8df)
6439                                                   _mm512_undefined_pd (),
6440                                                   (__mmask8) -1);
6441 }
6442
6443 extern __inline __m512d
6444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6446                         __m512d __V, const int __imm)
6447 {
6448   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6449                                                   (__v8df) __V, __imm,
6450                                                   (__v8df) __W,
6451                                                   (__mmask8) __U);
6452 }
6453
6454 extern __inline __m512d
6455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6456 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6457                          const int __imm)
6458 {
6459   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6460                                                   (__v8df) __V, __imm,
6461                                                   (__v8df)
6462                                                   _mm512_setzero_pd (),
6463                                                   (__mmask8) __U);
6464 }
6465
6466 extern __inline __m512d
6467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6469                           const int __imm, const int __R)
6470 {
6471   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6472                                                       (__v8df) __B,
6473                                                       (__v8di) __C,
6474                                                       __imm,
6475                                                       (__mmask8) -1, __R);
6476 }
6477
6478 extern __inline __m512d
6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6481                                __m512i __C, const int __imm, const int __R)
6482 {
6483   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6484                                                       (__v8df) __B,
6485                                                       (__v8di) __C,
6486                                                       __imm,
6487                                                       (__mmask8) __U, __R);
6488 }
6489
6490 extern __inline __m512d
6491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6493                                 __m512i __C, const int __imm, const int __R)
6494 {
6495   return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6496                                                        (__v8df) __B,
6497                                                        (__v8di) __C,
6498                                                        __imm,
6499                                                        (__mmask8) __U, __R);
6500 }
6501
6502 extern __inline __m512
6503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6504 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6505                           const int __imm, const int __R)
6506 {
6507   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6508                                                      (__v16sf) __B,
6509                                                      (__v16si) __C,
6510                                                      __imm,
6511                                                      (__mmask16) -1, __R);
6512 }
6513
6514 extern __inline __m512
6515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6516 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6517                                __m512i __C, const int __imm, const int __R)
6518 {
6519   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6520                                                      (__v16sf) __B,
6521                                                      (__v16si) __C,
6522                                                      __imm,
6523                                                      (__mmask16) __U, __R);
6524 }
6525
6526 extern __inline __m512
6527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6528 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6529                                 __m512i __C, const int __imm, const int __R)
6530 {
6531   return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6532                                                       (__v16sf) __B,
6533                                                       (__v16si) __C,
6534                                                       __imm,
6535                                                       (__mmask16) __U, __R);
6536 }
6537
6538 extern __inline __m128d
6539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6541                        const int __imm, const int __R)
6542 {
6543   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6544                                                    (__v2df) __B,
6545                                                    (__v2di) __C, __imm,
6546                                                    (__mmask8) -1, __R);
6547 }
6548
6549 extern __inline __m128d
6550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6552                             __m128i __C, const int __imm, const int __R)
6553 {
6554   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6555                                                    (__v2df) __B,
6556                                                    (__v2di) __C, __imm,
6557                                                    (__mmask8) __U, __R);
6558 }
6559
6560 extern __inline __m128d
6561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6563                              __m128i __C, const int __imm, const int __R)
6564 {
6565   return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6566                                                     (__v2df) __B,
6567                                                     (__v2di) __C,
6568                                                     __imm,
6569                                                     (__mmask8) __U, __R);
6570 }
6571
6572 extern __inline __m128
6573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6574 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6575                        const int __imm, const int __R)
6576 {
6577   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6578                                                   (__v4sf) __B,
6579                                                   (__v4si) __C, __imm,
6580                                                   (__mmask8) -1, __R);
6581 }
6582
6583 extern __inline __m128
6584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6585 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6586                             __m128i __C, const int __imm, const int __R)
6587 {
6588   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6589                                                   (__v4sf) __B,
6590                                                   (__v4si) __C, __imm,
6591                                                   (__mmask8) __U, __R);
6592 }
6593
6594 extern __inline __m128
6595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6596 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6597                              __m128i __C, const int __imm, const int __R)
6598 {
6599   return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6600                                                    (__v4sf) __B,
6601                                                    (__v4si) __C, __imm,
6602                                                    (__mmask8) __U, __R);
6603 }
6604
6605 #else
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y and the
   immediate C to __builtin_ia32_shufpd512_mask with an undefined
   pass-through vector and an all-ones mask.  */
#define _mm512_shuffle_pd(X, Y, C)                              \
  ((__m512d)__builtin_ia32_shufpd512_mask (                     \
     (__v8df)(__m512d)(X),                                      \
     (__v8df)(__m512d)(Y),                                      \
     (int)(C),                                                  \
     (__v8df)(__m512d)_mm512_undefined_pd(),                    \
     (__mmask8)-1))
6611
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y and the
   immediate C to __builtin_ia32_shufpd512_mask, with W as the
   pass-through vector and U as the mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C)                   \
  ((__m512d)__builtin_ia32_shufpd512_mask (                     \
     (__v8df)(__m512d)(X),                                      \
     (__v8df)(__m512d)(Y),                                      \
     (int)(C),                                                  \
     (__v8df)(__m512d)(W),                                      \
     (__mmask8)(U)))
6617
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y and the
   immediate C to __builtin_ia32_shufpd512_mask, with a zero vector as
   the pass-through operand and U as the mask.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C)                     \
  ((__m512d)__builtin_ia32_shufpd512_mask (                     \
     (__v8df)(__m512d)(X),                                      \
     (__v8df)(__m512d)(Y),                                      \
     (int)(C),                                                  \
     (__v8df)(__m512d)_mm512_setzero_pd(),                      \
     (__mmask8)(U)))
6623
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y and the
   immediate C to __builtin_ia32_shufps512_mask with an undefined
   pass-through vector and an all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C)                              \
  ((__m512)__builtin_ia32_shufps512_mask (                      \
     (__v16sf)(__m512)(X),                                      \
     (__v16sf)(__m512)(Y),                                      \
     (int)(C),                                                  \
     (__v16sf)(__m512)_mm512_undefined_ps(),                    \
     (__mmask16)-1))
6629
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y and the
   immediate C to __builtin_ia32_shufps512_mask, with W as the
   pass-through vector and U as the mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C)                   \
  ((__m512)__builtin_ia32_shufps512_mask (                      \
     (__v16sf)(__m512)(X),                                      \
     (__v16sf)(__m512)(Y),                                      \
     (int)(C),                                                  \
     (__v16sf)(__m512)(W),                                      \
     (__mmask16)(U)))
6635
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y and the
   immediate C to __builtin_ia32_shufps512_mask, with a zero vector as
   the pass-through operand and U as the mask.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C)                     \
  ((__m512)__builtin_ia32_shufps512_mask (                      \
     (__v16sf)(__m512)(X),                                      \
     (__v16sf)(__m512)(Y),                                      \
     (int)(C),                                                  \
     (__v16sf)(__m512)_mm512_setzero_ps(),                      \
     (__mmask16)(U)))
6641
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to
   __builtin_ia32_fixupimmpd512_mask with an all-ones mask.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R)                 \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask (                 \
     (__v8df)(__m512d)(X),                                      \
     (__v8df)(__m512d)(Y),                                      \
     (__v8di)(__m512i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(-1), (R)))
6646
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to
   __builtin_ia32_fixupimmpd512_mask with U as the mask.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R)         \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask (                 \
     (__v8df)(__m512d)(X),                                      \
     (__v8df)(__m512d)(Y),                                      \
     (__v8di)(__m512i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(U), (R)))
6651
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to the _maskz builtin
   __builtin_ia32_fixupimmpd512_maskz with U as the mask.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R)        \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz (                \
     (__v8df)(__m512d)(X),                                      \
     (__v8df)(__m512d)(Y),                                      \
     (__v8di)(__m512i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(U), (R)))
6656
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to
   __builtin_ia32_fixupimmps512_mask with an all-ones mask.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R)                 \
  ((__m512)__builtin_ia32_fixupimmps512_mask (                  \
     (__v16sf)(__m512)(X),                                      \
     (__v16sf)(__m512)(Y),                                      \
     (__v16si)(__m512i)(Z),                                     \
     (int)(C),                                                  \
     (__mmask16)(-1), (R)))
6661
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to
   __builtin_ia32_fixupimmps512_mask with U as the mask.  */
#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R)         \
  ((__m512)__builtin_ia32_fixupimmps512_mask (                  \
     (__v16sf)(__m512)(X),                                      \
     (__v16sf)(__m512)(Y),                                      \
     (__v16si)(__m512i)(Z),                                     \
     (int)(C),                                                  \
     (__mmask16)(U), (R)))
6666
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to the _maskz builtin
   __builtin_ia32_fixupimmps512_maskz with U as the mask.  */
#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R)        \
  ((__m512)__builtin_ia32_fixupimmps512_maskz (                 \
     (__v16sf)(__m512)(X),                                      \
     (__v16sf)(__m512)(Y),                                      \
     (__v16si)(__m512i)(Z),                                     \
     (int)(C),                                                  \
     (__mmask16)(U), (R)))
6671
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to
   __builtin_ia32_fixupimmsd_mask with an all-ones mask.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R)                    \
  ((__m128d)__builtin_ia32_fixupimmsd_mask (                    \
     (__v2df)(__m128d)(X),                                      \
     (__v2df)(__m128d)(Y),                                      \
     (__v2di)(__m128i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(-1), (R)))
6676
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to
   __builtin_ia32_fixupimmsd_mask with U as the mask.  */
#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R)            \
  ((__m128d)__builtin_ia32_fixupimmsd_mask (                    \
     (__v2df)(__m128d)(X),                                      \
     (__v2df)(__m128d)(Y),                                      \
     (__v2di)(__m128i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(U), (R)))
6681
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to the _maskz builtin
   __builtin_ia32_fixupimmsd_maskz with U as the mask.  */
#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R)           \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz (                   \
     (__v2df)(__m128d)(X),                                      \
     (__v2df)(__m128d)(Y),                                      \
     (__v2di)(__m128i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(U), (R)))
6686
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to
   __builtin_ia32_fixupimmss_mask with an all-ones mask.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R)                    \
  ((__m128)__builtin_ia32_fixupimmss_mask (                     \
     (__v4sf)(__m128)(X),                                       \
     (__v4sf)(__m128)(Y),                                       \
     (__v4si)(__m128i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(-1), (R)))
6691
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to
   __builtin_ia32_fixupimmss_mask with U as the mask.  */
#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R)            \
  ((__m128)__builtin_ia32_fixupimmss_mask (                     \
     (__v4sf)(__m128)(X),                                       \
     (__v4sf)(__m128)(Y),                                       \
     (__v4si)(__m128i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(U), (R)))
6696
/* Macro form used when __OPTIMIZE__ is not defined: passes X, Y, Z, the
   immediate C and the rounding argument R to the _maskz builtin
   __builtin_ia32_fixupimmss_maskz with U as the mask.  */
#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R)           \
  ((__m128)__builtin_ia32_fixupimmss_maskz (                    \
     (__v4sf)(__m128)(X),                                       \
     (__v4sf)(__m128)(Y),                                       \
     (__v4si)(__m128i)(Z),                                      \
     (int)(C),                                                  \
     (__mmask8)(U), (R)))
6701 #endif
6702
6703 extern __inline __m512
6704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705 _mm512_movehdup_ps (__m512 __A)
6706 {
6707   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6708                                                    (__v16sf)
6709                                                    _mm512_undefined_ps (),
6710                                                    (__mmask16) -1);
6711 }
6712
6713 extern __inline __m512
6714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6715 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6716 {
6717   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6718                                                    (__v16sf) __W,
6719                                                    (__mmask16) __U);
6720 }
6721
6722 extern __inline __m512
6723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6725 {
6726   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6727                                                    (__v16sf)
6728                                                    _mm512_setzero_ps (),
6729                                                    (__mmask16) __U);
6730 }
6731
6732 extern __inline __m512
6733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6734 _mm512_moveldup_ps (__m512 __A)
6735 {
6736   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6737                                                    (__v16sf)
6738                                                    _mm512_undefined_ps (),
6739                                                    (__mmask16) -1);
6740 }
6741
6742 extern __inline __m512
6743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6745 {
6746   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6747                                                    (__v16sf) __W,
6748                                                    (__mmask16) __U);
6749 }
6750
6751 extern __inline __m512
6752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6753 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6754 {
6755   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6756                                                    (__v16sf)
6757                                                    _mm512_setzero_ps (),
6758                                                    (__mmask16) __U);
6759 }
6760
6761 extern __inline __m512i
6762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6763 _mm512_or_si512 (__m512i __A, __m512i __B)
6764 {
6765   return (__m512i) ((__v16su) __A | (__v16su) __B);
6766 }
6767
6768 extern __inline __m512i
6769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6770 _mm512_or_epi32 (__m512i __A, __m512i __B)
6771 {
6772   return (__m512i) ((__v16su) __A | (__v16su) __B);
6773 }
6774
6775 extern __inline __m512i
6776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6778 {
6779   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6780                                                 (__v16si) __B,
6781                                                 (__v16si) __W,
6782                                                 (__mmask16) __U);
6783 }
6784
6785 extern __inline __m512i
6786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6787 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6788 {
6789   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6790                                                 (__v16si) __B,
6791                                                 (__v16si)
6792                                                 _mm512_setzero_si512 (),
6793                                                 (__mmask16) __U);
6794 }
6795
6796 extern __inline __m512i
6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6798 _mm512_or_epi64 (__m512i __A, __m512i __B)
6799 {
6800   return (__m512i) ((__v8du) __A | (__v8du) __B);
6801 }
6802
6803 extern __inline __m512i
6804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6805 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6806 {
6807   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6808                                                 (__v8di) __B,
6809                                                 (__v8di) __W,
6810                                                 (__mmask8) __U);
6811 }
6812
6813 extern __inline __m512i
6814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6815 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6816 {
6817   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6818                                                 (__v8di) __B,
6819                                                 (__v8di)
6820                                                 _mm512_setzero_si512 (),
6821                                                 (__mmask8) __U);
6822 }
6823
6824 extern __inline __m512i
6825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6826 _mm512_xor_si512 (__m512i __A, __m512i __B)
6827 {
6828   return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6829 }
6830
6831 extern __inline __m512i
6832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6833 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6834 {
6835   return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6836 }
6837
6838 extern __inline __m512i
6839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6840 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6841 {
6842   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6843                                                  (__v16si) __B,
6844                                                  (__v16si) __W,
6845                                                  (__mmask16) __U);
6846 }
6847
6848 extern __inline __m512i
6849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6850 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6851 {
6852   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6853                                                  (__v16si) __B,
6854                                                  (__v16si)
6855                                                  _mm512_setzero_si512 (),
6856                                                  (__mmask16) __U);
6857 }
6858
6859 extern __inline __m512i
6860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6861 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6862 {
6863   return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6864 }
6865
6866 extern __inline __m512i
6867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6868 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6869 {
6870   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6871                                                  (__v8di) __B,
6872                                                  (__v8di) __W,
6873                                                  (__mmask8) __U);
6874 }
6875
6876 extern __inline __m512i
6877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6879 {
6880   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6881                                                  (__v8di) __B,
6882                                                  (__v8di)
6883                                                  _mm512_setzero_si512 (),
6884                                                  (__mmask8) __U);
6885 }
6886
6887 #ifdef __OPTIMIZE__
6888 extern __inline __m512i
6889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6890 _mm512_rol_epi32 (__m512i __A, const int __B)
6891 {
6892   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6893                                                  (__v16si)
6894                                                  _mm512_undefined_si512 (),
6895                                                  (__mmask16) -1);
6896 }
6897
6898 extern __inline __m512i
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6901 {
6902   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6903                                                  (__v16si) __W,
6904                                                  (__mmask16) __U);
6905 }
6906
6907 extern __inline __m512i
6908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6909 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6910 {
6911   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6912                                                  (__v16si)
6913                                                  _mm512_setzero_si512 (),
6914                                                  (__mmask16) __U);
6915 }
6916
6917 extern __inline __m512i
6918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6919 _mm512_ror_epi32 (__m512i __A, int __B)
6920 {
6921   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6922                                                  (__v16si)
6923                                                  _mm512_undefined_si512 (),
6924                                                  (__mmask16) -1);
6925 }
6926
6927 extern __inline __m512i
6928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6929 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6930 {
6931   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6932                                                  (__v16si) __W,
6933                                                  (__mmask16) __U);
6934 }
6935
6936 extern __inline __m512i
6937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6938 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6939 {
6940   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6941                                                  (__v16si)
6942                                                  _mm512_setzero_si512 (),
6943                                                  (__mmask16) __U);
6944 }
6945
6946 extern __inline __m512i
6947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6948 _mm512_rol_epi64 (__m512i __A, const int __B)
6949 {
6950   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6951                                                  (__v8di)
6952                                                  _mm512_undefined_si512 (),
6953                                                  (__mmask8) -1);
6954 }
6955
6956 extern __inline __m512i
6957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6958 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6959 {
6960   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6961                                                  (__v8di) __W,
6962                                                  (__mmask8) __U);
6963 }
6964
6965 extern __inline __m512i
6966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6967 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6968 {
6969   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6970                                                  (__v8di)
6971                                                  _mm512_setzero_si512 (),
6972                                                  (__mmask8) __U);
6973 }
6974
6975 extern __inline __m512i
6976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977 _mm512_ror_epi64 (__m512i __A, int __B)
6978 {
6979   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6980                                                  (__v8di)
6981                                                  _mm512_undefined_si512 (),
6982                                                  (__mmask8) -1);
6983 }
6984
6985 extern __inline __m512i
6986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6987 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6988 {
6989   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6990                                                  (__v8di) __W,
6991                                                  (__mmask8) __U);
6992 }
6993
6994 extern __inline __m512i
6995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6996 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6997 {
6998   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6999                                                  (__v8di)
7000                                                  _mm512_setzero_si512 (),
7001                                                  (__mmask8) __U);
7002 }
7003
7004 #else
/* Macro forms of the 32-bit and 64-bit rol/ror intrinsics, selected
   when __OPTIMIZE__ is not defined.  Each expands to a single call of
   the matching __builtin_ia32_pro{l,r}{d,q}512_mask builtin with B cast
   to int.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A), \
    (int) (B), (__v16si) _mm512_undefined_si512 (), (__mmask16) (-1)))
#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A), \
    (int) (B), (__v16si) (__m512i) (W), (__mmask16) (U)))
#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A), \
    (int) (B), (__v16si) _mm512_setzero_si512 (), (__mmask16) (U)))
#define _mm512_ror_epi32(A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A), \
    (int) (B), (__v16si) _mm512_undefined_si512 (), (__mmask16) (-1)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A), \
    (int) (B), (__v16si) (__m512i) (W), (__mmask16) (U)))
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A), \
    (int) (B), (__v16si) _mm512_setzero_si512 (), (__mmask16) (U)))
#define _mm512_rol_epi64(A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A), \
    (int) (B), (__v8di) _mm512_undefined_si512 (), (__mmask8) (-1)))
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A), \
    (int) (B), (__v8di) (__m512i) (W), (__mmask8) (U)))
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A), \
    (int) (B), (__v8di) _mm512_setzero_si512 (), (__mmask8) (U)))

#define _mm512_ror_epi64(A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A), \
    (int) (B), (__v8di) _mm512_undefined_si512 (), (__mmask8) (-1)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A), \
    (int) (B), (__v8di) (__m512i) (W), (__mmask8) (U)))
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A), \
    (int) (B), (__v8di) _mm512_setzero_si512 (), (__mmask8) (U)))
7066 #endif
7067
7068 extern __inline __m512i
7069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7070 _mm512_and_si512 (__m512i __A, __m512i __B)
7071 {
7072   return (__m512i) ((__v16su) __A & (__v16su) __B);
7073 }
7074
7075 extern __inline __m512i
7076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7077 _mm512_and_epi32 (__m512i __A, __m512i __B)
7078 {
7079   return (__m512i) ((__v16su) __A & (__v16su) __B);
7080 }
7081
7082 extern __inline __m512i
7083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7084 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7085 {
7086   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7087                                                  (__v16si) __B,
7088                                                  (__v16si) __W,
7089                                                  (__mmask16) __U);
7090 }
7091
7092 extern __inline __m512i
7093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7094 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7095 {
7096   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7097                                                  (__v16si) __B,
7098                                                  (__v16si)
7099                                                  _mm512_setzero_si512 (),
7100                                                  (__mmask16) __U);
7101 }
7102
7103 extern __inline __m512i
7104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7105 _mm512_and_epi64 (__m512i __A, __m512i __B)
7106 {
7107   return (__m512i) ((__v8du) __A & (__v8du) __B);
7108 }
7109
7110 extern __inline __m512i
7111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7112 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7113 {
7114   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7115                                                  (__v8di) __B,
7116                                                  (__v8di) __W, __U);
7117 }
7118
7119 extern __inline __m512i
7120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7121 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7122 {
7123   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7124                                                  (__v8di) __B,
7125                                                  (__v8di)
7126                                                  _mm512_setzero_pd (),
7127                                                  __U);
7128 }
7129
7130 extern __inline __m512i
7131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7132 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7133 {
7134   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7135                                                   (__v16si) __B,
7136                                                   (__v16si)
7137                                                   _mm512_undefined_si512 (),
7138                                                   (__mmask16) -1);
7139 }
7140
7141 extern __inline __m512i
7142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7143 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7144 {
7145   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7146                                                   (__v16si) __B,
7147                                                   (__v16si)
7148                                                   _mm512_undefined_si512 (),
7149                                                   (__mmask16) -1);
7150 }
7151
7152 extern __inline __m512i
7153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7154 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7155 {
7156   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7157                                                   (__v16si) __B,
7158                                                   (__v16si) __W,
7159                                                   (__mmask16) __U);
7160 }
7161
7162 extern __inline __m512i
7163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7165 {
7166   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7167                                                   (__v16si) __B,
7168                                                   (__v16si)
7169                                                   _mm512_setzero_si512 (),
7170                                                   (__mmask16) __U);
7171 }
7172
7173 extern __inline __m512i
7174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7176 {
7177   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7178                                                   (__v8di) __B,
7179                                                   (__v8di)
7180                                                   _mm512_undefined_si512 (),
7181                                                   (__mmask8) -1);
7182 }
7183
7184 extern __inline __m512i
7185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7187 {
7188   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7189                                                   (__v8di) __B,
7190                                                   (__v8di) __W, __U);
7191 }
7192
7193 extern __inline __m512i
7194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7195 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7196 {
7197   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7198                                                   (__v8di) __B,
7199                                                   (__v8di)
7200                                                   _mm512_setzero_pd (),
7201                                                   __U);
7202 }
7203
7204 extern __inline __mmask16
7205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7206 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7207 {
7208   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7209                                                 (__v16si) __B,
7210                                                 (__mmask16) -1);
7211 }
7212
7213 extern __inline __mmask16
7214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7215 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7216 {
7217   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7218                                                 (__v16si) __B, __U);
7219 }
7220
7221 extern __inline __mmask8
7222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7223 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7224 {
7225   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7226                                                (__v8di) __B,
7227                                                (__mmask8) -1);
7228 }
7229
7230 extern __inline __mmask8
7231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7232 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7233 {
7234   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7235 }
7236
7237 extern __inline __mmask16
7238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7239 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7240 {
7241   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7242                                                  (__v16si) __B,
7243                                                  (__mmask16) -1);
7244 }
7245
7246 extern __inline __mmask16
7247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7248 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7249 {
7250   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7251                                                  (__v16si) __B, __U);
7252 }
7253
7254 extern __inline __mmask8
7255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7257 {
7258   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7259                                                 (__v8di) __B,
7260                                                 (__mmask8) -1);
7261 }
7262
7263 extern __inline __mmask8
7264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7265 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7266 {
7267   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7268                                                 (__v8di) __B, __U);
7269 }
7270
7271 extern __inline __m512i
7272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7274 {
7275   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7276                                                      (__v16si) __B,
7277                                                      (__v16si)
7278                                                      _mm512_undefined_si512 (),
7279                                                      (__mmask16) -1);
7280 }
7281
7282 extern __inline __m512i
7283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7285                             __m512i __B)
7286 {
7287   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7288                                                      (__v16si) __B,
7289                                                      (__v16si) __W,
7290                                                      (__mmask16) __U);
7291 }
7292
7293 extern __inline __m512i
7294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7295 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7296 {
7297   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7298                                                      (__v16si) __B,
7299                                                      (__v16si)
7300                                                      _mm512_setzero_si512 (),
7301                                                      (__mmask16) __U);
7302 }
7303
7304 extern __inline __m512i
7305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7306 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7307 {
7308   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7309                                                       (__v8di) __B,
7310                                                       (__v8di)
7311                                                       _mm512_undefined_si512 (),
7312                                                       (__mmask8) -1);
7313 }
7314
7315 extern __inline __m512i
7316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7317 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7318 {
7319   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7320                                                       (__v8di) __B,
7321                                                       (__v8di) __W,
7322                                                       (__mmask8) __U);
7323 }
7324
7325 extern __inline __m512i
7326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7327 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7328 {
7329   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7330                                                       (__v8di) __B,
7331                                                       (__v8di)
7332                                                       _mm512_setzero_si512 (),
7333                                                       (__mmask8) __U);
7334 }
7335
7336 extern __inline __m512i
7337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7338 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7339 {
7340   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7341                                                      (__v16si) __B,
7342                                                      (__v16si)
7343                                                      _mm512_undefined_si512 (),
7344                                                      (__mmask16) -1);
7345 }
7346
7347 extern __inline __m512i
7348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7349 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7350                             __m512i __B)
7351 {
7352   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7353                                                      (__v16si) __B,
7354                                                      (__v16si) __W,
7355                                                      (__mmask16) __U);
7356 }
7357
7358 extern __inline __m512i
7359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7360 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7361 {
7362   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7363                                                      (__v16si) __B,
7364                                                      (__v16si)
7365                                                      _mm512_setzero_si512 (),
7366                                                      (__mmask16) __U);
7367 }
7368
7369 extern __inline __m512i
7370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7371 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7372 {
7373   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7374                                                       (__v8di) __B,
7375                                                       (__v8di)
7376                                                       _mm512_undefined_si512 (),
7377                                                       (__mmask8) -1);
7378 }
7379
7380 extern __inline __m512i
7381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7382 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7383 {
7384   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7385                                                       (__v8di) __B,
7386                                                       (__v8di) __W,
7387                                                       (__mmask8) __U);
7388 }
7389
7390 extern __inline __m512i
7391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7392 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7393 {
7394   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7395                                                       (__v8di) __B,
7396                                                       (__v8di)
7397                                                       _mm512_setzero_si512 (),
7398                                                       (__mmask8) __U);
7399 }
7400
7401 #ifdef __x86_64__
7402 #ifdef __OPTIMIZE__
7403 extern __inline unsigned long long
7404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7405 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7406 {
7407   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7408 }
7409
7410 extern __inline long long
7411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7413 {
7414   return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7415 }
7416
7417 extern __inline long long
7418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7419 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7420 {
7421   return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7422 }
7423
7424 extern __inline unsigned long long
7425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7426 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7427 {
7428   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7429 }
7430
7431 extern __inline long long
7432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7434 {
7435   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7436 }
7437
7438 extern __inline long long
7439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7440 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7441 {
7442   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7443 }
7444 #else
/* Macro forms of the float-to-64-bit-integer rounding conversions,
   selected when __OPTIMIZE__ is not defined.  A and B are handed to
   the builtin as written.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 (A, B))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 (A, B))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))
7462 #endif
7463 #endif
7464
7465 #ifdef __OPTIMIZE__
7466 extern __inline unsigned
7467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7468 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7469 {
7470   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7471 }
7472
7473 extern __inline int
7474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7475 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7476 {
7477   return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7478 }
7479
7480 extern __inline int
7481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7482 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7483 {
7484   return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7485 }
7486
7487 extern __inline unsigned
7488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7490 {
7491   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7492 }
7493
7494 extern __inline int
7495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7496 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7497 {
7498   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7499 }
7500
7501 extern __inline int
7502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7503 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7504 {
7505   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7506 }
7507 #else
/* Macro forms of the _mm_cvt[t]_roundss_{u32,si32,i32} intrinsics, used
   when __OPTIMIZE__ is not defined.  Each expands to one parenthesized
   call of the builtin used by the inline function of the same name; A and
   B are handed to the builtin as-is.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((A), (B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((A), (B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))
7525 #endif
7526
7527 #ifdef __x86_64__
7528 #ifdef __OPTIMIZE__
7529 extern __inline unsigned long long
7530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7531 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7532 {
7533   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7534 }
7535
7536 extern __inline long long
7537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7538 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7539 {
7540   return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7541 }
7542
7543 extern __inline long long
7544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7545 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7546 {
7547   return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7548 }
7549
7550 extern __inline unsigned long long
7551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7552 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7553 {
7554   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7555 }
7556
7557 extern __inline long long
7558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7559 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7560 {
7561   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7562 }
7563
7564 extern __inline long long
7565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7566 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7567 {
7568   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7569 }
7570 #else
/* Macro forms of the _mm_cvt[t]_roundsd_{u64,si64,i64} intrinsics, used
   when __OPTIMIZE__ is not defined.  Each expands to one parenthesized
   call of the builtin used by the inline function of the same name; A and
   B are handed to the builtin as-is.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 ((A), (B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 ((A), (B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
7588 #endif
7589 #endif
7590
7591 #ifdef __OPTIMIZE__
7592 extern __inline unsigned
7593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7594 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7595 {
7596   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7597 }
7598
7599 extern __inline int
7600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7601 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7602 {
7603   return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7604 }
7605
7606 extern __inline int
7607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7608 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7609 {
7610   return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7611 }
7612
7613 extern __inline unsigned
7614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7615 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7616 {
7617   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7618 }
7619
7620 extern __inline int
7621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7622 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7623 {
7624   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7625 }
7626
7627 extern __inline int
7628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7630 {
7631   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7632 }
7633 #else
/* Macro forms of the _mm_cvt[t]_roundsd_{u32,si32,i32} intrinsics, used
   when __OPTIMIZE__ is not defined.  Each expands to one parenthesized
   call of the builtin used by the inline function of the same name; A and
   B are handed to the builtin as-is.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
7651 #endif
7652
7653 extern __inline __m512d
7654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7655 _mm512_movedup_pd (__m512d __A)
7656 {
7657   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7658                                                    (__v8df)
7659                                                    _mm512_undefined_pd (),
7660                                                    (__mmask8) -1);
7661 }
7662
7663 extern __inline __m512d
7664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7666 {
7667   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7668                                                    (__v8df) __W,
7669                                                    (__mmask8) __U);
7670 }
7671
7672 extern __inline __m512d
7673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7675 {
7676   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7677                                                    (__v8df)
7678                                                    _mm512_setzero_pd (),
7679                                                    (__mmask8) __U);
7680 }
7681
7682 extern __inline __m512d
7683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7684 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7685 {
7686   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7687                                                     (__v8df) __B,
7688                                                     (__v8df)
7689                                                     _mm512_undefined_pd (),
7690                                                     (__mmask8) -1);
7691 }
7692
7693 extern __inline __m512d
7694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7695 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7696 {
7697   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7698                                                     (__v8df) __B,
7699                                                     (__v8df) __W,
7700                                                     (__mmask8) __U);
7701 }
7702
7703 extern __inline __m512d
7704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7705 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7706 {
7707   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7708                                                     (__v8df) __B,
7709                                                     (__v8df)
7710                                                     _mm512_setzero_pd (),
7711                                                     (__mmask8) __U);
7712 }
7713
7714 extern __inline __m512d
7715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7716 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7717 {
7718   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7719                                                     (__v8df) __B,
7720                                                     (__v8df)
7721                                                     _mm512_undefined_pd (),
7722                                                     (__mmask8) -1);
7723 }
7724
7725 extern __inline __m512d
7726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7727 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7728 {
7729   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7730                                                     (__v8df) __B,
7731                                                     (__v8df) __W,
7732                                                     (__mmask8) __U);
7733 }
7734
7735 extern __inline __m512d
7736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7737 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7738 {
7739   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7740                                                     (__v8df) __B,
7741                                                     (__v8df)
7742                                                     _mm512_setzero_pd (),
7743                                                     (__mmask8) __U);
7744 }
7745
7746 extern __inline __m512
7747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7748 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7749 {
7750   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7751                                                    (__v16sf) __B,
7752                                                    (__v16sf)
7753                                                    _mm512_undefined_ps (),
7754                                                    (__mmask16) -1);
7755 }
7756
7757 extern __inline __m512
7758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7759 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7760 {
7761   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7762                                                    (__v16sf) __B,
7763                                                    (__v16sf) __W,
7764                                                    (__mmask16) __U);
7765 }
7766
7767 extern __inline __m512
7768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7770 {
7771   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7772                                                    (__v16sf) __B,
7773                                                    (__v16sf)
7774                                                    _mm512_setzero_ps (),
7775                                                    (__mmask16) __U);
7776 }
7777
7778 #ifdef __OPTIMIZE__
7779 extern __inline __m512d
7780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7781 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7782 {
7783   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7784                                                     (__v8df)
7785                                                     _mm512_undefined_pd (),
7786                                                     (__mmask8) -1, __R);
7787 }
7788
7789 extern __inline __m512d
7790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7791 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7792                             const int __R)
7793 {
7794   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7795                                                     (__v8df) __W,
7796                                                     (__mmask8) __U, __R);
7797 }
7798
7799 extern __inline __m512d
7800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7801 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7802 {
7803   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7804                                                     (__v8df)
7805                                                     _mm512_setzero_pd (),
7806                                                     (__mmask8) __U, __R);
7807 }
7808
7809 extern __inline __m512
7810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7811 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7812 {
7813   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7814                                                     (__v16sf)
7815                                                     _mm512_undefined_ps (),
7816                                                     (__mmask16) -1, __R);
7817 }
7818
7819 extern __inline __m512
7820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7821 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7822                             const int __R)
7823 {
7824   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7825                                                     (__v16sf) __W,
7826                                                     (__mmask16) __U, __R);
7827 }
7828
7829 extern __inline __m512
7830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7831 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7832 {
7833   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7834                                                     (__v16sf)
7835                                                     _mm512_setzero_ps (),
7836                                                     (__mmask16) __U, __R);
7837 }
7838
7839 extern __inline __m256i
7840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7842 {
7843   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7844                                                      __I,
7845                                                      (__v16hi)
7846                                                      _mm256_undefined_si256 (),
7847                                                      -1);
7848 }
7849
7850 extern __inline __m256i
7851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7852 _mm512_cvtps_ph (__m512 __A, const int __I)
7853 {
7854   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7855                                                      __I,
7856                                                      (__v16hi)
7857                                                      _mm256_undefined_si256 (),
7858                                                      -1);
7859 }
7860
7861 extern __inline __m256i
7862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7863 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7864                             const int __I)
7865 {
7866   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7867                                                      __I,
7868                                                      (__v16hi) __U,
7869                                                      (__mmask16) __W);
7870 }
7871
7872 extern __inline __m256i
7873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7874 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7875 {
7876   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7877                                                      __I,
7878                                                      (__v16hi) __U,
7879                                                      (__mmask16) __W);
7880 }
7881
7882 extern __inline __m256i
7883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7884 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7885 {
7886   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7887                                                      __I,
7888                                                      (__v16hi)
7889                                                      _mm256_setzero_si256 (),
7890                                                      (__mmask16) __W);
7891 }
7892
7893 extern __inline __m256i
7894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7895 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7896 {
7897   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7898                                                      __I,
7899                                                      (__v16hi)
7900                                                      _mm256_setzero_si256 (),
7901                                                      (__mmask16) __W);
7902 }
7903 #else
/* Macro forms of the _mm512_*cvt_roundps_pd, _mm512_*cvt_roundph_ps,
   _mm512_*cvt_roundps_ph and _mm512_*cvtps_ph intrinsics, used when
   __OPTIMIZE__ is not defined.

   Every expansion is wrapped in its own parentheses and every macro
   argument is parenthesized before it is cast.  Without the outer
   parentheses a postfix operator written after the macro call would bind
   to the builtin call instead of the cast result; without the inner ones a
   cast such as "(__v16sf)(__m512) A" would apply only to the first operand
   of an argument like "c ? x : y".  Operand casts mirror the ones in the
   inline functions of the same names.  */
#define _mm512_cvt_roundps_pd(A, B)                                     \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A),      \
      (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B)                          \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A),      \
      (__v8df)(__m512d)(W), (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B)                            \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A),      \
      (__v8df) _mm512_setzero_pd (), (__mmask8)(U), (B)))

#define _mm512_cvt_roundph_ps(A, B)                                     \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A),    \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B)                          \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A),    \
      (__v16sf)(__m512)(W), (__mmask16)(U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B)                            \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A),    \
      (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (B)))

#define _mm512_cvt_roundps_ph(A, I)                                     \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
      (int) (I), (__v16hi) _mm256_undefined_si256 (), (__mmask16) -1))

#define _mm512_cvtps_ph(A, I)                                           \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
      (int) (I), (__v16hi) _mm256_undefined_si256 (), (__mmask16) -1))

/* In the masked ps_ph macros U is the pass-through vector and W the mask,
   matching the parameter order of the inline functions.  */
#define _mm512_mask_cvt_roundps_ph(U, W, A, I)                          \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
      (int) (I), (__v16hi)(__m256i)(U), (__mmask16)(W)))

#define _mm512_mask_cvtps_ph(U, W, A, I)                                \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
      (int) (I), (__v16hi)(__m256i)(U), (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I)                            \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16)(W)))

#define _mm512_maskz_cvtps_ph(W, A, I)                                  \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16)(W)))
7940 #endif
7941
7942 #ifdef __OPTIMIZE__
7943 extern __inline __m256
7944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7945 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7946 {
7947   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7948                                                    (__v8sf)
7949                                                    _mm256_undefined_ps (),
7950                                                    (__mmask8) -1, __R);
7951 }
7952
7953 extern __inline __m256
7954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7955 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7956                             const int __R)
7957 {
7958   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7959                                                    (__v8sf) __W,
7960                                                    (__mmask8) __U, __R);
7961 }
7962
7963 extern __inline __m256
7964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7965 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7966 {
7967   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7968                                                    (__v8sf)
7969                                                    _mm256_setzero_ps (),
7970                                                    (__mmask8) __U, __R);
7971 }
7972
7973 extern __inline __m128
7974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7975 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7976 {
7977   return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7978                                                  (__v2df) __B,
7979                                                  __R);
7980 }
7981
7982 extern __inline __m128d
7983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7984 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7985 {
7986   return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7987                                                   (__v4sf) __B,
7988                                                   __R);
7989 }
7990 #else
/* Macro forms of the _mm512_*cvt_roundpd_ps, _mm_cvt_roundsd_ss and
   _mm_cvt_roundss_sd intrinsics, used when __OPTIMIZE__ is not defined.

   Each expansion is wrapped in its own parentheses so that the leading
   cast and the builtin call form one primary expression; otherwise a
   postfix operator written after the macro call would bind to the builtin
   call instead of the cast result.  Operand casts mirror the ones in the
   inline functions of the same names.  */
#define _mm512_cvt_roundpd_ps(A, B)                                     \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),      \
      (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)                          \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),      \
      (__v8sf)(__m256)(W), (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B)                            \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),      \
      (__v8sf) _mm256_setzero_ps (), (__mmask8)(U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C)                                     \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf)(__m128)(A),         \
      (__v2df)(__m128d)(B), (C)))

#define _mm_cvt_roundss_sd(A, B, C)                                     \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((__v2df)(__m128d)(A),       \
      (__v4sf)(__m128)(B), (C)))
8005 #endif
8006
8007 extern __inline void
8008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8009 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8010 {
8011   __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8012 }
8013
8014 extern __inline void
8015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8016 _mm512_stream_ps (float *__P, __m512 __A)
8017 {
8018   __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8019 }
8020
8021 extern __inline void
8022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8023 _mm512_stream_pd (double *__P, __m512d __A)
8024 {
8025   __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8026 }
8027
8028 extern __inline __m512i
8029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8030 _mm512_stream_load_si512 (void *__P)
8031 {
8032   return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8033 }
8034
/* Constants for mantissa extraction.

   Normalization interval selector.  The getmant wrappers OR this value
   into the low bits of the immediate they pass to the builtin.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,        /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2 = 1,       /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1 = 2,       /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3     /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8043
/* Sign-control selector for mantissa extraction.  The getmant wrappers
   shift this value left by two before OR-ing it into the immediate they
   pass to the builtin.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,        /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero = 1,       /* sign = 0             */
  _MM_MANT_SIGN_nan = 2         /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8050
8051 #ifdef __OPTIMIZE__
8052 extern __inline __m128
8053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8055 {
8056   return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8057                                                     (__v4sf) __B,
8058                                                     __R);
8059 }
8060
8061 extern __inline __m128d
8062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8063 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8064 {
8065   return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8066                                                      (__v2df) __B,
8067                                                      __R);
8068 }
8069
8070 extern __inline __m512
8071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8072 _mm512_getexp_round_ps (__m512 __A, const int __R)
8073 {
8074   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8075                                                    (__v16sf)
8076                                                    _mm512_undefined_ps (),
8077                                                    (__mmask16) -1, __R);
8078 }
8079
8080 extern __inline __m512
8081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8082 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8083                              const int __R)
8084 {
8085   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8086                                                    (__v16sf) __W,
8087                                                    (__mmask16) __U, __R);
8088 }
8089
8090 extern __inline __m512
8091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8092 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8093 {
8094   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8095                                                    (__v16sf)
8096                                                    _mm512_setzero_ps (),
8097                                                    (__mmask16) __U, __R);
8098 }
8099
8100 extern __inline __m512d
8101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102 _mm512_getexp_round_pd (__m512d __A, const int __R)
8103 {
8104   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8105                                                     (__v8df)
8106                                                     _mm512_undefined_pd (),
8107                                                     (__mmask8) -1, __R);
8108 }
8109
8110 extern __inline __m512d
8111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8112 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8113                              const int __R)
8114 {
8115   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8116                                                     (__v8df) __W,
8117                                                     (__mmask8) __U, __R);
8118 }
8119
8120 extern __inline __m512d
8121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8122 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8123 {
8124   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8125                                                     (__v8df)
8126                                                     _mm512_setzero_pd (),
8127                                                     (__mmask8) __U, __R);
8128 }
8129
8130 extern __inline __m512d
8131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8132 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8133                          _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8134 {
8135   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8136                                                      (__C << 2) | __B,
8137                                                      _mm512_undefined_pd (),
8138                                                      (__mmask8) -1, __R);
8139 }
8140
8141 extern __inline __m512d
8142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8143 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8144                               _MM_MANTISSA_NORM_ENUM __B,
8145                               _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8146 {
8147   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8148                                                      (__C << 2) | __B,
8149                                                      (__v8df) __W, __U,
8150                                                      __R);
8151 }
8152
8153 extern __inline __m512d
8154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8156                                _MM_MANTISSA_NORM_ENUM __B,
8157                                _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8158 {
8159   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8160                                                      (__C << 2) | __B,
8161                                                      (__v8df)
8162                                                      _mm512_setzero_pd (),
8163                                                      __U, __R);
8164 }
8165
8166 extern __inline __m512
8167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8168 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8169                          _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8170 {
8171   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8172                                                     (__C << 2) | __B,
8173                                                     _mm512_undefined_ps (),
8174                                                     (__mmask16) -1, __R);
8175 }
8176
8177 extern __inline __m512
8178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8179 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8180                               _MM_MANTISSA_NORM_ENUM __B,
8181                               _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8182 {
8183   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8184                                                     (__C << 2) | __B,
8185                                                     (__v16sf) __W, __U,
8186                                                     __R);
8187 }
8188
8189 extern __inline __m512
8190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8191 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8192                                _MM_MANTISSA_NORM_ENUM __B,
8193                                _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8194 {
8195   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8196                                                     (__C << 2) | __B,
8197                                                     (__v16sf)
8198                                                     _mm512_setzero_ps (),
8199                                                     __U, __R);
8200 }
8201
8202 extern __inline __m128d
8203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8204 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8205                       _MM_MANTISSA_NORM_ENUM __C,
8206                       _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8207 {
8208   return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8209                                                   (__v2df) __B,
8210                                                   (__D << 2) | __C,
8211                                                    __R);
8212 }
8213
8214 extern __inline __m128
8215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8216 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8217                       _MM_MANTISSA_NORM_ENUM __C,
8218                       _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8219 {
8220   return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8221                                                   (__v4sf) __B,
8222                                                   (__D << 2) | __C,
8223                                                   __R);
8224 }
8225
8226 #else
/* Macro form of the unmasked __v8df getmant: immediate ((C) << 2) | (B),
   undefined pass-through, all-ones mask, rounding argument R.  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) _mm512_undefined_pd (), \
      (__mmask8) -1, \
      (R)))
8233
/* Macro form of the masked __v8df getmant: W is the pass-through vector,
   U the mask, ((C) << 2) | (B) the immediate, R the rounding argument.  */
#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) (W), \
      (__mmask8) (U), \
      (R)))
8240
/* Macro form of the zero-masked __v8df getmant: the pass-through vector
   is _mm512_setzero_pd (), U is the mask.  */
#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) _mm512_setzero_pd (), \
      (__mmask8) (U), \
      (R)))
/* Macro form of the unmasked __v16sf getmant: immediate ((C) << 2) | (B),
   undefined pass-through, all-ones mask, rounding argument R.  */
#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) _mm512_undefined_ps (), \
      (__mmask16) -1, \
      (R)))
8253
/* Macro form of the masked __v16sf getmant: W is the pass-through vector,
   U the mask, ((C) << 2) | (B) the immediate, R the rounding argument.  */
#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) (W), \
      (__mmask16) (U), \
      (R)))
8260
/* Macro form of the zero-masked __v16sf getmant: the pass-through vector
   is _mm512_setzero_ps (), U is the mask.  */
#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) _mm512_setzero_ps (), \
      (__mmask16) (U), \
      (R)))
/* Macro form of the scalar-double getmant: X and Y are the two __v2df
   operands, ((D) << 2) | (C) the immediate, R the rounding argument.  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
      (__v2df) (__m128d) (X), \
      (__v2df) (__m128d) (Y), \
      (int) (((D) << 2) | (C)), \
      (R)))
8272
/* Macro form of the scalar-float getmant: X and Y are the two __v4sf
   operands, ((D) << 2) | (C) the immediate, R the rounding argument.  */
#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_round ( \
      (__v4sf) (__m128) (X), \
      (__v4sf) (__m128) (Y), \
      (int) (((D) << 2) | (C)), \
      (R)))
8278
/* Macro form of the scalar-float getexp: forwards A and B as __v4sf
   together with the rounding argument R.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ( \
      (__v4sf) (__m128) (A), \
      (__v4sf) (__m128) (B), \
      (R)))
8281
/* Macro form of the scalar-double getexp: forwards A and B as __v2df
   together with the rounding argument R.  */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ( \
      (__v2df) (__m128d) (A), \
      (__v2df) (__m128d) (B), \
      (R)))
8284
/* Macro form of the unmasked __v16sf getexp: undefined pass-through,
   all-ones mask, rounding argument R.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) -1, \
      (R)))
8288
/* Macro form of the masked __v16sf getexp: W is the pass-through vector
   and U the mask.  */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (W), \
      (__mmask16) (U), \
      (R)))
8292
/* Macro form of the zero-masked __v16sf getexp: the pass-through vector
   is _mm512_setzero_ps (), U is the mask.  */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (U), \
      (R)))
8296
/* Macro form of the unmasked __v8df getexp: undefined pass-through,
   all-ones mask, rounding argument R.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, \
      (R)))
8300
/* Macro form of the masked __v8df getexp: W is the pass-through vector
   and U the mask.  */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (W), \
      (__mmask8) (U), \
      (R)))
8304
/* Macro form of the zero-masked __v8df getexp: the pass-through vector
   is _mm512_setzero_pd (), U is the mask.  */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (U), \
      (R)))
8308 #endif
8309
8310 #ifdef __OPTIMIZE__
8311 extern __inline __m512
8312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8313 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8314 {
8315   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8316                                                   (__v16sf)
8317                                                   _mm512_undefined_ps (),
8318                                                   -1, __R);
8319 }
8320
8321 extern __inline __m512
8322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8323 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8324                                  const int __imm, const int __R)
8325 {
8326   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8327                                                   (__v16sf) __A,
8328                                                   (__mmask16) __B, __R);
8329 }
8330
8331 extern __inline __m512
8332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8333 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8334                                   const int __imm, const int __R)
8335 {
8336   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8337                                                   __imm,
8338                                                   (__v16sf)
8339                                                   _mm512_setzero_ps (),
8340                                                   (__mmask16) __A, __R);
8341 }
8342
8343 extern __inline __m512d
8344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8345 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8346 {
8347   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8348                                                    (__v8df)
8349                                                    _mm512_undefined_pd (),
8350                                                    -1, __R);
8351 }
8352
8353 extern __inline __m512d
8354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8355 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8356                                  __m512d __C, const int __imm, const int __R)
8357 {
8358   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8359                                                    (__v8df) __A,
8360                                                    (__mmask8) __B, __R);
8361 }
8362
8363 extern __inline __m512d
8364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8366                                   const int __imm, const int __R)
8367 {
8368   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8369                                                    __imm,
8370                                                    (__v8df)
8371                                                    _mm512_setzero_pd (),
8372                                                    (__mmask8) __A, __R);
8373 }
8374
8375 extern __inline __m128
8376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8377 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8378 {
8379   return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8380                                                    (__v4sf) __B, __imm, __R);
8381 }
8382
8383 extern __inline __m128d
8384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8385 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8386                          const int __R)
8387 {
8388   return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8389                                                     (__v2df) __B, __imm, __R);
8390 }
8391
8392 #else
/* Macro used when __OPTIMIZE__ is not defined: unmasked __v16sf rndscale
   with immediate B, undefined pass-through and an all-ones mask.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf) (__m512) (A), \
      (int) (B), \
      (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) (-1), \
      (R)))
/* Macro used when __OPTIMIZE__ is not defined: masked __v16sf rndscale.
   C is the source, D the immediate, A the pass-through, B the mask.  */
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf) (__m512) (C), \
      (int) (D), \
      (__v16sf) (__m512) (A), \
      (__mmask16) (B), \
      (R)))
/* Macro used when __OPTIMIZE__ is not defined: zero-masked __v16sf
   rndscale.  B is the source, C the immediate, A the mask.  */
#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf) (__m512) (B), \
      (int) (C), \
      (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (A), \
      (R)))
/* Macro used when __OPTIMIZE__ is not defined: unmasked __v8df rndscale
   with immediate B, undefined pass-through and an all-ones mask.  */
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df) (__m512d) (A), \
      (int) (B), \
      (__v8df) _mm512_undefined_pd (), \
      (__mmask8) (-1), \
      (R)))
/* Macro used when __OPTIMIZE__ is not defined: masked __v8df rndscale.
   C is the source, D the immediate, A the pass-through, B the mask.  */
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df) (__m512d) (C), \
      (int) (D), \
      (__v8df) (__m512d) (A), \
      (__mmask8) (B), \
      (R)))
/* Macro used when __OPTIMIZE__ is not defined: zero-masked __v8df
   rndscale.  B is the source, C the immediate, A the mask.  */
#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df) (__m512d) (B), \
      (int) (C), \
      (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (A), \
      (R)))
/* Macro used when __OPTIMIZE__ is not defined: scalar-float rndscale on
   A and B with immediate C and rounding argument R.  */
#define _mm_roundscale_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
      (__v4sf) (__m128) (A), \
      (__v4sf) (__m128) (B), \
      (int) (C), \
      (R)))
/* Macro used when __OPTIMIZE__ is not defined: scalar-double rndscale on
   A and B with immediate C and rounding argument R.  */
#define _mm_roundscale_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
      (__v2df) (__m128d) (A), \
      (__v2df) (__m128d) (B), \
      (int) (C), \
      (R)))
8425 #endif
8426
8427 extern __inline __m512
8428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8429 _mm512_floor_ps (__m512 __A)
8430 {
8431   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8432                                                   _MM_FROUND_FLOOR,
8433                                                   (__v16sf) __A, -1,
8434                                                   _MM_FROUND_CUR_DIRECTION);
8435 }
8436
8437 extern __inline __m512d
8438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8439 _mm512_floor_pd (__m512d __A)
8440 {
8441   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8442                                                    _MM_FROUND_FLOOR,
8443                                                    (__v8df) __A, -1,
8444                                                    _MM_FROUND_CUR_DIRECTION);
8445 }
8446
8447 extern __inline __m512
8448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8449 _mm512_ceil_ps (__m512 __A)
8450 {
8451   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8452                                                   _MM_FROUND_CEIL,
8453                                                   (__v16sf) __A, -1,
8454                                                   _MM_FROUND_CUR_DIRECTION);
8455 }
8456
8457 extern __inline __m512d
8458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8459 _mm512_ceil_pd (__m512d __A)
8460 {
8461   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8462                                                    _MM_FROUND_CEIL,
8463                                                    (__v8df) __A, -1,
8464                                                    _MM_FROUND_CUR_DIRECTION);
8465 }
8466
8467 extern __inline __m512
8468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8470 {
8471   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8472                                                   _MM_FROUND_FLOOR,
8473                                                   (__v16sf) __W, __U,
8474                                                   _MM_FROUND_CUR_DIRECTION);
8475 }
8476
8477 extern __inline __m512d
8478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8479 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8480 {
8481   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8482                                                    _MM_FROUND_FLOOR,
8483                                                    (__v8df) __W, __U,
8484                                                    _MM_FROUND_CUR_DIRECTION);
8485 }
8486
8487 extern __inline __m512
8488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8489 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8490 {
8491   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8492                                                   _MM_FROUND_CEIL,
8493                                                   (__v16sf) __W, __U,
8494                                                   _MM_FROUND_CUR_DIRECTION);
8495 }
8496
8497 extern __inline __m512d
8498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8499 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8500 {
8501   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8502                                                    _MM_FROUND_CEIL,
8503                                                    (__v8df) __W, __U,
8504                                                    _MM_FROUND_CUR_DIRECTION);
8505 }
8506
8507 #ifdef __OPTIMIZE__
8508 extern __inline __m512i
8509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8510 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8511 {
8512   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8513                                                   (__v16si) __B, __imm,
8514                                                   (__v16si)
8515                                                   _mm512_undefined_si512 (),
8516                                                   (__mmask16) -1);
8517 }
8518
8519 extern __inline __m512i
8520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8521 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8522                           __m512i __B, const int __imm)
8523 {
8524   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8525                                                   (__v16si) __B, __imm,
8526                                                   (__v16si) __W,
8527                                                   (__mmask16) __U);
8528 }
8529
8530 extern __inline __m512i
8531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8532 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8533                            const int __imm)
8534 {
8535   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8536                                                   (__v16si) __B, __imm,
8537                                                   (__v16si)
8538                                                   _mm512_setzero_si512 (),
8539                                                   (__mmask16) __U);
8540 }
8541
8542 extern __inline __m512i
8543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8545 {
8546   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8547                                                   (__v8di) __B, __imm,
8548                                                   (__v8di)
8549                                                   _mm512_undefined_si512 (),
8550                                                   (__mmask8) -1);
8551 }
8552
8553 extern __inline __m512i
8554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8556                           __m512i __B, const int __imm)
8557 {
8558   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8559                                                   (__v8di) __B, __imm,
8560                                                   (__v8di) __W,
8561                                                   (__mmask8) __U);
8562 }
8563
8564 extern __inline __m512i
8565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8567                            const int __imm)
8568 {
8569   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8570                                                   (__v8di) __B, __imm,
8571                                                   (__v8di)
8572                                                   _mm512_setzero_si512 (),
8573                                                   (__mmask8) __U);
8574 }
8575 #else
/* Macro used when __OPTIMIZE__ is not defined: unmasked alignd on X and
   Y with immediate C, undefined pass-through and an all-ones mask.  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
      (__v16si) (__m512i) (X), \
      (__v16si) (__m512i) (Y), \
      (int) (C), \
      (__v16si) _mm512_undefined_si512 (), \
      (__mmask16) -1))
8580
/* Macro used when __OPTIMIZE__ is not defined: masked alignd on X and Y
   with immediate C, pass-through W and mask U.  */
#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
      (__v16si) (__m512i) (X), \
      (__v16si) (__m512i) (Y), \
      (int) (C), \
      (__v16si) (__m512i) (W), \
      (__mmask16) (U)))
8585
/* Macro used when __OPTIMIZE__ is not defined: zero-masked alignd on X
   and Y with immediate C, a zero pass-through and mask U.  */
#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
      (__v16si) (__m512i) (X), \
      (__v16si) (__m512i) (Y), \
      (int) (C), \
      (__v16si) _mm512_setzero_si512 (), \
      (__mmask16) (U)))
8590
/* Macro used when __OPTIMIZE__ is not defined: unmasked alignq on X and
   Y with immediate C, undefined pass-through and an all-ones mask.  */
#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
      (__v8di) (__m512i) (X), \
      (__v8di) (__m512i) (Y), \
      (int) (C), \
      (__v8di) _mm512_undefined_si512 (), \
      (__mmask8) -1))
8595
/* Macro used when __OPTIMIZE__ is not defined: masked alignq on X and Y
   with immediate C, pass-through W and mask U.  */
#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
      (__v8di) (__m512i) (X), \
      (__v8di) (__m512i) (Y), \
      (int) (C), \
      (__v8di) (__m512i) (W), \
      (__mmask8) (U)))
8599
/* Macro used when __OPTIMIZE__ is not defined: zero-masked alignq on X
   and Y with immediate C, a zero pass-through and mask U.  */
#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
      (__v8di) (__m512i) (X), \
      (__v8di) (__m512i) (Y), \
      (int) (C), \
      (__v8di) _mm512_setzero_si512 (), \
      (__mmask8) (U)))
8604 #endif
8605
8606 extern __inline __mmask16
8607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8609 {
8610   return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8611                                                      (__v16si) __B,
8612                                                      (__mmask16) -1);
8613 }
8614
8615 extern __inline __mmask16
8616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8617 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8618 {
8619   return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8620                                                      (__v16si) __B, __U);
8621 }
8622
8623 extern __inline __mmask8
8624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8625 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8626 {
8627   return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8628                                                     (__v8di) __B, __U);
8629 }
8630
8631 extern __inline __mmask8
8632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8633 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8634 {
8635   return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8636                                                     (__v8di) __B,
8637                                                     (__mmask8) -1);
8638 }
8639
8640 extern __inline __mmask16
8641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8643 {
8644   return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8645                                                      (__v16si) __B,
8646                                                      (__mmask16) -1);
8647 }
8648
8649 extern __inline __mmask16
8650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8652 {
8653   return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8654                                                      (__v16si) __B, __U);
8655 }
8656
8657 extern __inline __mmask8
8658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8660 {
8661   return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8662                                                     (__v8di) __B, __U);
8663 }
8664
8665 extern __inline __mmask8
8666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8667 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8668 {
8669   return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8670                                                     (__v8di) __B,
8671                                                     (__mmask8) -1);
8672 }
8673
8674 extern __inline __mmask16
8675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8676 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8677 {
8678   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8679                                                     (__v16si) __Y, 5,
8680                                                     (__mmask16) -1);
8681 }
8682
8683 extern __inline __mmask16
8684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8685 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8686 {
8687   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8688                                                     (__v16si) __Y, 5,
8689                                                     (__mmask16) __M);
8690 }
8691
8692 extern __inline __mmask16
8693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8694 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8695 {
8696   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8697                                                     (__v16si) __Y, 5,
8698                                                     (__mmask16) __M);
8699 }
8700
8701 extern __inline __mmask16
8702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8704 {
8705   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8706                                                     (__v16si) __Y, 5,
8707                                                     (__mmask16) -1);
8708 }
8709
8710 extern __inline __mmask8
8711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8713 {
8714   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8715                                                     (__v8di) __Y, 5,
8716                                                     (__mmask8) __M);
8717 }
8718
8719 extern __inline __mmask8
8720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8721 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8722 {
8723   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8724                                                     (__v8di) __Y, 5,
8725                                                     (__mmask8) -1);
8726 }
8727
8728 extern __inline __mmask8
8729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8731 {
8732   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8733                                                     (__v8di) __Y, 5,
8734                                                     (__mmask8) __M);
8735 }
8736
8737 extern __inline __mmask8
8738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8739 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8740 {
8741   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8742                                                     (__v8di) __Y, 5,
8743                                                     (__mmask8) -1);
8744 }
8745
8746 extern __inline __mmask16
8747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8748 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8749 {
8750   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8751                                                     (__v16si) __Y, 2,
8752                                                     (__mmask16) __M);
8753 }
8754
8755 extern __inline __mmask16
8756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8757 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8758 {
8759   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8760                                                     (__v16si) __Y, 2,
8761                                                     (__mmask16) -1);
8762 }
8763
8764 extern __inline __mmask16
8765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8766 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8767 {
8768   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8769                                                     (__v16si) __Y, 2,
8770                                                     (__mmask16) __M);
8771 }
8772
8773 extern __inline __mmask16
8774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8775 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8776 {
8777   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8778                                                     (__v16si) __Y, 2,
8779                                                     (__mmask16) -1);
8780 }
8781
8782 extern __inline __mmask8
8783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8784 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8785 {
8786   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8787                                                     (__v8di) __Y, 2,
8788                                                     (__mmask8) __M);
8789 }
8790
8791 extern __inline __mmask8
8792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8793 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8794 {
8795   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8796                                                     (__v8di) __Y, 2,
8797                                                     (__mmask8) -1);
8798 }
8799
8800 extern __inline __mmask8
8801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8803 {
8804   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8805                                                     (__v8di) __Y, 2,
8806                                                     (__mmask8) __M);
8807 }
8808
8809 extern __inline __mmask8
8810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8812 {
8813   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8814                                                     (__v8di) __Y, 2,
8815                                                     (__mmask8) -1);
8816 }
8817
8818 extern __inline __mmask16
8819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8821 {
8822   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8823                                                     (__v16si) __Y, 1,
8824                                                     (__mmask16) __M);
8825 }
8826
8827 extern __inline __mmask16
8828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8829 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8830 {
8831   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8832                                                     (__v16si) __Y, 1,
8833                                                     (__mmask16) -1);
8834 }
8835
8836 extern __inline __mmask16
8837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8839 {
8840   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8841                                                     (__v16si) __Y, 1,
8842                                                     (__mmask16) __M);
8843 }
8844
8845 extern __inline __mmask16
8846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8847 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8848 {
8849   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8850                                                     (__v16si) __Y, 1,
8851                                                     (__mmask16) -1);
8852 }
8853
8854 extern __inline __mmask8
8855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8856 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8857 {
8858   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8859                                                     (__v8di) __Y, 1,
8860                                                     (__mmask8) __M);
8861 }
8862
8863 extern __inline __mmask8
8864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8865 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8866 {
8867   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8868                                                     (__v8di) __Y, 1,
8869                                                     (__mmask8) -1);
8870 }
8871
8872 extern __inline __mmask8
8873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8874 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8875 {
8876   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8877                                                     (__v8di) __Y, 1,
8878                                                     (__mmask8) __M);
8879 }
8880
8881 extern __inline __mmask8
8882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8883 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8884 {
8885   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8886                                                     (__v8di) __Y, 1,
8887                                                     (__mmask8) -1);
8888 }
8889
8890 extern __inline __mmask16
8891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8892 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8893 {
8894   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8895                                                     (__v16si) __Y, 4,
8896                                                     (__mmask16) -1);
8897 }
8898
8899 extern __inline __mmask16
8900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8902 {
8903   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8904                                                     (__v16si) __Y, 4,
8905                                                     (__mmask16) __M);
8906 }
8907
8908 extern __inline __mmask16
8909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8911 {
8912   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8913                                                     (__v16si) __Y, 4,
8914                                                     (__mmask16) __M);
8915 }
8916
8917 extern __inline __mmask16
8918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8919 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8920 {
8921   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8922                                                     (__v16si) __Y, 4,
8923                                                     (__mmask16) -1);
8924 }
8925
8926 extern __inline __mmask8
8927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8928 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8929 {
8930   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8931                                                     (__v8di) __Y, 4,
8932                                                     (__mmask8) __M);
8933 }
8934
8935 extern __inline __mmask8
8936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8937 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8938 {
8939   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8940                                                     (__v8di) __Y, 4,
8941                                                     (__mmask8) -1);
8942 }
8943
8944 extern __inline __mmask8
8945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8946 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8947 {
8948   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8949                                                     (__v8di) __Y, 4,
8950                                                     (__mmask8) __M);
8951 }
8952
8953 extern __inline __mmask8
8954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8955 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8956 {
8957   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8958                                                     (__v8di) __Y, 4,
8959                                                     (__mmask8) -1);
8960 }
8961
/* Named predicate values, presumably intended for the P argument of the
   _mm512_cmp_ep[iu]{32,64}_mask intrinsics.  The fixed-predicate
   wrappers in this header pass 1, 2 and 4, which match LT, LE and NE.
   Two pairs are aliases: NLT/GE are both 5 and NLE/GT are both 6.  */
#define _MM_CMPINT_EQ		0x0
#define _MM_CMPINT_LT		0x1
#define _MM_CMPINT_LE		0x2
#define _MM_CMPINT_UNUSED	0x3
#define _MM_CMPINT_NE		0x4
#define _MM_CMPINT_NLT		0x5
#define _MM_CMPINT_GE		0x5	/* Same value as _MM_CMPINT_NLT.  */
#define _MM_CMPINT_NLE		0x6
#define _MM_CMPINT_GT		0x6	/* Same value as _MM_CMPINT_NLE.  */
8971
8972 #ifdef __OPTIMIZE__
8973 extern __inline __mmask8
8974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8975 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8976 {
8977   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8978                                                  (__v8di) __Y, __P,
8979                                                  (__mmask8) -1);
8980 }
8981
8982 extern __inline __mmask16
8983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8985 {
8986   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8987                                                   (__v16si) __Y, __P,
8988                                                   (__mmask16) -1);
8989 }
8990
8991 extern __inline __mmask8
8992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8993 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8994 {
8995   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8996                                                   (__v8di) __Y, __P,
8997                                                   (__mmask8) -1);
8998 }
8999
9000 extern __inline __mmask16
9001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9003 {
9004   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9005                                                    (__v16si) __Y, __P,
9006                                                    (__mmask16) -1);
9007 }
9008
9009 extern __inline __mmask8
9010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9011 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9012                           const int __R)
9013 {
9014   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9015                                                   (__v8df) __Y, __P,
9016                                                   (__mmask8) -1, __R);
9017 }
9018
9019 extern __inline __mmask16
9020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9021 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9022 {
9023   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9024                                                    (__v16sf) __Y, __P,
9025                                                    (__mmask16) -1, __R);
9026 }
9027
9028 extern __inline __mmask8
9029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9030 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9031                             const int __P)
9032 {
9033   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9034                                                  (__v8di) __Y, __P,
9035                                                  (__mmask8) __U);
9036 }
9037
9038 extern __inline __mmask16
9039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9040 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9041                             const int __P)
9042 {
9043   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9044                                                   (__v16si) __Y, __P,
9045                                                   (__mmask16) __U);
9046 }
9047
9048 extern __inline __mmask8
9049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9051                             const int __P)
9052 {
9053   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9054                                                   (__v8di) __Y, __P,
9055                                                   (__mmask8) __U);
9056 }
9057
9058 extern __inline __mmask16
9059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9060 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9061                             const int __P)
9062 {
9063   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9064                                                    (__v16si) __Y, __P,
9065                                                    (__mmask16) __U);
9066 }
9067
9068 extern __inline __mmask8
9069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9070 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9071                                const int __P, const int __R)
9072 {
9073   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9074                                                   (__v8df) __Y, __P,
9075                                                   (__mmask8) __U, __R);
9076 }
9077
9078 extern __inline __mmask16
9079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9080 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9081                                const int __P, const int __R)
9082 {
9083   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9084                                                    (__v16sf) __Y, __P,
9085                                                    (__mmask16) __U, __R);
9086 }
9087
9088 extern __inline __mmask8
9089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9090 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9091 {
9092   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9093                                                (__v2df) __Y, __P,
9094                                                (__mmask8) -1, __R);
9095 }
9096
9097 extern __inline __mmask8
9098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9099 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9100                             const int __P, const int __R)
9101 {
9102   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9103                                                (__v2df) __Y, __P,
9104                                                (__mmask8) __M, __R);
9105 }
9106
9107 extern __inline __mmask8
9108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9109 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9110 {
9111   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9112                                                (__v4sf) __Y, __P,
9113                                                (__mmask8) -1, __R);
9114 }
9115
9116 extern __inline __mmask8
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9119                             const int __P, const int __R)
9120 {
9121   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9122                                                (__v4sf) __Y, __P,
9123                                                (__mmask8) __M, __R);
9124 }
9125
9126 #else
/* Macro form of _mm512_cmp_epi64_mask used when __OPTIMIZE__ is not
   defined; presumably a macro so that P reaches the builtin as a
   constant expression.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X),                 \
                                (__v8di) (__m512i) (Y),                 \
                                (int) (P), (__mmask8) -1))
9131
/* Macro form of _mm512_cmp_epi32_mask used when __OPTIMIZE__ is not
   defined.  The result is cast to __mmask16, the type returned by the
   inline-function form; the previous __mmask8 cast did not match it.  */
#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y), (int)(P),\
                                            (__mmask16)-1))
9136
/* Macro form of _mm512_cmp_epu64_mask used when __OPTIMIZE__ is not
   defined; presumably a macro so that P reaches the builtin as a
   constant expression.  */
#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8)                                                           \
   __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X),                \
                                 (__v8di) (__m512i) (Y),                \
                                 (int) (P), (__mmask8) -1))
9141
/* Macro form of _mm512_cmp_epu32_mask used when __OPTIMIZE__ is not
   defined.  The result is cast to __mmask16, the type returned by the
   inline-function form; the previous __mmask8 cast did not match it.  */
#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y), (int)(P),\
                                             (__mmask16)-1))
9146
/* Macro form of _mm512_cmp_round_pd_mask used when __OPTIMIZE__ is not
   defined; P and R are passed straight through to the builtin.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8)                                                           \
   __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),                \
                                 (__v8df) (__m512d) (Y),                \
                                 (int) (P), (__mmask8) -1, (R)))
9151
/* Macro form of _mm512_cmp_round_ps_mask used when __OPTIMIZE__ is not
   defined; P and R are passed straight through to the builtin.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16)                                                          \
   __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),                \
                                 (__v16sf) (__m512) (Y),                \
                                 (int) (P), (__mmask16) -1, (R)))
9156
/* Macro form of _mm512_mask_cmp_epi64_mask used when __OPTIMIZE__ is not
   defined.  M is parenthesized so the __mmask8 cast applies to the whole
   argument expression rather than only to its first operand.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y), (int)(P),\
                                           (__mmask8)(M)))
9161
/* Macro form of _mm512_mask_cmp_epi32_mask used when __OPTIMIZE__ is not
   defined.  The result is cast to __mmask16, the type returned by the
   inline-function form (the previous __mmask8 cast did not match it),
   and M is parenthesized so its cast covers the whole argument.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y), (int)(P),\
                                            (__mmask16)(M)))
9166
/* Macro form of _mm512_mask_cmp_epu64_mask used when __OPTIMIZE__ is not
   defined.  M is parenthesized so the __mmask8 cast applies to the whole
   argument expression rather than only to its first operand.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),       \
                                            (__v8di)(__m512i)(Y), (int)(P),\
                                            (__mmask8)(M)))
9171
/* Macro form of _mm512_mask_cmp_epu32_mask used when __OPTIMIZE__ is not
   defined.  The result is cast to __mmask16, the type returned by the
   inline-function form (the previous __mmask8 cast did not match it),
   and M is parenthesized so its cast covers the whole argument.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y), (int)(P),\
                                             (__mmask16)(M)))
9176
/* Macro form of _mm512_mask_cmp_round_pd_mask used when __OPTIMIZE__ is
   not defined.  M is parenthesized so the __mmask8 cast applies to the
   whole argument expression; R is parenthesized for uniformity.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)(M), (R)))
9181
/* Macro form of _mm512_mask_cmp_round_ps_mask used when __OPTIMIZE__ is
   not defined.  M is parenthesized so the __mmask16 cast applies to the
   whole argument expression; R is parenthesized for uniformity.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)(M), (R)))
9186
/* Macro form of _mm_cmp_round_sd_mask used when __OPTIMIZE__ is not
   defined; P and R are passed straight through to the builtin.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R)                               \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),                   \
                              (__v2df) (__m128d) (Y),                   \
                              (int) (P), (__mmask8) -1, (R)))
9191
/* Macro form of _mm_mask_cmp_round_sd_mask used when __OPTIMIZE__ is not
   defined.  M is cast to __mmask8 exactly as the inline-function form
   casts its __M parameter, so both forms convert the mask the same
   way.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),          \
                                         (__v2df)(__m128d)(Y), (int)(P),\
                                         (__mmask8)(M), (R)))
9196
/* Macro form of _mm_cmp_round_ss_mask used when __OPTIMIZE__ is not
   defined; P and R are passed straight through to the builtin.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R)                               \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),                    \
                              (__v4sf) (__m128) (Y),                    \
                              (int) (P), (__mmask8) -1, (R)))
9201
/* Macro form of _mm_mask_cmp_round_ss_mask used when __OPTIMIZE__ is not
   defined.  M is cast to __mmask8 exactly as the inline-function form
   casts its __M parameter, so both forms convert the mask the same
   way.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),           \
                                         (__v4sf)(__m128)(Y), (int)(P), \
                                         (__mmask8)(M), (R)))
9206 #endif
9207
9208 #ifdef __OPTIMIZE__
9209 extern __inline __m512
9210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9211 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9212 {
9213   __m512 v1_old = _mm512_undefined_ps ();
9214   __mmask16 mask = 0xFFFF;
9215
9216   return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9217                                                 __addr,
9218                                                 (__v16si) __index,
9219                                                 mask, __scale);
9220 }
9221
9222 extern __inline __m512
9223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9224 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9225                           __m512i __index, float const *__addr, int __scale)
9226 {
9227   return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9228                                                 __addr,
9229                                                 (__v16si) __index,
9230                                                 __mask, __scale);
9231 }
9232
9233 extern __inline __m512d
9234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9235 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9236 {
9237   __m512d v1_old = _mm512_undefined_pd ();
9238   __mmask8 mask = 0xFF;
9239
9240   return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9241                                                 __addr,
9242                                                 (__v8si) __index, mask,
9243                                                 __scale);
9244 }
9245
9246 extern __inline __m512d
9247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9248 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9249                           __m256i __index, double const *__addr, int __scale)
9250 {
9251   return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9252                                                 __addr,
9253                                                 (__v8si) __index,
9254                                                 __mask, __scale);
9255 }
9256
9257 extern __inline __m256
9258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9259 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9260 {
9261   __m256 v1_old = _mm256_undefined_ps ();
9262   __mmask8 mask = 0xFF;
9263
9264   return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9265                                                 __addr,
9266                                                 (__v8di) __index, mask,
9267                                                 __scale);
9268 }
9269
9270 extern __inline __m256
9271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9272 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9273                           __m512i __index, float const *__addr, int __scale)
9274 {
9275   return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9276                                                 __addr,
9277                                                 (__v8di) __index,
9278                                                 __mask, __scale);
9279 }
9280
9281 extern __inline __m512d
9282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9284 {
9285   __m512d v1_old = _mm512_undefined_pd ();
9286   __mmask8 mask = 0xFF;
9287
9288   return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9289                                                 __addr,
9290                                                 (__v8di) __index, mask,
9291                                                 __scale);
9292 }
9293
9294 extern __inline __m512d
9295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9296 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9297                           __m512i __index, double const *__addr, int __scale)
9298 {
9299   return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9300                                                 __addr,
9301                                                 (__v8di) __index,
9302                                                 __mask, __scale);
9303 }
9304
9305 extern __inline __m512i
9306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9307 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9308 {
9309   __m512i v1_old = _mm512_undefined_si512 ();
9310   __mmask16 mask = 0xFFFF;
9311
9312   return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9313                                                  __addr,
9314                                                  (__v16si) __index,
9315                                                  mask, __scale);
9316 }
9317
9318 extern __inline __m512i
9319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9320 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9321                              __m512i __index, int const *__addr, int __scale)
9322 {
9323   return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9324                                                  __addr,
9325                                                  (__v16si) __index,
9326                                                  __mask, __scale);
9327 }
9328
9329 extern __inline __m512i
9330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9331 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9332 {
9333   __m512i v1_old = _mm512_undefined_si512 ();
9334   __mmask8 mask = 0xFF;
9335
9336   return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9337                                                 __addr,
9338                                                 (__v8si) __index, mask,
9339                                                 __scale);
9340 }
9341
9342 extern __inline __m512i
9343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9344 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9345                              __m256i __index, long long const *__addr,
9346                              int __scale)
9347 {
9348   return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9349                                                 __addr,
9350                                                 (__v8si) __index,
9351                                                 __mask, __scale);
9352 }
9353
9354 extern __inline __m256i
9355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9356 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9357 {
9358   __m256i v1_old = _mm256_undefined_si256 ();
9359   __mmask8 mask = 0xFF;
9360
9361   return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9362                                                  __addr,
9363                                                  (__v8di) __index,
9364                                                  mask, __scale);
9365 }
9366
9367 extern __inline __m256i
9368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9369 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9370                              __m512i __index, int const *__addr, int __scale)
9371 {
9372   return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9373                                                  __addr,
9374                                                  (__v8di) __index,
9375                                                  __mask, __scale);
9376 }
9377
9378 extern __inline __m512i
9379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9380 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9381 {
9382   __m512i v1_old = _mm512_undefined_si512 ();
9383   __mmask8 mask = 0xFF;
9384
9385   return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9386                                                 __addr,
9387                                                 (__v8di) __index, mask,
9388                                                 __scale);
9389 }
9390
9391 extern __inline __m512i
9392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9393 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9394                              __m512i __index, long long const *__addr,
9395                              int __scale)
9396 {
9397   return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9398                                                 __addr,
9399                                                 (__v8di) __index,
9400                                                 __mask, __scale);
9401 }
9402
9403 extern __inline void
9404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9405 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9406 {
9407   __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9408                                  (__v16si) __index, (__v16sf) __v1, __scale);
9409 }
9410
9411 extern __inline void
9412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9414                            __m512i __index, __m512 __v1, int __scale)
9415 {
9416   __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9417                                  (__v16sf) __v1, __scale);
9418 }
9419
9420 extern __inline void
9421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9422 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9423                       int __scale)
9424 {
9425   __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9426                                 (__v8si) __index, (__v8df) __v1, __scale);
9427 }
9428
9429 extern __inline void
9430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9431 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9432                            __m256i __index, __m512d __v1, int __scale)
9433 {
9434   __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9435                                 (__v8df) __v1, __scale);
9436 }
9437
9438 extern __inline void
9439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9440 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9441 {
9442   __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9443                                  (__v8di) __index, (__v8sf) __v1, __scale);
9444 }
9445
9446 extern __inline void
9447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9448 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9449                            __m512i __index, __m256 __v1, int __scale)
9450 {
9451   __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9452                                  (__v8sf) __v1, __scale);
9453 }
9454
9455 extern __inline void
9456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9457 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9458                       int __scale)
9459 {
9460   __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9461                                 (__v8di) __index, (__v8df) __v1, __scale);
9462 }
9463
9464 extern __inline void
9465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9466 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9467                            __m512i __index, __m512d __v1, int __scale)
9468 {
9469   __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9470                                 (__v8df) __v1, __scale);
9471 }
9472
9473 extern __inline void
9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9476                          __m512i __v1, int __scale)
9477 {
9478   __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9479                                  (__v16si) __index, (__v16si) __v1, __scale);
9480 }
9481
9482 extern __inline void
9483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9485                               __m512i __index, __m512i __v1, int __scale)
9486 {
9487   __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9488                                  (__v16si) __v1, __scale);
9489 }
9490
9491 extern __inline void
9492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9494                          __m512i __v1, int __scale)
9495 {
9496   __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9497                                 (__v8si) __index, (__v8di) __v1, __scale);
9498 }
9499
9500 extern __inline void
9501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9503                               __m256i __index, __m512i __v1, int __scale)
9504 {
9505   __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9506                                 (__v8di) __v1, __scale);
9507 }
9508
9509 extern __inline void
9510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9512                          __m256i __v1, int __scale)
9513 {
9514   __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9515                                  (__v8di) __index, (__v8si) __v1, __scale);
9516 }
9517
9518 extern __inline void
9519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9520 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9521                               __m512i __index, __m256i __v1, int __scale)
9522 {
9523   __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9524                                  (__v8si) __v1, __scale);
9525 }
9526
9527 extern __inline void
9528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9529 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9530                          __m512i __v1, int __scale)
9531 {
9532   __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9533                                 (__v8di) __index, (__v8di) __v1, __scale);
9534 }
9535
9536 extern __inline void
9537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9538 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9539                               __m512i __index, __m512i __v1, int __scale)
9540 {
9541   __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9542                                 (__v8di) __v1, __scale);
9543 }
9544 #else
/* Macro form of _mm512_i32gather_ps: calls __builtin_ia32_gathersiv16sf
   with _mm512_undefined_ps () as first operand and an all-ones mask.
   Arguments are parenthesized so an expression argument is cast as a
   whole, and the expansion is wrapped so it acts as a single operand.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
                                          (float const *) (ADDR),       \
                                          (__v16si)(__m512i) (INDEX),   \
                                          (__mmask16)0xFFFF,            \
                                          (int) (SCALE)))
9550
/* Macro form of _mm512_mask_i32gather_ps: calls
   __builtin_ia32_gathersiv16sf with V1OLD as first operand and MASK as
   the mask.  Arguments are parenthesized so an expression argument is
   cast as a whole, and the expansion is wrapped in parentheses.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),    \
                                          (float const *) (ADDR),       \
                                          (__v16si)(__m512i) (INDEX),   \
                                          (__mmask16) (MASK),           \
                                          (int) (SCALE)))
9556
/* Macro form of _mm512_i32gather_pd: calls __builtin_ia32_gathersiv8df
   with _mm512_undefined_pd () as first operand and an all-ones mask.
   Arguments are parenthesized so an expression argument is cast as a
   whole, and the expansion is wrapped in parentheses.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (double const *) (ADDR),      \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8)0xFF,               \
                                          (int) (SCALE)))
9562
/* Macro form of _mm512_mask_i32gather_pd: calls
   __builtin_ia32_gathersiv8df with V1OLD as first operand and MASK as
   the mask.  Arguments are parenthesized so an expression argument is
   cast as a whole, and the expansion is wrapped in parentheses.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),    \
                                          (double const *) (ADDR),      \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
9568
/* Macro form of _mm512_i64gather_ps: calls __builtin_ia32_gatherdiv16sf
   with _mm256_undefined_ps () as first operand and an all-ones 8-bit
   mask; the result is a 256-bit vector.  Arguments are parenthesized so
   an expression argument is cast as a whole, and the expansion is
   wrapped in parentheses.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),\
                                          (float const *) (ADDR),       \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF,               \
                                          (int) (SCALE)))
9574
/* Macro form of _mm512_mask_i64gather_ps: calls
   __builtin_ia32_gatherdiv16sf with the 256-bit V1OLD as first operand
   and MASK as the mask.  Arguments are parenthesized so an expression
   argument is cast as a whole, and the expansion is wrapped in
   parentheses.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),     \
                                          (float const *) (ADDR),       \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
9580
/* Macro form of _mm512_i64gather_pd: calls __builtin_ia32_gatherdiv8df
   with _mm512_undefined_pd () as first operand and an all-ones mask.
   Arguments are parenthesized so an expression argument is cast as a
   whole, and the expansion is wrapped in parentheses.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (double const *) (ADDR),      \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF,               \
                                          (int) (SCALE)))
9586
/* Macro form of _mm512_mask_i64gather_pd: calls
   __builtin_ia32_gatherdiv8df with V1OLD as first operand and MASK as
   the mask.  Arguments are parenthesized so an expression argument is
   cast as a whole, and the expansion is wrapped in parentheses.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),    \
                                          (double const *) (ADDR),      \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
9592
/* Macro form of _mm512_i32gather_epi32: calls
   __builtin_ia32_gathersiv16si with _mm512_undefined_si512 () as first
   operand and an all-ones mask.  Arguments are parenthesized so an
   expression argument is cast as a whole, and the expansion is wrapped
   in parentheses.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv16si (                             \
                                (__v16si)_mm512_undefined_si512 (),     \
                                (int const *) (ADDR),                   \
                                (__v16si)(__m512i) (INDEX),             \
                                (__mmask16)0xFFFF, (int) (SCALE)))
9598
/* Macro form of _mm512_mask_i32gather_epi32: calls
   __builtin_ia32_gathersiv16si with V1OLD as first operand and MASK as
   the mask.  Arguments are parenthesized so an expression argument is
   cast as a whole, and the expansion is wrapped in parentheses.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),  \
                                           (int const *) (ADDR),        \
                                           (__v16si)(__m512i) (INDEX),  \
                                           (__mmask16) (MASK),          \
                                           (int) (SCALE)))
9604
/* Macro form of _mm512_i32gather_epi64: calls
   __builtin_ia32_gathersiv8di with _mm512_undefined_si512 () as first
   operand and an all-ones mask.  Arguments are parenthesized so an
   expression argument is cast as a whole, and the expansion is wrapped
   in parentheses.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv8di (                              \
                                (__v8di)_mm512_undefined_si512 (),      \
                                (long long const *) (ADDR),             \
                                (__v8si)(__m256i) (INDEX),              \
                                (__mmask8)0xFF, (int) (SCALE)))
9610
/* Macro form of _mm512_mask_i32gather_epi64: calls
   __builtin_ia32_gathersiv8di with V1OLD as first operand and MASK as
   the mask.  Arguments are parenthesized so an expression argument is
   cast as a whole, and the expansion is wrapped in parentheses.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),    \
                                          (long long const *) (ADDR),   \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
9616
/* Macro form of _mm512_i64gather_epi32: calls
   __builtin_ia32_gatherdiv16si with _mm256_undefined_si256 () as first
   operand and an all-ones 8-bit mask; the result is a 256-bit vector.
   Arguments are parenthesized so an expression argument is cast as a
   whole, and the expansion is wrapped in parentheses.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                        \
  ((__m256i) __builtin_ia32_gatherdiv16si (                               \
                                (__v8si)_mm256_undefined_si256(),         \
                                (int const *) (ADDR),                     \
                                (__v8di)(__m512i) (INDEX),                \
                                (__mmask8)0xFF, (int) (SCALE)))
9622
/* Macro form of _mm512_mask_i64gather_epi32: calls
   __builtin_ia32_gatherdiv16si with the 256-bit V1OLD as first operand
   and MASK as the mask.  Arguments are parenthesized so an expression
   argument is cast as a whole, and the expansion is wrapped in
   parentheses.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),   \
                                           (int const *) (ADDR),        \
                                           (__v8di)(__m512i) (INDEX),   \
                                           (__mmask8) (MASK),           \
                                           (int) (SCALE)))
9628
/* Macro form of _mm512_i64gather_epi64: calls
   __builtin_ia32_gatherdiv8di with _mm512_undefined_si512 () as first
   operand and an all-ones mask.  Arguments are parenthesized so an
   expression argument is cast as a whole, and the expansion is wrapped
   in parentheses.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gatherdiv8di (                              \
                                (__v8di)_mm512_undefined_si512 (),      \
                                (long long const *) (ADDR),             \
                                (__v8di)(__m512i) (INDEX),              \
                                (__mmask8)0xFF, (int) (SCALE)))
9634
/* Macro form of _mm512_mask_i64gather_epi64: calls
   __builtin_ia32_gatherdiv8di with V1OLD as first operand and MASK as
   the mask.  Arguments are parenthesized so an expression argument is
   cast as a whole, and the expansion is wrapped in parentheses.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),    \
                                          (long long const *) (ADDR),   \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
9640
/* Macro form of _mm512_i32scatter_ps: calls
   __builtin_ia32_scattersiv16sf with an all-ones mask.  Each argument is
   parenthesized before it is cast so that an expression argument is
   converted as a whole.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16)0xFFFF,   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9645
/* Macro form of _mm512_mask_i32scatter_ps: calls
   __builtin_ia32_scattersiv16sf with MASK as the mask operand.  Each
   argument is parenthesized before it is cast so that an expression
   argument is converted as a whole.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) (MASK),  \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9650
/* Macro form of _mm512_i32scatter_pd: calls
   __builtin_ia32_scattersiv8df with an all-ones mask.  Each argument is
   parenthesized before it is cast so that an expression argument is
   converted as a whole.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8)0xFF,      \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9655
/* Macro form of _mm512_mask_i32scatter_pd: calls
   __builtin_ia32_scattersiv8df with MASK as the mask operand.  Each
   argument is parenthesized before it is cast so that an expression
   argument is converted as a whole.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) (MASK),   \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9660
/* Macro form of _mm512_i64scatter_ps: calls
   __builtin_ia32_scatterdiv16sf with an all-ones 8-bit mask and the
   256-bit V1 as data.  Each argument is parenthesized before it is cast
   so that an expression argument is converted as a whole.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8)0xFF,      \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9665
/* Macro form of _mm512_mask_i64scatter_ps: calls
   __builtin_ia32_scatterdiv16sf with MASK as the mask operand and the
   256-bit V1 as data.  Only eight lanes are involved (__v8di indices,
   __v8sf data), so MASK is cast to __mmask8 like the unmasked sibling
   macro, not __mmask16.  Each argument is parenthesized before it is
   cast so that an expression argument is converted as a whole.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) (MASK),   \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9670
/* Macro form of _mm512_i64scatter_pd: calls
   __builtin_ia32_scatterdiv8df with an all-ones mask.  Each argument is
   parenthesized before it is cast so that an expression argument is
   converted as a whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8)0xFF,      \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9675
/* Macro form of _mm512_mask_i64scatter_pd: calls
   __builtin_ia32_scatterdiv8df with MASK as the mask operand.  Each
   argument is parenthesized before it is cast so that an expression
   argument is converted as a whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) (MASK),   \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9680
/* Macro form of _mm512_i32scatter_epi32: calls
   __builtin_ia32_scattersiv16si with an all-ones mask.  Each argument is
   parenthesized before it is cast so that an expression argument is
   converted as a whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16)0xFFFF,     \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9685
/* Macro form of _mm512_mask_i32scatter_epi32: calls
   __builtin_ia32_scattersiv16si with MASK as the mask operand.  Each
   argument is parenthesized before it is cast so that an expression
   argument is converted as a whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) (MASK),    \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9690
/* Macro form of _mm512_i32scatter_epi64: calls
   __builtin_ia32_scattersiv8di with an all-ones mask.  Each argument is
   parenthesized before it is cast so that an expression argument is
   converted as a whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8)0xFF,   \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9695
/* Macro form of _mm512_mask_i32scatter_epi64: calls
   __builtin_ia32_scattersiv8di with MASK as the mask operand.  Each
   argument is parenthesized before it is cast so that an expression
   argument is converted as a whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR),                   \
                                (__mmask8) (MASK),                      \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9700
/* Macro form of _mm512_i64scatter_epi32: calls
   __builtin_ia32_scatterdiv16si with an all-ones 8-bit mask and the
   256-bit V1 as data.  Each argument is parenthesized before it is cast
   so that an expression argument is converted as a whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8)0xFF,        \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9705
/* Macro form of _mm512_mask_i64scatter_epi32: calls
   __builtin_ia32_scatterdiv16si with MASK as the mask operand and the
   256-bit V1 as data.  Each argument is parenthesized before it is cast
   so that an expression argument is converted as a whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) (MASK),     \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9710
/* Macro form of _mm512_i64scatter_epi64: calls
   __builtin_ia32_scatterdiv8di with an all-ones mask.  Each argument is
   parenthesized before it is cast so that an expression argument is
   converted as a whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8)0xFF,   \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9715
/* Macro form of _mm512_mask_i64scatter_epi64: calls
   __builtin_ia32_scatterdiv8di with MASK as the mask operand.  Each
   argument is parenthesized before it is cast so that an expression
   argument is converted as a whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR),                   \
                                (__mmask8) (MASK),                      \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9720 #endif
9721
9722 extern __inline __m512d
9723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9724 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9725 {
9726   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9727                                                       (__v8df) __W,
9728                                                       (__mmask8) __U);
9729 }
9730
9731 extern __inline __m512d
9732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9733 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9734 {
9735   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9736                                                       (__v8df)
9737                                                       _mm512_setzero_pd (),
9738                                                       (__mmask8) __U);
9739 }
9740
9741 extern __inline void
9742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9743 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9744 {
9745   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9746                                           (__mmask8) __U);
9747 }
9748
9749 extern __inline __m512
9750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9752 {
9753   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9754                                                      (__v16sf) __W,
9755                                                      (__mmask16) __U);
9756 }
9757
9758 extern __inline __m512
9759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9760 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9761 {
9762   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9763                                                      (__v16sf)
9764                                                      _mm512_setzero_ps (),
9765                                                      (__mmask16) __U);
9766 }
9767
9768 extern __inline void
9769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9770 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9771 {
9772   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9773                                           (__mmask16) __U);
9774 }
9775
9776 extern __inline __m512i
9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9779 {
9780   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9781                                                       (__v8di) __W,
9782                                                       (__mmask8) __U);
9783 }
9784
9785 extern __inline __m512i
9786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9787 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9788 {
9789   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9790                                                       (__v8di)
9791                                                       _mm512_setzero_si512 (),
9792                                                       (__mmask8) __U);
9793 }
9794
9795 extern __inline void
9796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9797 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9798 {
9799   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9800                                           (__mmask8) __U);
9801 }
9802
9803 extern __inline __m512i
9804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9806 {
9807   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9808                                                       (__v16si) __W,
9809                                                       (__mmask16) __U);
9810 }
9811
9812 extern __inline __m512i
9813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9815 {
9816   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9817                                                       (__v16si)
9818                                                       _mm512_setzero_si512 (),
9819                                                       (__mmask16) __U);
9820 }
9821
9822 extern __inline void
9823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9825 {
9826   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9827                                           (__mmask16) __U);
9828 }
9829
9830 extern __inline __m512d
9831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9833 {
9834   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9835                                                     (__v8df) __W,
9836                                                     (__mmask8) __U);
9837 }
9838
9839 extern __inline __m512d
9840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9841 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9842 {
9843   return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9844                                                      (__v8df)
9845                                                      _mm512_setzero_pd (),
9846                                                      (__mmask8) __U);
9847 }
9848
9849 extern __inline __m512d
9850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9851 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9852 {
9853   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9854                                                         (__v8df) __W,
9855                                                         (__mmask8) __U);
9856 }
9857
9858 extern __inline __m512d
9859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9860 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9861 {
9862   return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9863                                                          (__v8df)
9864                                                          _mm512_setzero_pd (),
9865                                                          (__mmask8) __U);
9866 }
9867
9868 extern __inline __m512
9869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9870 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9871 {
9872   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9873                                                    (__v16sf) __W,
9874                                                    (__mmask16) __U);
9875 }
9876
9877 extern __inline __m512
9878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9879 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9880 {
9881   return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9882                                                     (__v16sf)
9883                                                     _mm512_setzero_ps (),
9884                                                     (__mmask16) __U);
9885 }
9886
9887 extern __inline __m512
9888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9889 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9890 {
9891   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9892                                                        (__v16sf) __W,
9893                                                        (__mmask16) __U);
9894 }
9895
9896 extern __inline __m512
9897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9898 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9899 {
9900   return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9901                                                         (__v16sf)
9902                                                         _mm512_setzero_ps (),
9903                                                         (__mmask16) __U);
9904 }
9905
9906 extern __inline __m512i
9907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9909 {
9910   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9911                                                     (__v8di) __W,
9912                                                     (__mmask8) __U);
9913 }
9914
9915 extern __inline __m512i
9916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9917 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9918 {
9919   return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9920                                                      (__v8di)
9921                                                      _mm512_setzero_si512 (),
9922                                                      (__mmask8) __U);
9923 }
9924
9925 extern __inline __m512i
9926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9928 {
9929   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9930                                                         (__v8di) __W,
9931                                                         (__mmask8) __U);
9932 }
9933
9934 extern __inline __m512i
9935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9936 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9937 {
9938   return (__m512i)
9939          __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9940                                                (__v8di)
9941                                                _mm512_setzero_si512 (),
9942                                                (__mmask8) __U);
9943 }
9944
9945 extern __inline __m512i
9946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9947 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9948 {
9949   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9950                                                     (__v16si) __W,
9951                                                     (__mmask16) __U);
9952 }
9953
9954 extern __inline __m512i
9955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9956 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9957 {
9958   return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9959                                                      (__v16si)
9960                                                      _mm512_setzero_si512 (),
9961                                                      (__mmask16) __U);
9962 }
9963
9964 extern __inline __m512i
9965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9966 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9967 {
9968   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9969                                                         (__v16si) __W,
9970                                                         (__mmask16) __U);
9971 }
9972
9973 extern __inline __m512i
9974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9976 {
9977   return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9978                                                          (__v16si)
9979                                                          _mm512_setzero_si512
9980                                                          (), (__mmask16) __U);
9981 }
9982
9983 /* Mask arithmetic operations */
9984 extern __inline __mmask16
9985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9986 _mm512_kand (__mmask16 __A, __mmask16 __B)
9987 {
9988   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9989 }
9990
9991 extern __inline __mmask16
9992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993 _mm512_kandn (__mmask16 __A, __mmask16 __B)
9994 {
9995   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9996 }
9997
9998 extern __inline __mmask16
9999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000 _mm512_kor (__mmask16 __A, __mmask16 __B)
10001 {
10002   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10003 }
10004
10005 extern __inline int
10006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10007 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10008 {
10009   return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10010                                                 (__mmask16) __B);
10011 }
10012
10013 extern __inline int
10014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10016 {
10017   return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10018                                                 (__mmask16) __B);
10019 }
10020
10021 extern __inline __mmask16
10022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10023 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10024 {
10025   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10026 }
10027
10028 extern __inline __mmask16
10029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10030 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10031 {
10032   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10033 }
10034
10035 extern __inline __mmask16
10036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037 _mm512_knot (__mmask16 __A)
10038 {
10039   return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10040 }
10041
10042 extern __inline __mmask16
10043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10044 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10045 {
10046   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10047 }
10048
10049 #ifdef __OPTIMIZE__
10050 extern __inline __m512i
10051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10052 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10053                           const int __imm)
10054 {
10055   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10056                                                     (__v4si) __D,
10057                                                     __imm,
10058                                                     (__v16si)
10059                                                     _mm512_setzero_si512 (),
10060                                                     __B);
10061 }
10062
10063 extern __inline __m512
10064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10065 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10066                           const int __imm)
10067 {
10068   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10069                                                    (__v4sf) __D,
10070                                                    __imm,
10071                                                    (__v16sf)
10072                                                    _mm512_setzero_ps (), __B);
10073 }
10074
10075 extern __inline __m512i
10076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10077 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10078                          __m128i __D, const int __imm)
10079 {
10080   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10081                                                     (__v4si) __D,
10082                                                     __imm,
10083                                                     (__v16si) __A,
10084                                                     __B);
10085 }
10086
10087 extern __inline __m512
10088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10090                          __m128 __D, const int __imm)
10091 {
10092   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10093                                                    (__v4sf) __D,
10094                                                    __imm,
10095                                                    (__v16sf) __A, __B);
10096 }
10097 #else
/* Macro form of _mm512_maskz_insertf32x4, used when __OPTIMIZE__ is not
   defined.  The operation works on sixteen 32-bit lanes (__v16sf), so the
   mask A is cast to __mmask16, matching the inline version's parameter
   type; a cast to __mmask8 would drop the upper eight mask bits.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))
10102
/* Macro form of _mm512_maskz_inserti32x4, used when __OPTIMIZE__ is not
   defined.  The operation works on sixteen 32-bit lanes (__v16si), so the
   mask A is cast to __mmask16, matching the inline version's parameter
   type; a cast to __mmask8 would drop the upper eight mask bits.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10107
/* Macro form of _mm512_mask_insertf32x4, used when __OPTIMIZE__ is not
   defined.  The operation works on sixteen 32-bit lanes (__v16sf), so the
   mask B is cast to __mmask16, matching the inline version's parameter
   type; a cast to __mmask8 would drop the upper eight mask bits.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
    (__mmask16)(B)))
10112
/* Macro form of _mm512_mask_inserti32x4, used when __OPTIMIZE__ is not
   defined.  The operation works on sixteen 32-bit lanes (__v16si), so the
   mask B is cast to __mmask16, matching the inline version's parameter
   type; a cast to __mmask8 would drop the upper eight mask bits.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
    (__mmask16)(B)))
10117 #endif
10118
10119 extern __inline __m512i
10120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10121 _mm512_max_epi64 (__m512i __A, __m512i __B)
10122 {
10123   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10124                                                   (__v8di) __B,
10125                                                   (__v8di)
10126                                                   _mm512_undefined_si512 (),
10127                                                   (__mmask8) -1);
10128 }
10129
10130 extern __inline __m512i
10131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10132 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10133 {
10134   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10135                                                   (__v8di) __B,
10136                                                   (__v8di)
10137                                                   _mm512_setzero_si512 (),
10138                                                   __M);
10139 }
10140
10141 extern __inline __m512i
10142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10144 {
10145   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10146                                                   (__v8di) __B,
10147                                                   (__v8di) __W, __M);
10148 }
10149
10150 extern __inline __m512i
10151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10152 _mm512_min_epi64 (__m512i __A, __m512i __B)
10153 {
10154   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10155                                                   (__v8di) __B,
10156                                                   (__v8di)
10157                                                   _mm512_undefined_si512 (),
10158                                                   (__mmask8) -1);
10159 }
10160
10161 extern __inline __m512i
10162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10163 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10164 {
10165   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10166                                                   (__v8di) __B,
10167                                                   (__v8di) __W, __M);
10168 }
10169
10170 extern __inline __m512i
10171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10172 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10173 {
10174   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10175                                                   (__v8di) __B,
10176                                                   (__v8di)
10177                                                   _mm512_setzero_si512 (),
10178                                                   __M);
10179 }
10180
10181 extern __inline __m512i
10182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10183 _mm512_max_epu64 (__m512i __A, __m512i __B)
10184 {
10185   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10186                                                   (__v8di) __B,
10187                                                   (__v8di)
10188                                                   _mm512_undefined_si512 (),
10189                                                   (__mmask8) -1);
10190 }
10191
10192 extern __inline __m512i
10193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10194 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10195 {
10196   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10197                                                   (__v8di) __B,
10198                                                   (__v8di)
10199                                                   _mm512_setzero_si512 (),
10200                                                   __M);
10201 }
10202
10203 extern __inline __m512i
10204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10206 {
10207   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10208                                                   (__v8di) __B,
10209                                                   (__v8di) __W, __M);
10210 }
10211
10212 extern __inline __m512i
10213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10214 _mm512_min_epu64 (__m512i __A, __m512i __B)
10215 {
10216   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10217                                                   (__v8di) __B,
10218                                                   (__v8di)
10219                                                   _mm512_undefined_si512 (),
10220                                                   (__mmask8) -1);
10221 }
10222
10223 extern __inline __m512i
10224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10225 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10226 {
10227   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10228                                                   (__v8di) __B,
10229                                                   (__v8di) __W, __M);
10230 }
10231
10232 extern __inline __m512i
10233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10234 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10235 {
10236   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10237                                                   (__v8di) __B,
10238                                                   (__v8di)
10239                                                   _mm512_setzero_si512 (),
10240                                                   __M);
10241 }
10242
10243 extern __inline __m512i
10244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10245 _mm512_max_epi32 (__m512i __A, __m512i __B)
10246 {
10247   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10248                                                   (__v16si) __B,
10249                                                   (__v16si)
10250                                                   _mm512_undefined_si512 (),
10251                                                   (__mmask16) -1);
10252 }
10253
10254 extern __inline __m512i
10255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10256 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10257 {
10258   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10259                                                   (__v16si) __B,
10260                                                   (__v16si)
10261                                                   _mm512_setzero_si512 (),
10262                                                   __M);
10263 }
10264
10265 extern __inline __m512i
10266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10267 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10268 {
10269   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10270                                                   (__v16si) __B,
10271                                                   (__v16si) __W, __M);
10272 }
10273
10274 extern __inline __m512i
10275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10276 _mm512_min_epi32 (__m512i __A, __m512i __B)
10277 {
10278   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10279                                                   (__v16si) __B,
10280                                                   (__v16si)
10281                                                   _mm512_undefined_si512 (),
10282                                                   (__mmask16) -1);
10283 }
10284
10285 extern __inline __m512i
10286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10287 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10288 {
10289   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10290                                                   (__v16si) __B,
10291                                                   (__v16si)
10292                                                   _mm512_setzero_si512 (),
10293                                                   __M);
10294 }
10295
10296 extern __inline __m512i
10297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10298 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10299 {
10300   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10301                                                   (__v16si) __B,
10302                                                   (__v16si) __W, __M);
10303 }
10304
10305 extern __inline __m512i
10306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10307 _mm512_max_epu32 (__m512i __A, __m512i __B)
10308 {
10309   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10310                                                   (__v16si) __B,
10311                                                   (__v16si)
10312                                                   _mm512_undefined_si512 (),
10313                                                   (__mmask16) -1);
10314 }
10315
10316 extern __inline __m512i
10317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10318 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10319 {
10320   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10321                                                   (__v16si) __B,
10322                                                   (__v16si)
10323                                                   _mm512_setzero_si512 (),
10324                                                   __M);
10325 }
10326
10327 extern __inline __m512i
10328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10329 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10330 {
10331   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10332                                                   (__v16si) __B,
10333                                                   (__v16si) __W, __M);
10334 }
10335
10336 extern __inline __m512i
10337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10338 _mm512_min_epu32 (__m512i __A, __m512i __B)
10339 {
10340   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10341                                                   (__v16si) __B,
10342                                                   (__v16si)
10343                                                   _mm512_undefined_si512 (),
10344                                                   (__mmask16) -1);
10345 }
10346
10347 extern __inline __m512i
10348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10350 {
10351   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10352                                                   (__v16si) __B,
10353                                                   (__v16si)
10354                                                   _mm512_setzero_si512 (),
10355                                                   __M);
10356 }
10357
10358 extern __inline __m512i
10359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10360 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10361 {
10362   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10363                                                   (__v16si) __B,
10364                                                   (__v16si) __W, __M);
10365 }
10366
10367 extern __inline __m512
10368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10369 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10370 {
10371   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10372                                                    (__v16sf) __B,
10373                                                    (__v16sf)
10374                                                    _mm512_undefined_ps (),
10375                                                    (__mmask16) -1);
10376 }
10377
10378 extern __inline __m512
10379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10380 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10381 {
10382   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10383                                                    (__v16sf) __B,
10384                                                    (__v16sf) __W,
10385                                                    (__mmask16) __U);
10386 }
10387
10388 extern __inline __m512
10389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10390 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10391 {
10392   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10393                                                    (__v16sf) __B,
10394                                                    (__v16sf)
10395                                                    _mm512_setzero_ps (),
10396                                                    (__mmask16) __U);
10397 }
10398
10399 #ifdef __OPTIMIZE__
10400 extern __inline __m128d
10401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10402 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10403 {
10404   return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10405                                                (__v2df) __B,
10406                                                __R);
10407 }
10408
10409 extern __inline __m128
10410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10411 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10412 {
10413   return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10414                                               (__v4sf) __B,
10415                                               __R);
10416 }
10417
10418 extern __inline __m128d
10419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10420 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10421 {
10422   return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10423                                                (__v2df) __B,
10424                                                __R);
10425 }
10426
10427 extern __inline __m128
10428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10429 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10430 {
10431   return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10432                                               (__v4sf) __B,
10433                                               __R);
10434 }
10435
10436 #else
/* Macro fallbacks for the scalar max/min rounding intrinsics.  Each one
   expands to the same max/min builtin that the inline definition of the
   same name calls (not the add/sub builtins), and the whole expansion is
   parenthesised so it composes safely inside larger expressions.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round (A, B, C))

#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round (A, B, C))

#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round (A, B, C))

#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round (A, B, C))
10448 #endif
10449
10450 extern __inline __m512d
10451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10452 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10453 {
10454   return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10455                                                      (__v8df) __W,
10456                                                      (__mmask8) __U);
10457 }
10458
10459 extern __inline __m512
10460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10462 {
10463   return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10464                                                     (__v16sf) __W,
10465                                                     (__mmask16) __U);
10466 }
10467
10468 extern __inline __m512i
10469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10470 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10471 {
10472   return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10473                                                     (__v8di) __W,
10474                                                     (__mmask8) __U);
10475 }
10476
10477 extern __inline __m512i
10478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10479 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10480 {
10481   return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10482                                                     (__v16si) __W,
10483                                                     (__mmask16) __U);
10484 }
10485
10486 #ifdef __OPTIMIZE__
10487 extern __inline __m128d
10488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10489 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10490 {
10491   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10492                                                    (__v2df) __A,
10493                                                    (__v2df) __B,
10494                                                    __R);
10495 }
10496
10497 extern __inline __m128
10498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10499 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10500 {
10501   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10502                                                   (__v4sf) __A,
10503                                                   (__v4sf) __B,
10504                                                   __R);
10505 }
10506
10507 extern __inline __m128d
10508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10509 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10510 {
10511   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10512                                                    (__v2df) __A,
10513                                                    -(__v2df) __B,
10514                                                    __R);
10515 }
10516
10517 extern __inline __m128
10518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10519 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10520 {
10521   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10522                                                   (__v4sf) __A,
10523                                                   -(__v4sf) __B,
10524                                                   __R);
10525 }
10526
10527 extern __inline __m128d
10528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10529 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10530 {
10531   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10532                                                    -(__v2df) __A,
10533                                                    (__v2df) __B,
10534                                                    __R);
10535 }
10536
10537 extern __inline __m128
10538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10539 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10540 {
10541   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10542                                                   -(__v4sf) __A,
10543                                                   (__v4sf) __B,
10544                                                   __R);
10545 }
10546
10547 extern __inline __m128d
10548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10549 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10550 {
10551   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10552                                                    -(__v2df) __A,
10553                                                    -(__v2df) __B,
10554                                                    __R);
10555 }
10556
10557 extern __inline __m128
10558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10559 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10560 {
10561   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10562                                                   -(__v4sf) __A,
10563                                                   -(__v4sf) __B,
10564                                                   __R);
10565 }
10566 #else
/* Macro fallbacks for the scalar FMA rounding intrinsics.  All eight go
   through the vfmadd{sd,ss}3_round builtins; the sub/negated forms negate
   the second and/or third macro argument.  Every expansion is wrapped in
   parentheses so that a postfix operator at the use site (subscript, etc.)
   applies to the cast result rather than binding to the builtin call
   before the cast.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10590 #endif
10591
10592 #ifdef __OPTIMIZE__
10593 extern __inline int
10594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10595 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10596 {
10597   return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10598 }
10599
10600 extern __inline int
10601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10602 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10603 {
10604   return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10605 }
10606 #else
/* Macro fallbacks: hand all four arguments straight to the vcomiss /
   vcomisd builtins.  */
#define _mm_comi_round_ss(A, B, C, D)            \
    (__builtin_ia32_vcomiss (A, B, C, D))
#define _mm_comi_round_sd(A, B, C, D)            \
    (__builtin_ia32_vcomisd (A, B, C, D))
10611 #endif
10612
10613 extern __inline __m512d
10614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10615 _mm512_sqrt_pd (__m512d __A)
10616 {
10617   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10618                                                   (__v8df)
10619                                                   _mm512_undefined_pd (),
10620                                                   (__mmask8) -1,
10621                                                   _MM_FROUND_CUR_DIRECTION);
10622 }
10623
10624 extern __inline __m512d
10625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10626 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10627 {
10628   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10629                                                   (__v8df) __W,
10630                                                   (__mmask8) __U,
10631                                                   _MM_FROUND_CUR_DIRECTION);
10632 }
10633
10634 extern __inline __m512d
10635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10636 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10637 {
10638   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10639                                                   (__v8df)
10640                                                   _mm512_setzero_pd (),
10641                                                   (__mmask8) __U,
10642                                                   _MM_FROUND_CUR_DIRECTION);
10643 }
10644
10645 extern __inline __m512
10646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10647 _mm512_sqrt_ps (__m512 __A)
10648 {
10649   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10650                                                  (__v16sf)
10651                                                  _mm512_undefined_ps (),
10652                                                  (__mmask16) -1,
10653                                                  _MM_FROUND_CUR_DIRECTION);
10654 }
10655
10656 extern __inline __m512
10657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10659 {
10660   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10661                                                  (__v16sf) __W,
10662                                                  (__mmask16) __U,
10663                                                  _MM_FROUND_CUR_DIRECTION);
10664 }
10665
10666 extern __inline __m512
10667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10668 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10669 {
10670   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10671                                                  (__v16sf)
10672                                                  _mm512_setzero_ps (),
10673                                                  (__mmask16) __U,
10674                                                  _MM_FROUND_CUR_DIRECTION);
10675 }
10676
10677 extern __inline __m512d
10678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10679 _mm512_add_pd (__m512d __A, __m512d __B)
10680 {
10681   return (__m512d) ((__v8df)__A + (__v8df)__B);
10682 }
10683
10684 extern __inline __m512d
10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10687 {
10688   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10689                                                  (__v8df) __B,
10690                                                  (__v8df) __W,
10691                                                  (__mmask8) __U,
10692                                                  _MM_FROUND_CUR_DIRECTION);
10693 }
10694
10695 extern __inline __m512d
10696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10697 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10698 {
10699   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10700                                                  (__v8df) __B,
10701                                                  (__v8df)
10702                                                  _mm512_setzero_pd (),
10703                                                  (__mmask8) __U,
10704                                                  _MM_FROUND_CUR_DIRECTION);
10705 }
10706
10707 extern __inline __m512
10708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10709 _mm512_add_ps (__m512 __A, __m512 __B)
10710 {
10711   return (__m512) ((__v16sf)__A + (__v16sf)__B);
10712 }
10713
10714 extern __inline __m512
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10717 {
10718   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10719                                                 (__v16sf) __B,
10720                                                 (__v16sf) __W,
10721                                                 (__mmask16) __U,
10722                                                 _MM_FROUND_CUR_DIRECTION);
10723 }
10724
10725 extern __inline __m512
10726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10727 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10728 {
10729   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10730                                                 (__v16sf) __B,
10731                                                 (__v16sf)
10732                                                 _mm512_setzero_ps (),
10733                                                 (__mmask16) __U,
10734                                                 _MM_FROUND_CUR_DIRECTION);
10735 }
10736
10737 extern __inline __m512d
10738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10739 _mm512_sub_pd (__m512d __A, __m512d __B)
10740 {
10741   return (__m512d) ((__v8df)__A - (__v8df)__B);
10742 }
10743
10744 extern __inline __m512d
10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10747 {
10748   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10749                                                  (__v8df) __B,
10750                                                  (__v8df) __W,
10751                                                  (__mmask8) __U,
10752                                                  _MM_FROUND_CUR_DIRECTION);
10753 }
10754
10755 extern __inline __m512d
10756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10757 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10758 {
10759   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10760                                                  (__v8df) __B,
10761                                                  (__v8df)
10762                                                  _mm512_setzero_pd (),
10763                                                  (__mmask8) __U,
10764                                                  _MM_FROUND_CUR_DIRECTION);
10765 }
10766
10767 extern __inline __m512
10768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10769 _mm512_sub_ps (__m512 __A, __m512 __B)
10770 {
10771   return (__m512) ((__v16sf)__A - (__v16sf)__B);
10772 }
10773
10774 extern __inline __m512
10775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10776 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10777 {
10778   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10779                                                 (__v16sf) __B,
10780                                                 (__v16sf) __W,
10781                                                 (__mmask16) __U,
10782                                                 _MM_FROUND_CUR_DIRECTION);
10783 }
10784
10785 extern __inline __m512
10786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10788 {
10789   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10790                                                 (__v16sf) __B,
10791                                                 (__v16sf)
10792                                                 _mm512_setzero_ps (),
10793                                                 (__mmask16) __U,
10794                                                 _MM_FROUND_CUR_DIRECTION);
10795 }
10796
10797 extern __inline __m512d
10798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10799 _mm512_mul_pd (__m512d __A, __m512d __B)
10800 {
10801   return (__m512d) ((__v8df)__A * (__v8df)__B);
10802 }
10803
10804 extern __inline __m512d
10805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10806 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10807 {
10808   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10809                                                  (__v8df) __B,
10810                                                  (__v8df) __W,
10811                                                  (__mmask8) __U,
10812                                                  _MM_FROUND_CUR_DIRECTION);
10813 }
10814
10815 extern __inline __m512d
10816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10817 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10818 {
10819   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10820                                                  (__v8df) __B,
10821                                                  (__v8df)
10822                                                  _mm512_setzero_pd (),
10823                                                  (__mmask8) __U,
10824                                                  _MM_FROUND_CUR_DIRECTION);
10825 }
10826
10827 extern __inline __m512
10828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10829 _mm512_mul_ps (__m512 __A, __m512 __B)
10830 {
10831   return (__m512) ((__v16sf)__A * (__v16sf)__B);
10832 }
10833
10834 extern __inline __m512
10835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10836 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10837 {
10838   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10839                                                 (__v16sf) __B,
10840                                                 (__v16sf) __W,
10841                                                 (__mmask16) __U,
10842                                                 _MM_FROUND_CUR_DIRECTION);
10843 }
10844
10845 extern __inline __m512
10846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10847 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10848 {
10849   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10850                                                 (__v16sf) __B,
10851                                                 (__v16sf)
10852                                                 _mm512_setzero_ps (),
10853                                                 (__mmask16) __U,
10854                                                 _MM_FROUND_CUR_DIRECTION);
10855 }
10856
10857 extern __inline __m512d
10858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10859 _mm512_div_pd (__m512d __M, __m512d __V)
10860 {
10861   return (__m512d) ((__v8df)__M / (__v8df)__V);
10862 }
10863
10864 extern __inline __m512d
10865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10866 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10867 {
10868   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10869                                                  (__v8df) __V,
10870                                                  (__v8df) __W,
10871                                                  (__mmask8) __U,
10872                                                  _MM_FROUND_CUR_DIRECTION);
10873 }
10874
10875 extern __inline __m512d
10876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10877 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10878 {
10879   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10880                                                  (__v8df) __V,
10881                                                  (__v8df)
10882                                                  _mm512_setzero_pd (),
10883                                                  (__mmask8) __U,
10884                                                  _MM_FROUND_CUR_DIRECTION);
10885 }
10886
10887 extern __inline __m512
10888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10889 _mm512_div_ps (__m512 __A, __m512 __B)
10890 {
10891   return (__m512) ((__v16sf)__A / (__v16sf)__B);
10892 }
10893
10894 extern __inline __m512
10895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10896 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10897 {
10898   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10899                                                 (__v16sf) __B,
10900                                                 (__v16sf) __W,
10901                                                 (__mmask16) __U,
10902                                                 _MM_FROUND_CUR_DIRECTION);
10903 }
10904
10905 extern __inline __m512
10906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10907 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10908 {
10909   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10910                                                 (__v16sf) __B,
10911                                                 (__v16sf)
10912                                                 _mm512_setzero_ps (),
10913                                                 (__mmask16) __U,
10914                                                 _MM_FROUND_CUR_DIRECTION);
10915 }
10916
10917 extern __inline __m512d
10918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10919 _mm512_max_pd (__m512d __A, __m512d __B)
10920 {
10921   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10922                                                  (__v8df) __B,
10923                                                  (__v8df)
10924                                                  _mm512_undefined_pd (),
10925                                                  (__mmask8) -1,
10926                                                  _MM_FROUND_CUR_DIRECTION);
10927 }
10928
10929 extern __inline __m512d
10930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10931 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10932 {
10933   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10934                                                  (__v8df) __B,
10935                                                  (__v8df) __W,
10936                                                  (__mmask8) __U,
10937                                                  _MM_FROUND_CUR_DIRECTION);
10938 }
10939
10940 extern __inline __m512d
10941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10943 {
10944   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10945                                                  (__v8df) __B,
10946                                                  (__v8df)
10947                                                  _mm512_setzero_pd (),
10948                                                  (__mmask8) __U,
10949                                                  _MM_FROUND_CUR_DIRECTION);
10950 }
10951
10952 extern __inline __m512
10953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10954 _mm512_max_ps (__m512 __A, __m512 __B)
10955 {
10956   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10957                                                 (__v16sf) __B,
10958                                                 (__v16sf)
10959                                                 _mm512_undefined_ps (),
10960                                                 (__mmask16) -1,
10961                                                 _MM_FROUND_CUR_DIRECTION);
10962 }
10963
10964 extern __inline __m512
10965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10966 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10967 {
10968   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10969                                                 (__v16sf) __B,
10970                                                 (__v16sf) __W,
10971                                                 (__mmask16) __U,
10972                                                 _MM_FROUND_CUR_DIRECTION);
10973 }
10974
10975 extern __inline __m512
10976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10977 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10978 {
10979   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10980                                                 (__v16sf) __B,
10981                                                 (__v16sf)
10982                                                 _mm512_setzero_ps (),
10983                                                 (__mmask16) __U,
10984                                                 _MM_FROUND_CUR_DIRECTION);
10985 }
10986
10987 extern __inline __m512d
10988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10989 _mm512_min_pd (__m512d __A, __m512d __B)
10990 {
10991   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10992                                                  (__v8df) __B,
10993                                                  (__v8df)
10994                                                  _mm512_undefined_pd (),
10995                                                  (__mmask8) -1,
10996                                                  _MM_FROUND_CUR_DIRECTION);
10997 }
10998
10999 extern __inline __m512d
11000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11001 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11002 {
11003   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11004                                                  (__v8df) __B,
11005                                                  (__v8df) __W,
11006                                                  (__mmask8) __U,
11007                                                  _MM_FROUND_CUR_DIRECTION);
11008 }
11009
11010 extern __inline __m512d
11011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11012 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11013 {
11014   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11015                                                  (__v8df) __B,
11016                                                  (__v8df)
11017                                                  _mm512_setzero_pd (),
11018                                                  (__mmask8) __U,
11019                                                  _MM_FROUND_CUR_DIRECTION);
11020 }
11021
11022 extern __inline __m512
11023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11024 _mm512_min_ps (__m512 __A, __m512 __B)
11025 {
11026   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11027                                                 (__v16sf) __B,
11028                                                 (__v16sf)
11029                                                 _mm512_undefined_ps (),
11030                                                 (__mmask16) -1,
11031                                                 _MM_FROUND_CUR_DIRECTION);
11032 }
11033
11034 extern __inline __m512
11035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11036 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11037 {
11038   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11039                                                 (__v16sf) __B,
11040                                                 (__v16sf) __W,
11041                                                 (__mmask16) __U,
11042                                                 _MM_FROUND_CUR_DIRECTION);
11043 }
11044
11045 extern __inline __m512
11046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11047 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11048 {
11049   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11050                                                 (__v16sf) __B,
11051                                                 (__v16sf)
11052                                                 _mm512_setzero_ps (),
11053                                                 (__mmask16) __U,
11054                                                 _MM_FROUND_CUR_DIRECTION);
11055 }
11056
11057 extern __inline __m512d
11058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11059 _mm512_scalef_pd (__m512d __A, __m512d __B)
11060 {
11061   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11062                                                     (__v8df) __B,
11063                                                     (__v8df)
11064                                                     _mm512_undefined_pd (),
11065                                                     (__mmask8) -1,
11066                                                     _MM_FROUND_CUR_DIRECTION);
11067 }
11068
11069 extern __inline __m512d
11070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11071 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11072 {
11073   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11074                                                     (__v8df) __B,
11075                                                     (__v8df) __W,
11076                                                     (__mmask8) __U,
11077                                                     _MM_FROUND_CUR_DIRECTION);
11078 }
11079
11080 extern __inline __m512d
11081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11083 {
11084   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11085                                                     (__v8df) __B,
11086                                                     (__v8df)
11087                                                     _mm512_setzero_pd (),
11088                                                     (__mmask8) __U,
11089                                                     _MM_FROUND_CUR_DIRECTION);
11090 }
11091
11092 extern __inline __m512
11093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11094 _mm512_scalef_ps (__m512 __A, __m512 __B)
11095 {
11096   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11097                                                    (__v16sf) __B,
11098                                                    (__v16sf)
11099                                                    _mm512_undefined_ps (),
11100                                                    (__mmask16) -1,
11101                                                    _MM_FROUND_CUR_DIRECTION);
11102 }
11103
11104 extern __inline __m512
11105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11106 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11107 {
11108   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11109                                                    (__v16sf) __B,
11110                                                    (__v16sf) __W,
11111                                                    (__mmask16) __U,
11112                                                    _MM_FROUND_CUR_DIRECTION);
11113 }
11114
11115 extern __inline __m512
11116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11117 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11118 {
11119   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11120                                                    (__v16sf) __B,
11121                                                    (__v16sf)
11122                                                    _mm512_setzero_ps (),
11123                                                    (__mmask16) __U,
11124                                                    _MM_FROUND_CUR_DIRECTION);
11125 }
11126
11127 extern __inline __m128d
11128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11129 _mm_scalef_sd (__m128d __A, __m128d __B)
11130 {
11131   return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11132                                                   (__v2df) __B,
11133                                                   _MM_FROUND_CUR_DIRECTION);
11134 }
11135
11136 extern __inline __m128
11137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11138 _mm_scalef_ss (__m128 __A, __m128 __B)
11139 {
11140   return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11141                                                  (__v4sf) __B,
11142                                                  _MM_FROUND_CUR_DIRECTION);
11143 }
11144
11145 extern __inline __m512d
11146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11148 {
11149   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11150                                                     (__v8df) __B,
11151                                                     (__v8df) __C,
11152                                                     (__mmask8) -1,
11153                                                     _MM_FROUND_CUR_DIRECTION);
11154 }
11155
11156 extern __inline __m512d
11157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11159 {
11160   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11161                                                     (__v8df) __B,
11162                                                     (__v8df) __C,
11163                                                     (__mmask8) __U,
11164                                                     _MM_FROUND_CUR_DIRECTION);
11165 }
11166
11167 extern __inline __m512d
11168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11170 {
11171   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11172                                                      (__v8df) __B,
11173                                                      (__v8df) __C,
11174                                                      (__mmask8) __U,
11175                                                      _MM_FROUND_CUR_DIRECTION);
11176 }
11177
11178 extern __inline __m512d
11179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11181 {
11182   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11183                                                      (__v8df) __B,
11184                                                      (__v8df) __C,
11185                                                      (__mmask8) __U,
11186                                                      _MM_FROUND_CUR_DIRECTION);
11187 }
11188
11189 extern __inline __m512
11190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11191 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11192 {
11193   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11194                                                    (__v16sf) __B,
11195                                                    (__v16sf) __C,
11196                                                    (__mmask16) -1,
11197                                                    _MM_FROUND_CUR_DIRECTION);
11198 }
11199
11200 extern __inline __m512
11201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11202 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11203 {
11204   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11205                                                    (__v16sf) __B,
11206                                                    (__v16sf) __C,
11207                                                    (__mmask16) __U,
11208                                                    _MM_FROUND_CUR_DIRECTION);
11209 }
11210
11211 extern __inline __m512
11212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11213 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11214 {
11215   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11216                                                     (__v16sf) __B,
11217                                                     (__v16sf) __C,
11218                                                     (__mmask16) __U,
11219                                                     _MM_FROUND_CUR_DIRECTION);
11220 }
11221
11222 extern __inline __m512
11223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11224 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11225 {
11226   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11227                                                     (__v16sf) __B,
11228                                                     (__v16sf) __C,
11229                                                     (__mmask16) __U,
11230                                                     _MM_FROUND_CUR_DIRECTION);
11231 }
11232
11233 extern __inline __m512d
11234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11235 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11236 {
11237   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11238                                                     (__v8df) __B,
11239                                                     -(__v8df) __C,
11240                                                     (__mmask8) -1,
11241                                                     _MM_FROUND_CUR_DIRECTION);
11242 }
11243
11244 extern __inline __m512d
11245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11246 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11247 {
11248   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11249                                                     (__v8df) __B,
11250                                                     -(__v8df) __C,
11251                                                     (__mmask8) __U,
11252                                                     _MM_FROUND_CUR_DIRECTION);
11253 }
11254
11255 extern __inline __m512d
11256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11257 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11258 {
11259   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11260                                                      (__v8df) __B,
11261                                                      (__v8df) __C,
11262                                                      (__mmask8) __U,
11263                                                      _MM_FROUND_CUR_DIRECTION);
11264 }
11265
11266 extern __inline __m512d
11267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11268 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11269 {
11270   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11271                                                      (__v8df) __B,
11272                                                      -(__v8df) __C,
11273                                                      (__mmask8) __U,
11274                                                      _MM_FROUND_CUR_DIRECTION);
11275 }
11276
11277 extern __inline __m512
11278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11280 {
11281   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11282                                                    (__v16sf) __B,
11283                                                    -(__v16sf) __C,
11284                                                    (__mmask16) -1,
11285                                                    _MM_FROUND_CUR_DIRECTION);
11286 }
11287
11288 extern __inline __m512
11289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11290 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11291 {
11292   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11293                                                    (__v16sf) __B,
11294                                                    -(__v16sf) __C,
11295                                                    (__mmask16) __U,
11296                                                    _MM_FROUND_CUR_DIRECTION);
11297 }
11298
11299 extern __inline __m512
11300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11301 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11302 {
11303   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11304                                                     (__v16sf) __B,
11305                                                     (__v16sf) __C,
11306                                                     (__mmask16) __U,
11307                                                     _MM_FROUND_CUR_DIRECTION);
11308 }
11309
11310 extern __inline __m512
11311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11312 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11313 {
11314   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11315                                                     (__v16sf) __B,
11316                                                     -(__v16sf) __C,
11317                                                     (__mmask16) __U,
11318                                                     _MM_FROUND_CUR_DIRECTION);
11319 }
11320
11321 extern __inline __m512d
11322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11323 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11324 {
11325   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11326                                                        (__v8df) __B,
11327                                                        (__v8df) __C,
11328                                                        (__mmask8) -1,
11329                                                        _MM_FROUND_CUR_DIRECTION);
11330 }
11331
11332 extern __inline __m512d
11333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11334 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11335 {
11336   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11337                                                        (__v8df) __B,
11338                                                        (__v8df) __C,
11339                                                        (__mmask8) __U,
11340                                                        _MM_FROUND_CUR_DIRECTION);
11341 }
11342
11343 extern __inline __m512d
11344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11345 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11346 {
11347   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11348                                                         (__v8df) __B,
11349                                                         (__v8df) __C,
11350                                                         (__mmask8) __U,
11351                                                         _MM_FROUND_CUR_DIRECTION);
11352 }
11353
11354 extern __inline __m512d
11355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11356 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11357 {
11358   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11359                                                         (__v8df) __B,
11360                                                         (__v8df) __C,
11361                                                         (__mmask8) __U,
11362                                                         _MM_FROUND_CUR_DIRECTION);
11363 }
11364
11365 extern __inline __m512
11366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11368 {
11369   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11370                                                       (__v16sf) __B,
11371                                                       (__v16sf) __C,
11372                                                       (__mmask16) -1,
11373                                                       _MM_FROUND_CUR_DIRECTION);
11374 }
11375
11376 extern __inline __m512
11377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11378 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11379 {
11380   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11381                                                       (__v16sf) __B,
11382                                                       (__v16sf) __C,
11383                                                       (__mmask16) __U,
11384                                                       _MM_FROUND_CUR_DIRECTION);
11385 }
11386
11387 extern __inline __m512
11388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11389 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11390 {
11391   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11392                                                        (__v16sf) __B,
11393                                                        (__v16sf) __C,
11394                                                        (__mmask16) __U,
11395                                                        _MM_FROUND_CUR_DIRECTION);
11396 }
11397
11398 extern __inline __m512
11399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11400 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11401 {
11402   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11403                                                        (__v16sf) __B,
11404                                                        (__v16sf) __C,
11405                                                        (__mmask16) __U,
11406                                                        _MM_FROUND_CUR_DIRECTION);
11407 }
11408
11409 extern __inline __m512d
11410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11411 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11412 {
11413   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11414                                                        (__v8df) __B,
11415                                                        -(__v8df) __C,
11416                                                        (__mmask8) -1,
11417                                                        _MM_FROUND_CUR_DIRECTION);
11418 }
11419
11420 extern __inline __m512d
11421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11422 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11423 {
11424   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11425                                                        (__v8df) __B,
11426                                                        -(__v8df) __C,
11427                                                        (__mmask8) __U,
11428                                                        _MM_FROUND_CUR_DIRECTION);
11429 }
11430
11431 extern __inline __m512d
11432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11433 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11434 {
11435   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11436                                                         (__v8df) __B,
11437                                                         (__v8df) __C,
11438                                                         (__mmask8) __U,
11439                                                         _MM_FROUND_CUR_DIRECTION);
11440 }
11441
11442 extern __inline __m512d
11443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11444 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11445 {
11446   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11447                                                         (__v8df) __B,
11448                                                         -(__v8df) __C,
11449                                                         (__mmask8) __U,
11450                                                         _MM_FROUND_CUR_DIRECTION);
11451 }
11452
11453 extern __inline __m512
11454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11455 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11456 {
11457   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11458                                                       (__v16sf) __B,
11459                                                       -(__v16sf) __C,
11460                                                       (__mmask16) -1,
11461                                                       _MM_FROUND_CUR_DIRECTION);
11462 }
11463
11464 extern __inline __m512
11465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11466 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11467 {
11468   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11469                                                       (__v16sf) __B,
11470                                                       -(__v16sf) __C,
11471                                                       (__mmask16) __U,
11472                                                       _MM_FROUND_CUR_DIRECTION);
11473 }
11474
11475 extern __inline __m512
11476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11478 {
11479   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11480                                                        (__v16sf) __B,
11481                                                        (__v16sf) __C,
11482                                                        (__mmask16) __U,
11483                                                        _MM_FROUND_CUR_DIRECTION);
11484 }
11485
11486 extern __inline __m512
11487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11488 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11489 {
11490   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11491                                                        (__v16sf) __B,
11492                                                        -(__v16sf) __C,
11493                                                        (__mmask16) __U,
11494                                                        _MM_FROUND_CUR_DIRECTION);
11495 }
11496
11497 extern __inline __m512d
11498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11499 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11500 {
11501   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11502                                                     (__v8df) __B,
11503                                                     (__v8df) __C,
11504                                                     (__mmask8) -1,
11505                                                     _MM_FROUND_CUR_DIRECTION);
11506 }
11507
11508 extern __inline __m512d
11509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11510 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11511 {
11512   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11513                                                      (__v8df) __B,
11514                                                      (__v8df) __C,
11515                                                      (__mmask8) __U,
11516                                                      _MM_FROUND_CUR_DIRECTION);
11517 }
11518
11519 extern __inline __m512d
11520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11521 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11522 {
11523   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11524                                                      (__v8df) __B,
11525                                                      (__v8df) __C,
11526                                                      (__mmask8) __U,
11527                                                      _MM_FROUND_CUR_DIRECTION);
11528 }
11529
11530 extern __inline __m512d
11531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11533 {
11534   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11535                                                      (__v8df) __B,
11536                                                      (__v8df) __C,
11537                                                      (__mmask8) __U,
11538                                                      _MM_FROUND_CUR_DIRECTION);
11539 }
11540
11541 extern __inline __m512
11542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11543 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11544 {
11545   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11546                                                    (__v16sf) __B,
11547                                                    (__v16sf) __C,
11548                                                    (__mmask16) -1,
11549                                                    _MM_FROUND_CUR_DIRECTION);
11550 }
11551
11552 extern __inline __m512
11553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11554 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11555 {
11556   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11557                                                     (__v16sf) __B,
11558                                                     (__v16sf) __C,
11559                                                     (__mmask16) __U,
11560                                                     _MM_FROUND_CUR_DIRECTION);
11561 }
11562
11563 extern __inline __m512
11564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11565 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11566 {
11567   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11568                                                     (__v16sf) __B,
11569                                                     (__v16sf) __C,
11570                                                     (__mmask16) __U,
11571                                                     _MM_FROUND_CUR_DIRECTION);
11572 }
11573
11574 extern __inline __m512
11575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11577 {
11578   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11579                                                     (__v16sf) __B,
11580                                                     (__v16sf) __C,
11581                                                     (__mmask16) __U,
11582                                                     _MM_FROUND_CUR_DIRECTION);
11583 }
11584
11585 extern __inline __m512d
11586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11587 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11588 {
11589   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11590                                                     (__v8df) __B,
11591                                                     -(__v8df) __C,
11592                                                     (__mmask8) -1,
11593                                                     _MM_FROUND_CUR_DIRECTION);
11594 }
11595
11596 extern __inline __m512d
11597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11598 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11599 {
11600   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11601                                                      (__v8df) __B,
11602                                                      (__v8df) __C,
11603                                                      (__mmask8) __U,
11604                                                      _MM_FROUND_CUR_DIRECTION);
11605 }
11606
11607 extern __inline __m512d
11608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11609 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11610 {
11611   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11612                                                       (__v8df) __B,
11613                                                       (__v8df) __C,
11614                                                       (__mmask8) __U,
11615                                                       _MM_FROUND_CUR_DIRECTION);
11616 }
11617
11618 extern __inline __m512d
11619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11621 {
11622   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11623                                                      (__v8df) __B,
11624                                                      -(__v8df) __C,
11625                                                      (__mmask8) __U,
11626                                                      _MM_FROUND_CUR_DIRECTION);
11627 }
11628
11629 extern __inline __m512
11630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11631 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11632 {
11633   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11634                                                    (__v16sf) __B,
11635                                                    -(__v16sf) __C,
11636                                                    (__mmask16) -1,
11637                                                    _MM_FROUND_CUR_DIRECTION);
11638 }
11639
11640 extern __inline __m512
11641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11642 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11643 {
11644   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11645                                                     (__v16sf) __B,
11646                                                     (__v16sf) __C,
11647                                                     (__mmask16) __U,
11648                                                     _MM_FROUND_CUR_DIRECTION);
11649 }
11650
11651 extern __inline __m512
11652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11654 {
11655   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11656                                                      (__v16sf) __B,
11657                                                      (__v16sf) __C,
11658                                                      (__mmask16) __U,
11659                                                      _MM_FROUND_CUR_DIRECTION);
11660 }
11661
11662 extern __inline __m512
11663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11664 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11665 {
11666   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11667                                                     (__v16sf) __B,
11668                                                     -(__v16sf) __C,
11669                                                     (__mmask16) __U,
11670                                                     _MM_FROUND_CUR_DIRECTION);
11671 }
11672
11673 extern __inline __m256i
11674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11675 _mm512_cvttpd_epi32 (__m512d __A)
11676 {
11677   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11678                                                      (__v8si)
11679                                                      _mm256_undefined_si256 (),
11680                                                      (__mmask8) -1,
11681                                                      _MM_FROUND_CUR_DIRECTION);
11682 }
11683
11684 extern __inline __m256i
11685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11686 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11687 {
11688   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11689                                                      (__v8si) __W,
11690                                                      (__mmask8) __U,
11691                                                      _MM_FROUND_CUR_DIRECTION);
11692 }
11693
11694 extern __inline __m256i
11695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11696 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11697 {
11698   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11699                                                      (__v8si)
11700                                                      _mm256_setzero_si256 (),
11701                                                      (__mmask8) __U,
11702                                                      _MM_FROUND_CUR_DIRECTION);
11703 }
11704
11705 extern __inline __m256i
11706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11707 _mm512_cvttpd_epu32 (__m512d __A)
11708 {
11709   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11710                                                       (__v8si)
11711                                                       _mm256_undefined_si256 (),
11712                                                       (__mmask8) -1,
11713                                                       _MM_FROUND_CUR_DIRECTION);
11714 }
11715
11716 extern __inline __m256i
11717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11718 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11719 {
11720   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11721                                                       (__v8si) __W,
11722                                                       (__mmask8) __U,
11723                                                       _MM_FROUND_CUR_DIRECTION);
11724 }
11725
11726 extern __inline __m256i
11727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11728 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11729 {
11730   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11731                                                       (__v8si)
11732                                                       _mm256_setzero_si256 (),
11733                                                       (__mmask8) __U,
11734                                                       _MM_FROUND_CUR_DIRECTION);
11735 }
11736
11737 extern __inline __m256i
11738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11739 _mm512_cvtpd_epi32 (__m512d __A)
11740 {
11741   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11742                                                     (__v8si)
11743                                                     _mm256_undefined_si256 (),
11744                                                     (__mmask8) -1,
11745                                                     _MM_FROUND_CUR_DIRECTION);
11746 }
11747
11748 extern __inline __m256i
11749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11750 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11751 {
11752   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11753                                                     (__v8si) __W,
11754                                                     (__mmask8) __U,
11755                                                     _MM_FROUND_CUR_DIRECTION);
11756 }
11757
11758 extern __inline __m256i
11759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11760 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11761 {
11762   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11763                                                     (__v8si)
11764                                                     _mm256_setzero_si256 (),
11765                                                     (__mmask8) __U,
11766                                                     _MM_FROUND_CUR_DIRECTION);
11767 }
11768
11769 extern __inline __m256i
11770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11771 _mm512_cvtpd_epu32 (__m512d __A)
11772 {
11773   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11774                                                      (__v8si)
11775                                                      _mm256_undefined_si256 (),
11776                                                      (__mmask8) -1,
11777                                                      _MM_FROUND_CUR_DIRECTION);
11778 }
11779
11780 extern __inline __m256i
11781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11782 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11783 {
11784   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11785                                                      (__v8si) __W,
11786                                                      (__mmask8) __U,
11787                                                      _MM_FROUND_CUR_DIRECTION);
11788 }
11789
11790 extern __inline __m256i
11791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11792 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11793 {
11794   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11795                                                      (__v8si)
11796                                                      _mm256_setzero_si256 (),
11797                                                      (__mmask8) __U,
11798                                                      _MM_FROUND_CUR_DIRECTION);
11799 }
11800
11801 extern __inline __m512i
11802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11803 _mm512_cvttps_epi32 (__m512 __A)
11804 {
11805   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11806                                                      (__v16si)
11807                                                      _mm512_undefined_si512 (),
11808                                                      (__mmask16) -1,
11809                                                      _MM_FROUND_CUR_DIRECTION);
11810 }
11811
11812 extern __inline __m512i
11813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11815 {
11816   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11817                                                      (__v16si) __W,
11818                                                      (__mmask16) __U,
11819                                                      _MM_FROUND_CUR_DIRECTION);
11820 }
11821
11822 extern __inline __m512i
11823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11825 {
11826   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11827                                                      (__v16si)
11828                                                      _mm512_setzero_si512 (),
11829                                                      (__mmask16) __U,
11830                                                      _MM_FROUND_CUR_DIRECTION);
11831 }
11832
11833 extern __inline __m512i
11834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835 _mm512_cvttps_epu32 (__m512 __A)
11836 {
11837   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11838                                                       (__v16si)
11839                                                       _mm512_undefined_si512 (),
11840                                                       (__mmask16) -1,
11841                                                       _MM_FROUND_CUR_DIRECTION);
11842 }
11843
11844 extern __inline __m512i
11845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11847 {
11848   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11849                                                       (__v16si) __W,
11850                                                       (__mmask16) __U,
11851                                                       _MM_FROUND_CUR_DIRECTION);
11852 }
11853
11854 extern __inline __m512i
11855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11856 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11857 {
11858   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11859                                                       (__v16si)
11860                                                       _mm512_setzero_si512 (),
11861                                                       (__mmask16) __U,
11862                                                       _MM_FROUND_CUR_DIRECTION);
11863 }
11864
11865 extern __inline __m512i
11866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11867 _mm512_cvtps_epi32 (__m512 __A)
11868 {
11869   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11870                                                     (__v16si)
11871                                                     _mm512_undefined_si512 (),
11872                                                     (__mmask16) -1,
11873                                                     _MM_FROUND_CUR_DIRECTION);
11874 }
11875
11876 extern __inline __m512i
11877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11879 {
11880   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11881                                                     (__v16si) __W,
11882                                                     (__mmask16) __U,
11883                                                     _MM_FROUND_CUR_DIRECTION);
11884 }
11885
11886 extern __inline __m512i
11887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11888 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11889 {
11890   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11891                                                     (__v16si)
11892                                                     _mm512_setzero_si512 (),
11893                                                     (__mmask16) __U,
11894                                                     _MM_FROUND_CUR_DIRECTION);
11895 }
11896
11897 extern __inline __m512i
11898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11899 _mm512_cvtps_epu32 (__m512 __A)
11900 {
11901   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11902                                                      (__v16si)
11903                                                      _mm512_undefined_si512 (),
11904                                                      (__mmask16) -1,
11905                                                      _MM_FROUND_CUR_DIRECTION);
11906 }
11907
11908 extern __inline __m512i
11909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11910 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11911 {
11912   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11913                                                      (__v16si) __W,
11914                                                      (__mmask16) __U,
11915                                                      _MM_FROUND_CUR_DIRECTION);
11916 }
11917
11918 extern __inline __m512i
11919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11920 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11921 {
11922   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11923                                                      (__v16si)
11924                                                      _mm512_setzero_si512 (),
11925                                                      (__mmask16) __U,
11926                                                      _MM_FROUND_CUR_DIRECTION);
11927 }
11928
11929 #ifdef __x86_64__
11930 extern __inline __m128
11931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11933 {
11934   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11935                                               _MM_FROUND_CUR_DIRECTION);
11936 }
11937
11938 extern __inline __m128d
11939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11941 {
11942   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11943                                                _MM_FROUND_CUR_DIRECTION);
11944 }
11945 #endif
11946
11947 extern __inline __m128
11948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11950 {
11951   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11952                                               _MM_FROUND_CUR_DIRECTION);
11953 }
11954
11955 extern __inline __m512
11956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11957 _mm512_cvtepi32_ps (__m512i __A)
11958 {
11959   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11960                                                    (__v16sf)
11961                                                    _mm512_undefined_ps (),
11962                                                    (__mmask16) -1,
11963                                                    _MM_FROUND_CUR_DIRECTION);
11964 }
11965
11966 extern __inline __m512
11967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11968 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11969 {
11970   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11971                                                    (__v16sf) __W,
11972                                                    (__mmask16) __U,
11973                                                    _MM_FROUND_CUR_DIRECTION);
11974 }
11975
11976 extern __inline __m512
11977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11978 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11979 {
11980   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11981                                                    (__v16sf)
11982                                                    _mm512_setzero_ps (),
11983                                                    (__mmask16) __U,
11984                                                    _MM_FROUND_CUR_DIRECTION);
11985 }
11986
11987 extern __inline __m512
11988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11989 _mm512_cvtepu32_ps (__m512i __A)
11990 {
11991   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11992                                                     (__v16sf)
11993                                                     _mm512_undefined_ps (),
11994                                                     (__mmask16) -1,
11995                                                     _MM_FROUND_CUR_DIRECTION);
11996 }
11997
11998 extern __inline __m512
11999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12000 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12001 {
12002   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12003                                                     (__v16sf) __W,
12004                                                     (__mmask16) __U,
12005                                                     _MM_FROUND_CUR_DIRECTION);
12006 }
12007
12008 extern __inline __m512
12009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12010 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12011 {
12012   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12013                                                     (__v16sf)
12014                                                     _mm512_setzero_ps (),
12015                                                     (__mmask16) __U,
12016                                                     _MM_FROUND_CUR_DIRECTION);
12017 }
12018
12019 #ifdef __OPTIMIZE__
12020 extern __inline __m512d
12021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12022 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12023 {
12024   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12025                                                       (__v8df) __B,
12026                                                       (__v8di) __C,
12027                                                       __imm,
12028                                                       (__mmask8) -1,
12029                                                       _MM_FROUND_CUR_DIRECTION);
12030 }
12031
12032 extern __inline __m512d
12033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12034 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12035                          __m512i __C, const int __imm)
12036 {
12037   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12038                                                       (__v8df) __B,
12039                                                       (__v8di) __C,
12040                                                       __imm,
12041                                                       (__mmask8) __U,
12042                                                       _MM_FROUND_CUR_DIRECTION);
12043 }
12044
12045 extern __inline __m512d
12046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12047 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12048                           __m512i __C, const int __imm)
12049 {
12050   return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12051                                                        (__v8df) __B,
12052                                                        (__v8di) __C,
12053                                                        __imm,
12054                                                        (__mmask8) __U,
12055                                                        _MM_FROUND_CUR_DIRECTION);
12056 }
12057
12058 extern __inline __m512
12059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12060 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12061 {
12062   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12063                                                      (__v16sf) __B,
12064                                                      (__v16si) __C,
12065                                                      __imm,
12066                                                      (__mmask16) -1,
12067                                                      _MM_FROUND_CUR_DIRECTION);
12068 }
12069
12070 extern __inline __m512
12071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12073                          __m512i __C, const int __imm)
12074 {
12075   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12076                                                      (__v16sf) __B,
12077                                                      (__v16si) __C,
12078                                                      __imm,
12079                                                      (__mmask16) __U,
12080                                                      _MM_FROUND_CUR_DIRECTION);
12081 }
12082
12083 extern __inline __m512
12084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12085 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12086                           __m512i __C, const int __imm)
12087 {
12088   return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12089                                                       (__v16sf) __B,
12090                                                       (__v16si) __C,
12091                                                       __imm,
12092                                                       (__mmask16) __U,
12093                                                       _MM_FROUND_CUR_DIRECTION);
12094 }
12095
12096 extern __inline __m128d
12097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12098 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12099 {
12100   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12101                                                    (__v2df) __B,
12102                                                    (__v2di) __C, __imm,
12103                                                    (__mmask8) -1,
12104                                                    _MM_FROUND_CUR_DIRECTION);
12105 }
12106
12107 extern __inline __m128d
12108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12109 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12110                       __m128i __C, const int __imm)
12111 {
12112   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12113                                                    (__v2df) __B,
12114                                                    (__v2di) __C, __imm,
12115                                                    (__mmask8) __U,
12116                                                    _MM_FROUND_CUR_DIRECTION);
12117 }
12118
12119 extern __inline __m128d
12120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12121 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12122                        __m128i __C, const int __imm)
12123 {
12124   return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12125                                                     (__v2df) __B,
12126                                                     (__v2di) __C,
12127                                                     __imm,
12128                                                     (__mmask8) __U,
12129                                                     _MM_FROUND_CUR_DIRECTION);
12130 }
12131
12132 extern __inline __m128
12133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12135 {
12136   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12137                                                   (__v4sf) __B,
12138                                                   (__v4si) __C, __imm,
12139                                                   (__mmask8) -1,
12140                                                   _MM_FROUND_CUR_DIRECTION);
12141 }
12142
12143 extern __inline __m128
12144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12146                       __m128i __C, const int __imm)
12147 {
12148   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12149                                                   (__v4sf) __B,
12150                                                   (__v4si) __C, __imm,
12151                                                   (__mmask8) __U,
12152                                                   _MM_FROUND_CUR_DIRECTION);
12153 }
12154
12155 extern __inline __m128
12156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12157 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12158                        __m128i __C, const int __imm)
12159 {
12160   return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12161                                                    (__v4sf) __B,
12162                                                    (__v4si) __C, __imm,
12163                                                    (__mmask8) __U,
12164                                                    _MM_FROUND_CUR_DIRECTION);
12165 }
12166 #else
/* Macro form of _mm512_fixupimm_pd: a single expression that hands the
   operands straight to the _mask builtin with an all-ones mask.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12171
/* Macro form of _mm512_mask_fixupimm_pd: U is passed as the builtin's
   8-bit mask operand.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12176
/* Macro form of _mm512_maskz_fixupimm_pd: expands to the _maskz
   builtin with mask U.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12181
/* Macro form of _mm512_fixupimm_ps: a single expression calling the
   _mask builtin with an all-ones 16-bit mask.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12186
/* Macro form of _mm512_mask_fixupimm_ps: U is passed as the builtin's
   16-bit mask operand.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12191
/* Macro form of _mm512_maskz_fixupimm_ps: expands to the _maskz
   builtin with mask U.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12196
/* Macro form of _mm_fixupimm_sd: a single expression calling the
   scalar-double _mask builtin with an all-ones mask.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12201
/* Macro form of _mm_mask_fixupimm_sd: U is passed as the builtin's
   mask operand.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12206
/* Macro form of _mm_maskz_fixupimm_sd: expands to the _maskz builtin
   with mask U.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12211
/* Macro form of _mm_fixupimm_ss: a single expression calling the
   scalar-single _mask builtin with an all-ones mask.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12216
/* Macro form of _mm_mask_fixupimm_ss: U is passed as the builtin's
   mask operand.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12221
/* Macro form of _mm_maskz_fixupimm_ss: expands to the _maskz builtin
   with mask U.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12226 #endif
12227
12228 #ifdef __x86_64__
12229 extern __inline unsigned long long
12230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12231 _mm_cvtss_u64 (__m128 __A)
12232 {
12233   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12234                                                            __A,
12235                                                            _MM_FROUND_CUR_DIRECTION);
12236 }
12237
12238 extern __inline unsigned long long
12239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12240 _mm_cvttss_u64 (__m128 __A)
12241 {
12242   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12243                                                             __A,
12244                                                             _MM_FROUND_CUR_DIRECTION);
12245 }
12246
12247 extern __inline long long
12248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12249 _mm_cvttss_i64 (__m128 __A)
12250 {
12251   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12252                                                   _MM_FROUND_CUR_DIRECTION);
12253 }
12254 #endif /* __x86_64__ */
12255
12256 extern __inline unsigned
12257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258 _mm_cvtss_u32 (__m128 __A)
12259 {
12260   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12261                                                  _MM_FROUND_CUR_DIRECTION);
12262 }
12263
12264 extern __inline unsigned
12265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12266 _mm_cvttss_u32 (__m128 __A)
12267 {
12268   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12269                                                   _MM_FROUND_CUR_DIRECTION);
12270 }
12271
12272 extern __inline int
12273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12274 _mm_cvttss_i32 (__m128 __A)
12275 {
12276   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12277                                             _MM_FROUND_CUR_DIRECTION);
12278 }
12279
12280 #ifdef __x86_64__
12281 extern __inline unsigned long long
12282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12283 _mm_cvtsd_u64 (__m128d __A)
12284 {
12285   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12286                                                            __A,
12287                                                            _MM_FROUND_CUR_DIRECTION);
12288 }
12289
12290 extern __inline unsigned long long
12291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292 _mm_cvttsd_u64 (__m128d __A)
12293 {
12294   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12295                                                             __A,
12296                                                             _MM_FROUND_CUR_DIRECTION);
12297 }
12298
12299 extern __inline long long
12300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12301 _mm_cvttsd_i64 (__m128d __A)
12302 {
12303   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12304                                                   _MM_FROUND_CUR_DIRECTION);
12305 }
12306 #endif /* __x86_64__ */
12307
12308 extern __inline unsigned
12309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310 _mm_cvtsd_u32 (__m128d __A)
12311 {
12312   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12313                                                  _MM_FROUND_CUR_DIRECTION);
12314 }
12315
12316 extern __inline unsigned
12317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12318 _mm_cvttsd_u32 (__m128d __A)
12319 {
12320   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12321                                                   _MM_FROUND_CUR_DIRECTION);
12322 }
12323
12324 extern __inline int
12325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12326 _mm_cvttsd_i32 (__m128d __A)
12327 {
12328   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12329                                             _MM_FROUND_CUR_DIRECTION);
12330 }
12331
12332 extern __inline __m512d
12333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12334 _mm512_cvtps_pd (__m256 __A)
12335 {
12336   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12337                                                     (__v8df)
12338                                                     _mm512_undefined_pd (),
12339                                                     (__mmask8) -1,
12340                                                     _MM_FROUND_CUR_DIRECTION);
12341 }
12342
12343 extern __inline __m512d
12344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12346 {
12347   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12348                                                     (__v8df) __W,
12349                                                     (__mmask8) __U,
12350                                                     _MM_FROUND_CUR_DIRECTION);
12351 }
12352
12353 extern __inline __m512d
12354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12356 {
12357   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12358                                                     (__v8df)
12359                                                     _mm512_setzero_pd (),
12360                                                     (__mmask8) __U,
12361                                                     _MM_FROUND_CUR_DIRECTION);
12362 }
12363
12364 extern __inline __m512
12365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12366 _mm512_cvtph_ps (__m256i __A)
12367 {
12368   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12369                                                     (__v16sf)
12370                                                     _mm512_undefined_ps (),
12371                                                     (__mmask16) -1,
12372                                                     _MM_FROUND_CUR_DIRECTION);
12373 }
12374
12375 extern __inline __m512
12376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12378 {
12379   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12380                                                     (__v16sf) __W,
12381                                                     (__mmask16) __U,
12382                                                     _MM_FROUND_CUR_DIRECTION);
12383 }
12384
12385 extern __inline __m512
12386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12387 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12388 {
12389   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12390                                                     (__v16sf)
12391                                                     _mm512_setzero_ps (),
12392                                                     (__mmask16) __U,
12393                                                     _MM_FROUND_CUR_DIRECTION);
12394 }
12395
12396 extern __inline __m256
12397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12398 _mm512_cvtpd_ps (__m512d __A)
12399 {
12400   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12401                                                    (__v8sf)
12402                                                    _mm256_undefined_ps (),
12403                                                    (__mmask8) -1,
12404                                                    _MM_FROUND_CUR_DIRECTION);
12405 }
12406
12407 extern __inline __m256
12408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12409 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12410 {
12411   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12412                                                    (__v8sf) __W,
12413                                                    (__mmask8) __U,
12414                                                    _MM_FROUND_CUR_DIRECTION);
12415 }
12416
12417 extern __inline __m256
12418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12419 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12420 {
12421   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12422                                                    (__v8sf)
12423                                                    _mm256_setzero_ps (),
12424                                                    (__mmask8) __U,
12425                                                    _MM_FROUND_CUR_DIRECTION);
12426 }
12427
12428 #ifdef __OPTIMIZE__
12429 extern __inline __m512
12430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12431 _mm512_getexp_ps (__m512 __A)
12432 {
12433   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12434                                                    (__v16sf)
12435                                                    _mm512_undefined_ps (),
12436                                                    (__mmask16) -1,
12437                                                    _MM_FROUND_CUR_DIRECTION);
12438 }
12439
12440 extern __inline __m512
12441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12442 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12443 {
12444   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12445                                                    (__v16sf) __W,
12446                                                    (__mmask16) __U,
12447                                                    _MM_FROUND_CUR_DIRECTION);
12448 }
12449
12450 extern __inline __m512
12451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12452 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12453 {
12454   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12455                                                    (__v16sf)
12456                                                    _mm512_setzero_ps (),
12457                                                    (__mmask16) __U,
12458                                                    _MM_FROUND_CUR_DIRECTION);
12459 }
12460
12461 extern __inline __m512d
12462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12463 _mm512_getexp_pd (__m512d __A)
12464 {
12465   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12466                                                     (__v8df)
12467                                                     _mm512_undefined_pd (),
12468                                                     (__mmask8) -1,
12469                                                     _MM_FROUND_CUR_DIRECTION);
12470 }
12471
12472 extern __inline __m512d
12473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12474 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12475 {
12476   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12477                                                     (__v8df) __W,
12478                                                     (__mmask8) __U,
12479                                                     _MM_FROUND_CUR_DIRECTION);
12480 }
12481
12482 extern __inline __m512d
12483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12484 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12485 {
12486   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12487                                                     (__v8df)
12488                                                     _mm512_setzero_pd (),
12489                                                     (__mmask8) __U,
12490                                                     _MM_FROUND_CUR_DIRECTION);
12491 }
12492
12493 extern __inline __m128
12494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12495 _mm_getexp_ss (__m128 __A, __m128 __B)
12496 {
12497   return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12498                                                     (__v4sf) __B,
12499                                                     _MM_FROUND_CUR_DIRECTION);
12500 }
12501
12502 extern __inline __m128d
12503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12504 _mm_getexp_sd (__m128d __A, __m128d __B)
12505 {
12506   return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12507                                                      (__v2df) __B,
12508                                                      _MM_FROUND_CUR_DIRECTION);
12509 }
12510
12511 extern __inline __m512d
12512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12513 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12514                    _MM_MANTISSA_SIGN_ENUM __C)
12515 {
12516   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12517                                                      (__C << 2) | __B,
12518                                                      _mm512_undefined_pd (),
12519                                                      (__mmask8) -1,
12520                                                      _MM_FROUND_CUR_DIRECTION);
12521 }
12522
12523 extern __inline __m512d
12524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12525 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12526                         _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12527 {
12528   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12529                                                      (__C << 2) | __B,
12530                                                      (__v8df) __W, __U,
12531                                                      _MM_FROUND_CUR_DIRECTION);
12532 }
12533
12534 extern __inline __m512d
12535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12536 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12537                          _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12538 {
12539   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12540                                                      (__C << 2) | __B,
12541                                                      (__v8df)
12542                                                      _mm512_setzero_pd (),
12543                                                      __U,
12544                                                      _MM_FROUND_CUR_DIRECTION);
12545 }
12546
12547 extern __inline __m512
12548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12549 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12550                    _MM_MANTISSA_SIGN_ENUM __C)
12551 {
12552   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12553                                                     (__C << 2) | __B,
12554                                                     _mm512_undefined_ps (),
12555                                                     (__mmask16) -1,
12556                                                     _MM_FROUND_CUR_DIRECTION);
12557 }
12558
12559 extern __inline __m512
12560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12561 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12562                         _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12563 {
12564   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12565                                                     (__C << 2) | __B,
12566                                                     (__v16sf) __W, __U,
12567                                                     _MM_FROUND_CUR_DIRECTION);
12568 }
12569
12570 extern __inline __m512
12571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12572 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12573                          _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12574 {
12575   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12576                                                     (__C << 2) | __B,
12577                                                     (__v16sf)
12578                                                     _mm512_setzero_ps (),
12579                                                     __U,
12580                                                     _MM_FROUND_CUR_DIRECTION);
12581 }
12582
12583 extern __inline __m128d
12584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12585 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12586                 _MM_MANTISSA_SIGN_ENUM __D)
12587 {
12588   return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12589                                                    (__v2df) __B,
12590                                                    (__D << 2) | __C,
12591                                                    _MM_FROUND_CUR_DIRECTION);
12592 }
12593
12594 extern __inline __m128
12595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12596 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12597                 _MM_MANTISSA_SIGN_ENUM __D)
12598 {
12599   return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12600                                                   (__v4sf) __B,
12601                                                   (__D << 2) | __C,
12602                                                   _MM_FROUND_CUR_DIRECTION);
12603 }
12604
12605 #else
/* Macro form of _mm512_getmant_pd for builds without __OPTIMIZE__; the
   immediate is ((C) << 2) | (B).  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(((C) << 2) | (B)), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
12612
/* Macro form of _mm512_mask_getmant_pd for builds without
   __OPTIMIZE__; W and U are passed as the vector and mask operands.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(((C) << 2) | (B)), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12619
/* Macro form of _mm512_maskz_getmant_pd for builds without
   __OPTIMIZE__; a zeroed vector is passed where the masked form
   passes W.  */
#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(((C) << 2) | (B)), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_getmant_ps for builds without __OPTIMIZE__; the
   immediate is ((C) << 2) | (B).  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(((C) << 2) | (B)), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))
12632
/* Macro form of _mm512_mask_getmant_ps for builds without
   __OPTIMIZE__; W and U are passed as the vector and mask operands.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(((C) << 2) | (B)), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12639
/* Macro form of _mm512_maskz_getmant_ps for builds without
   __OPTIMIZE__; a zeroed vector is passed where the masked form
   passes W.  */
#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(((C) << 2) | (B)), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_getmant_sd for builds without __OPTIMIZE__; the
   immediate is ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(((D) << 2) | (C)), \
     _MM_FROUND_CUR_DIRECTION))
12651
/* Macro form of _mm_getmant_ss for builds without __OPTIMIZE__; the
   immediate is ((D) << 2) | (C).  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(((D) << 2) | (C)), \
     _MM_FROUND_CUR_DIRECTION))
12657
/* Macro form of _mm_getexp_ss for builds without __OPTIMIZE__.  It
   must name the same three-operand builtin as the inline definition
   in the __OPTIMIZE__ branch, __builtin_ia32_getexpss128_round; the
   operands here are (A, B, rounding), with no pass-through vector or
   mask, so a *_mask builtin does not fit.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     _MM_FROUND_CUR_DIRECTION))
12661
/* Macro form of _mm_getexp_sd for builds without __OPTIMIZE__.  It
   must name the same three-operand builtin as the inline definition
   in the __OPTIMIZE__ branch, __builtin_ia32_getexpsd128_round; the
   operands here are (A, B, rounding), with no pass-through vector or
   mask, so a *_mask builtin does not fit.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     _MM_FROUND_CUR_DIRECTION))
12665
/* Macro form of _mm512_getexp_ps for builds without __OPTIMIZE__:
   undefined second operand, all-ones mask.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))
12669
/* Macro form of _mm512_mask_getexp_ps.  Calls the getexpps512 builtin on A
   with W as the pass-through vector, U as the mask and
   _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12673
/* Macro form of _mm512_maskz_getexp_ps.  Calls the getexpps512 builtin on A
   with an all-zero pass-through vector, U as the mask and
   _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)_mm512_setzero_ps(), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12677
/* Macro form of _mm512_getexp_pd.  Calls the getexppd512 builtin on A with
   an undefined pass-through vector, an all-ones mask and
   _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)_mm512_undefined_pd(), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))
12681
/* Macro form of _mm512_mask_getexp_pd.  Calls the getexppd512 builtin on A
   with W as the pass-through vector, U as the mask and
   _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12685
/* Macro form of _mm512_maskz_getexp_pd.  Calls the getexppd512 builtin on A
   with an all-zero pass-through vector, U as the mask and
   _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)_mm512_setzero_pd(), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12689 #endif
12690
12691 #ifdef __OPTIMIZE__
12692 extern __inline __m512
12693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12694 _mm512_roundscale_ps (__m512 __A, const int __imm)
12695 {
12696   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12697                                                   (__v16sf)
12698                                                   _mm512_undefined_ps (),
12699                                                   -1,
12700                                                   _MM_FROUND_CUR_DIRECTION);
12701 }
12702
12703 extern __inline __m512
12704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12705 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12706                            const int __imm)
12707 {
12708   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12709                                                   (__v16sf) __A,
12710                                                   (__mmask16) __B,
12711                                                   _MM_FROUND_CUR_DIRECTION);
12712 }
12713
12714 extern __inline __m512
12715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12716 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12717 {
12718   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12719                                                   __imm,
12720                                                   (__v16sf)
12721                                                   _mm512_setzero_ps (),
12722                                                   (__mmask16) __A,
12723                                                   _MM_FROUND_CUR_DIRECTION);
12724 }
12725
12726 extern __inline __m512d
12727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12728 _mm512_roundscale_pd (__m512d __A, const int __imm)
12729 {
12730   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12731                                                    (__v8df)
12732                                                    _mm512_undefined_pd (),
12733                                                    -1,
12734                                                    _MM_FROUND_CUR_DIRECTION);
12735 }
12736
12737 extern __inline __m512d
12738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12739 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12740                            const int __imm)
12741 {
12742   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12743                                                    (__v8df) __A,
12744                                                    (__mmask8) __B,
12745                                                    _MM_FROUND_CUR_DIRECTION);
12746 }
12747
12748 extern __inline __m512d
12749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12750 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12751 {
12752   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12753                                                    __imm,
12754                                                    (__v8df)
12755                                                    _mm512_setzero_pd (),
12756                                                    (__mmask8) __A,
12757                                                    _MM_FROUND_CUR_DIRECTION);
12758 }
12759
12760 extern __inline __m128
12761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12762 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12763 {
12764   return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12765                                                    (__v4sf) __B, __imm,
12766                                                    _MM_FROUND_CUR_DIRECTION);
12767 }
12768
12769 extern __inline __m128d
12770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12771 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12772 {
12773   return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12774                                                     (__v2df) __B, __imm,
12775                                                    _MM_FROUND_CUR_DIRECTION);
12776 }
12777
12778 #else
/* Macro form of _mm512_roundscale_ps, used when __OPTIMIZE__ is not
   defined: rndscaleps builtin on A with immediate B, an undefined
   pass-through vector and an all-ones mask.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(A), \
     (int)(B), \
     (__v16sf)_mm512_undefined_ps(), \
     (__mmask16)(-1), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_ps, used when __OPTIMIZE__ is not
   defined: rndscaleps builtin on C with immediate D, A as the pass-through
   vector and B as the mask.  */
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(C), \
     (int)(D), \
     (__v16sf)(__m512)(A), \
     (__mmask16)(B), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_ps, used when __OPTIMIZE__ is not
   defined: rndscaleps builtin on B with immediate C, an all-zero
   pass-through vector and A as the mask.  */
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf)_mm512_setzero_ps(), \
     (__mmask16)(A), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_roundscale_pd, used when __OPTIMIZE__ is not
   defined: rndscalepd builtin on A with immediate B, an undefined
   pass-through vector and an all-ones mask.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(A), \
     (int)(B), \
     (__v8df)_mm512_undefined_pd(), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_pd, used when __OPTIMIZE__ is not
   defined: rndscalepd builtin on C with immediate D, A as the pass-through
   vector and B as the mask.  */
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(C), \
     (int)(D), \
     (__v8df)(__m512d)(A), \
     (__mmask8)(B), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_pd, used when __OPTIMIZE__ is not
   defined: rndscalepd builtin on B with immediate C, an all-zero
   pass-through vector and A as the mask.  */
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df)_mm512_setzero_pd(), \
     (__mmask8)(A), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_ss, used when __OPTIMIZE__ is not defined:
   scalar rndscaless builtin on A and B with immediate C.  */
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_sd, used when __OPTIMIZE__ is not defined:
   scalar rndscalesd builtin on A and B with immediate C.  */
#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     _MM_FROUND_CUR_DIRECTION))
12811 #endif
12812
12813 #ifdef __OPTIMIZE__
12814 extern __inline __mmask8
12815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12816 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12817 {
12818   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12819                                                   (__v8df) __Y, __P,
12820                                                   (__mmask8) -1,
12821                                                   _MM_FROUND_CUR_DIRECTION);
12822 }
12823
12824 extern __inline __mmask16
12825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12826 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12827 {
12828   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12829                                                    (__v16sf) __Y, __P,
12830                                                    (__mmask16) -1,
12831                                                    _MM_FROUND_CUR_DIRECTION);
12832 }
12833
12834 extern __inline __mmask16
12835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12836 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12837 {
12838   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12839                                                    (__v16sf) __Y, __P,
12840                                                    (__mmask16) __U,
12841                                                    _MM_FROUND_CUR_DIRECTION);
12842 }
12843
12844 extern __inline __mmask8
12845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12846 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12847 {
12848   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12849                                                   (__v8df) __Y, __P,
12850                                                   (__mmask8) __U,
12851                                                   _MM_FROUND_CUR_DIRECTION);
12852 }
12853
12854 extern __inline __mmask8
12855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12856 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12857 {
12858   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12859                                                (__v2df) __Y, __P,
12860                                                (__mmask8) -1,
12861                                                _MM_FROUND_CUR_DIRECTION);
12862 }
12863
12864 extern __inline __mmask8
12865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12866 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12867 {
12868   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12869                                                (__v2df) __Y, __P,
12870                                                (__mmask8) __M,
12871                                                _MM_FROUND_CUR_DIRECTION);
12872 }
12873
12874 extern __inline __mmask8
12875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12876 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12877 {
12878   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12879                                                (__v4sf) __Y, __P,
12880                                                (__mmask8) -1,
12881                                                _MM_FROUND_CUR_DIRECTION);
12882 }
12883
12884 extern __inline __mmask8
12885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12886 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12887 {
12888   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12889                                                (__v4sf) __Y, __P,
12890                                                (__mmask8) __M,
12891                                                _MM_FROUND_CUR_DIRECTION);
12892 }
12893
12894 #else
/* Macro form of _mm512_cmp_pd_mask, used when __OPTIMIZE__ is not defined:
   cmppd512 builtin on X and Y with predicate P and an all-ones mask.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(P), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))
12899
/* Macro form of _mm512_cmp_ps_mask, used when __OPTIMIZE__ is not defined:
   cmpps512 builtin on X and Y with predicate P and an all-ones mask.  */
#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(P), \
     (__mmask16)-1, \
     _MM_FROUND_CUR_DIRECTION))
12904
/* Macro form of _mm512_mask_cmp_pd_mask, used when __OPTIMIZE__ is not
   defined: cmppd512 builtin on X and Y with predicate P and M as the input
   mask.

   M is parenthesized before the __mmask8 cast so that an expression
   argument (for example `a >> 8' or `a & b') is converted as a whole
   instead of having the cast bind only to its first operand.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), (int)(P), \
                                            (__mmask8)(M), \
                                            _MM_FROUND_CUR_DIRECTION))
12909
/* Macro form of _mm512_mask_cmp_ps_mask, used when __OPTIMIZE__ is not
   defined: cmpps512 builtin on X and Y with predicate P and M as the input
   mask.

   M is parenthesized before the __mmask16 cast so that an expression
   argument (for example `a >> 16' or `a & b') is converted as a whole
   instead of having the cast bind only to its first operand.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), (int)(P), \
                                             (__mmask16)(M), \
                                             _MM_FROUND_CUR_DIRECTION))
12914
/* Macro form of _mm_cmp_sd_mask, used when __OPTIMIZE__ is not defined:
   scalar cmpsd builtin on X and Y with predicate P and an all-ones mask.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(P), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))
12919
/* Macro form of _mm_mask_cmp_sd_mask, used when __OPTIMIZE__ is not
   defined: scalar cmpsd builtin on X and Y with predicate P and M as the
   input mask.

   M is parenthesized and cast to __mmask8 explicitly, matching the inline
   function form and the other masked compare macros.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
                                         (__v2df)(__m128d)(Y), (int)(P), \
                                         (__mmask8)(M), \
                                         _MM_FROUND_CUR_DIRECTION))
12924
/* Macro form of _mm_cmp_ss_mask, used when __OPTIMIZE__ is not defined:
   scalar cmpss builtin on X and Y with predicate P and an all-ones mask.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(P), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))
12929
/* Macro form of _mm_mask_cmp_ss_mask, used when __OPTIMIZE__ is not
   defined: scalar cmpss builtin on X and Y with predicate P and M as the
   input mask.

   M is parenthesized and cast to __mmask8 explicitly, matching the inline
   function form and the other masked compare macros.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (int)(P), \
                                         (__mmask8)(M), \
                                         _MM_FROUND_CUR_DIRECTION))
12934 #endif
12935
12936 extern __inline __mmask16
12937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12938 _mm512_kmov (__mmask16 __A)
12939 {
12940   return __builtin_ia32_kmov16 (__A);
12941 }
12942
12943 extern __inline __m512
12944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12945 _mm512_castpd_ps (__m512d __A)
12946 {
12947   return (__m512) (__A);
12948 }
12949
12950 extern __inline __m512i
12951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12952 _mm512_castpd_si512 (__m512d __A)
12953 {
12954   return (__m512i) (__A);
12955 }
12956
12957 extern __inline __m512d
12958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12959 _mm512_castps_pd (__m512 __A)
12960 {
12961   return (__m512d) (__A);
12962 }
12963
12964 extern __inline __m512i
12965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12966 _mm512_castps_si512 (__m512 __A)
12967 {
12968   return (__m512i) (__A);
12969 }
12970
12971 extern __inline __m512
12972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12973 _mm512_castsi512_ps (__m512i __A)
12974 {
12975   return (__m512) (__A);
12976 }
12977
12978 extern __inline __m512d
12979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12980 _mm512_castsi512_pd (__m512i __A)
12981 {
12982   return (__m512d) (__A);
12983 }
12984
12985 extern __inline __m128d
12986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12987 _mm512_castpd512_pd128 (__m512d __A)
12988 {
12989   return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
12990 }
12991
12992 extern __inline __m128
12993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12994 _mm512_castps512_ps128 (__m512 __A)
12995 {
12996   return _mm512_extractf32x4_ps(__A, 0);
12997 }
12998
12999 extern __inline __m128i
13000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13001 _mm512_castsi512_si128 (__m512i __A)
13002 {
13003   return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13004 }
13005
13006 extern __inline __m256d
13007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13008 _mm512_castpd512_pd256 (__m512d __A)
13009 {
13010   return _mm512_extractf64x4_pd(__A, 0);
13011 }
13012
13013 extern __inline __m256
13014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13015 _mm512_castps512_ps256 (__m512 __A)
13016 {
13017   return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13018 }
13019
13020 extern __inline __m256i
13021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13022 _mm512_castsi512_si256 (__m512i __A)
13023 {
13024   return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13025 }
13026
13027 extern __inline __m512d
13028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13029 _mm512_castpd128_pd512 (__m128d __A)
13030 {
13031   return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13032 }
13033
13034 extern __inline __m512
13035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13036 _mm512_castps128_ps512 (__m128 __A)
13037 {
13038   return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13039 }
13040
13041 extern __inline __m512i
13042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13043 _mm512_castsi128_si512 (__m128i __A)
13044 {
13045   return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13046 }
13047
13048 extern __inline __m512d
13049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13050 _mm512_castpd256_pd512 (__m256d __A)
13051 {
13052   return __builtin_ia32_pd512_256pd (__A);
13053 }
13054
13055 extern __inline __m512
13056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13057 _mm512_castps256_ps512 (__m256 __A)
13058 {
13059   return __builtin_ia32_ps512_256ps (__A);
13060 }
13061
13062 extern __inline __m512i
13063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13064 _mm512_castsi256_si512 (__m256i __A)
13065 {
13066   return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13067 }
13068
13069 extern __inline __mmask16
13070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13071 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13072 {
13073   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13074                                                      (__v16si) __B, 0,
13075                                                      (__mmask16) -1);
13076 }
13077
13078 extern __inline __mmask16
13079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13080 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13081 {
13082   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13083                                                      (__v16si) __B, 0, __U);
13084 }
13085
13086 extern __inline __mmask8
13087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13088 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13089 {
13090   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13091                                                     (__v8di) __B, 0, __U);
13092 }
13093
13094 extern __inline __mmask8
13095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13096 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13097 {
13098   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13099                                                     (__v8di) __B, 0,
13100                                                     (__mmask8) -1);
13101 }
13102
13103 extern __inline __mmask16
13104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13105 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13106 {
13107   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13108                                                      (__v16si) __B, 6,
13109                                                      (__mmask16) -1);
13110 }
13111
13112 extern __inline __mmask16
13113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13114 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13115 {
13116   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13117                                                      (__v16si) __B, 6,  __U);
13118 }
13119
13120 extern __inline __mmask8
13121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13122 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13123 {
13124   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13125                                                     (__v8di) __B, 6, __U);
13126 }
13127
13128 extern __inline __mmask8
13129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13130 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13131 {
13132   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13133                                                     (__v8di) __B, 6,
13134                                                     (__mmask8) -1);
13135 }
13136
13137 #ifdef __DISABLE_AVX512F__
13138 #undef __DISABLE_AVX512F__
13139 #pragma GCC pop_options
13140 #endif /* __DISABLE_AVX512F__ */
13141
13142 #endif /* _AVX512FINTRIN_H_INCLUDED */