ba65acadf8ddad9d6eea786cb37608a878d71f02
[dragonfly.git] / contrib / gcc-8.0 / gcc / config / i386 / avx512fintrin.h
1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
2
3    This file is part of GCC.
4
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
/* Element-typed 64-byte vectors used internally by the intrinsics.
   Every type below has __vector_size__ (64), so its lane count is
   64 / sizeof (element type).  */

/* Floating-point lanes.  */
typedef double __v8df
  __attribute__ ((__vector_size__ (64)));
typedef float __v16sf
  __attribute__ ((__vector_size__ (64)));

/* Integer lanes, signed and unsigned variants of each element type.  */
typedef long long __v8di
  __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du
  __attribute__ ((__vector_size__ (64)));
typedef int __v16si
  __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su
  __attribute__ ((__vector_size__ (64)));
typedef short __v32hi
  __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu
  __attribute__ ((__vector_size__ (64)));
typedef char __v64qi
  __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu
  __attribute__ ((__vector_size__ (64)));

/* Public vector types.  The Intel API is flexible enough that these must
   be allowed to alias other vector types and their scalar components,
   hence __may_alias__.  */
typedef float __m512
  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i
  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d
  __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned counterparts of the public types: identical apart from the
   additional __aligned__ (1).  */
typedef float __m512_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));

/* Mask types; __mmask8 accompanies the 8-lane operations in this file and
   __mmask16 the 16-lane ones.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
62
63 extern __inline __mmask16
64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65 _mm512_int2mask (int __M)
66 {
67   return (__mmask16) __M;
68 }
69
70 extern __inline int
71 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72 _mm512_mask2int (__mmask16 __M)
73 {
74   return (int) __M;
75 }
76
77 extern __inline __m512i
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm512_set_epi64 (long long __A, long long __B, long long __C,
80                   long long __D, long long __E, long long __F,
81                   long long __G, long long __H)
82 {
83   return __extension__ (__m512i) (__v8di)
84          { __H, __G, __F, __E, __D, __C, __B, __A };
85 }
86
87 /* Create the vector [A B C D E F G H I J K L M N O P].  */
88 extern __inline __m512i
89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
91                   int __E, int __F, int __G, int __H,
92                   int __I, int __J, int __K, int __L,
93                   int __M, int __N, int __O, int __P)
94 {
95   return __extension__ (__m512i)(__v16si)
96          { __P, __O, __N, __M, __L, __K, __J, __I,
97            __H, __G, __F, __E, __D, __C, __B, __A };
98 }
99
100 extern __inline __m512d
101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
102 _mm512_set_pd (double __A, double __B, double __C, double __D,
103                double __E, double __F, double __G, double __H)
104 {
105   return __extension__ (__m512d)
106          { __H, __G, __F, __E, __D, __C, __B, __A };
107 }
108
109 extern __inline __m512
110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
111 _mm512_set_ps (float __A, float __B, float __C, float __D,
112                float __E, float __F, float __G, float __H,
113                float __I, float __J, float __K, float __L,
114                float __M, float __N, float __O, float __P)
115 {
116   return __extension__ (__m512)
117          { __P, __O, __N, __M, __L, __K, __J, __I,
118            __H, __G, __F, __E, __D, __C, __B, __A };
119 }
120
/* Reverse-order setters.  Each macro hands its arguments to the matching
   _mm512_set_* function back to front, so e0 becomes that function's last
   argument.  */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7)                     \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7,                     \
                          e8, e9, e10, e11, e12, e13, e14, e15)               \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7)                        \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7,                        \
                       e8, e9, e10, e11, e12, e13, e14, e15)                  \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
133
134 extern __inline __m512
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm512_undefined_ps (void)
137 {
138   __m512 __Y = __Y;
139   return __Y;
140 }
141
142 #define _mm512_undefined _mm512_undefined_ps
143
144 extern __inline __m512d
145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146 _mm512_undefined_pd (void)
147 {
148   __m512d __Y = __Y;
149   return __Y;
150 }
151
152 extern __inline __m512i
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _mm512_undefined_epi32 (void)
155 {
156   __m512i __Y = __Y;
157   return __Y;
158 }
159
160 #define _mm512_undefined_si512 _mm512_undefined_epi32
161
162 extern __inline __m512i
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm512_set1_epi8 (char __A)
165 {
166   return __extension__ (__m512i)(__v64qi)
167          { __A, __A, __A, __A, __A, __A, __A, __A,
168            __A, __A, __A, __A, __A, __A, __A, __A,
169            __A, __A, __A, __A, __A, __A, __A, __A,
170            __A, __A, __A, __A, __A, __A, __A, __A,
171            __A, __A, __A, __A, __A, __A, __A, __A,
172            __A, __A, __A, __A, __A, __A, __A, __A,
173            __A, __A, __A, __A, __A, __A, __A, __A,
174            __A, __A, __A, __A, __A, __A, __A, __A };
175 }
176
177 extern __inline __m512i
178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179 _mm512_set1_epi16 (short __A)
180 {
181   return __extension__ (__m512i)(__v32hi)
182          { __A, __A, __A, __A, __A, __A, __A, __A,
183            __A, __A, __A, __A, __A, __A, __A, __A,
184            __A, __A, __A, __A, __A, __A, __A, __A,
185            __A, __A, __A, __A, __A, __A, __A, __A };
186 }
187
188 extern __inline __m512d
189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190 _mm512_set1_pd (double __A)
191 {
192   return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
193                                                   (__v2df) { __A, },
194                                                   (__v8df)
195                                                   _mm512_undefined_pd (),
196                                                   (__mmask8) -1);
197 }
198
199 extern __inline __m512
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm512_set1_ps (float __A)
202 {
203   return (__m512) __builtin_ia32_broadcastss512 (__extension__
204                                                  (__v4sf) { __A, },
205                                                  (__v16sf)
206                                                  _mm512_undefined_ps (),
207                                                  (__mmask16) -1);
208 }
209
210 /* Create the vector [A B C D A B C D A B C D A B C D].  */
211 extern __inline __m512i
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
214 {
215   return __extension__ (__m512i)(__v16si)
216          { __D, __C, __B, __A, __D, __C, __B, __A,
217            __D, __C, __B, __A, __D, __C, __B, __A };
218 }
219
220 extern __inline __m512i
221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
222 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
223                    long long __D)
224 {
225   return __extension__ (__m512i) (__v8di)
226          { __D, __C, __B, __A, __D, __C, __B, __A };
227 }
228
229 extern __inline __m512d
230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231 _mm512_set4_pd (double __A, double __B, double __C, double __D)
232 {
233   return __extension__ (__m512d)
234          { __D, __C, __B, __A, __D, __C, __B, __A };
235 }
236
237 extern __inline __m512
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm512_set4_ps (float __A, float __B, float __C, float __D)
240 {
241   return __extension__ (__m512)
242          { __D, __C, __B, __A, __D, __C, __B, __A,
243            __D, __C, __B, __A, __D, __C, __B, __A };
244 }
245
/* Reverse-order variants of the _mm512_set4_* functions: each macro passes
   its four arguments on back to front, so e0 becomes the last argument of
   the underlying call.  */
#define _mm512_setr4_epi64(e0, e1, e2, e3)                                    \
  _mm512_set4_epi64(e3,e2,e1,e0)

#define _mm512_setr4_epi32(e0, e1, e2, e3)                                    \
  _mm512_set4_epi32(e3,e2,e1,e0)

#define _mm512_setr4_pd(e0, e1, e2, e3)                                       \
  _mm512_set4_pd(e3,e2,e1,e0)

#define _mm512_setr4_ps(e0, e1, e2, e3)                                       \
  _mm512_set4_ps(e3,e2,e1,e0)
257
258 extern __inline __m512
259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
260 _mm512_setzero_ps (void)
261 {
262   return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
263                                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
264 }
265
266 extern __inline __m512d
267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268 _mm512_setzero_pd (void)
269 {
270   return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
271 }
272
273 extern __inline __m512i
274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275 _mm512_setzero_epi32 (void)
276 {
277   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
278 }
279
280 extern __inline __m512i
281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282 _mm512_setzero_si512 (void)
283 {
284   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
285 }
286
287 extern __inline __m512d
288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
290 {
291   return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
292                                                   (__v8df) __W,
293                                                   (__mmask8) __U);
294 }
295
296 extern __inline __m512d
297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
299 {
300   return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
301                                                   (__v8df)
302                                                   _mm512_setzero_pd (),
303                                                   (__mmask8) __U);
304 }
305
306 extern __inline __m512
307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
309 {
310   return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
311                                                  (__v16sf) __W,
312                                                  (__mmask16) __U);
313 }
314
315 extern __inline __m512
316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
317 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
318 {
319   return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
320                                                  (__v16sf)
321                                                  _mm512_setzero_ps (),
322                                                  (__mmask16) __U);
323 }
324
325 extern __inline __m512d
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm512_load_pd (void const *__P)
328 {
329   return *(__m512d *) __P;
330 }
331
332 extern __inline __m512d
333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
335 {
336   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
337                                                    (__v8df) __W,
338                                                    (__mmask8) __U);
339 }
340
341 extern __inline __m512d
342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
344 {
345   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
346                                                    (__v8df)
347                                                    _mm512_setzero_pd (),
348                                                    (__mmask8) __U);
349 }
350
351 extern __inline void
352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353 _mm512_store_pd (void *__P, __m512d __A)
354 {
355   *(__m512d *) __P = __A;
356 }
357
358 extern __inline void
359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
361 {
362   __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
363                                    (__mmask8) __U);
364 }
365
366 extern __inline __m512
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm512_load_ps (void const *__P)
369 {
370   return *(__m512 *) __P;
371 }
372
373 extern __inline __m512
374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
376 {
377   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
378                                                   (__v16sf) __W,
379                                                   (__mmask16) __U);
380 }
381
382 extern __inline __m512
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
385 {
386   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
387                                                   (__v16sf)
388                                                   _mm512_setzero_ps (),
389                                                   (__mmask16) __U);
390 }
391
392 extern __inline void
393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
394 _mm512_store_ps (void *__P, __m512 __A)
395 {
396   *(__m512 *) __P = __A;
397 }
398
399 extern __inline void
400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
401 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
402 {
403   __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
404                                    (__mmask16) __U);
405 }
406
407 extern __inline __m512i
408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
410 {
411   return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
412                                                      (__v8di) __W,
413                                                      (__mmask8) __U);
414 }
415
416 extern __inline __m512i
417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
419 {
420   return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
421                                                      (__v8di)
422                                                      _mm512_setzero_si512 (),
423                                                      (__mmask8) __U);
424 }
425
426 extern __inline __m512i
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 _mm512_load_epi64 (void const *__P)
429 {
430   return *(__m512i *) __P;
431 }
432
433 extern __inline __m512i
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
436 {
437   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
438                                                         (__v8di) __W,
439                                                         (__mmask8) __U);
440 }
441
442 extern __inline __m512i
443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
445 {
446   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
447                                                         (__v8di)
448                                                         _mm512_setzero_si512 (),
449                                                         (__mmask8) __U);
450 }
451
452 extern __inline void
453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454 _mm512_store_epi64 (void *__P, __m512i __A)
455 {
456   *(__m512i *) __P = __A;
457 }
458
459 extern __inline void
460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
462 {
463   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
464                                         (__mmask8) __U);
465 }
466
467 extern __inline __m512i
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
470 {
471   return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
472                                                      (__v16si) __W,
473                                                      (__mmask16) __U);
474 }
475
476 extern __inline __m512i
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
479 {
480   return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
481                                                      (__v16si)
482                                                      _mm512_setzero_si512 (),
483                                                      (__mmask16) __U);
484 }
485
486 extern __inline __m512i
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_load_si512 (void const *__P)
489 {
490   return *(__m512i *) __P;
491 }
492
493 extern __inline __m512i
494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495 _mm512_load_epi32 (void const *__P)
496 {
497   return *(__m512i *) __P;
498 }
499
500 extern __inline __m512i
501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
503 {
504   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
505                                                         (__v16si) __W,
506                                                         (__mmask16) __U);
507 }
508
509 extern __inline __m512i
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
512 {
513   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
514                                                         (__v16si)
515                                                         _mm512_setzero_si512 (),
516                                                         (__mmask16) __U);
517 }
518
519 extern __inline void
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521 _mm512_store_si512 (void *__P, __m512i __A)
522 {
523   *(__m512i *) __P = __A;
524 }
525
526 extern __inline void
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm512_store_epi32 (void *__P, __m512i __A)
529 {
530   *(__m512i *) __P = __A;
531 }
532
533 extern __inline void
534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
536 {
537   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
538                                         (__mmask16) __U);
539 }
540
541 extern __inline __m512i
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
544 {
545   return (__m512i) ((__v16su) __A * (__v16su) __B);
546 }
547
548 extern __inline __m512i
549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
551 {
552   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
553                                                   (__v16si) __B,
554                                                   (__v16si)
555                                                   _mm512_setzero_si512 (),
556                                                   __M);
557 }
558
559 extern __inline __m512i
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
562 {
563   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
564                                                   (__v16si) __B,
565                                                   (__v16si) __W, __M);
566 }
567
568 extern __inline __m512i
569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
571 {
572   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573                                                   (__v16si) __Y,
574                                                   (__v16si)
575                                                   _mm512_undefined_epi32 (),
576                                                   (__mmask16) -1);
577 }
578
579 extern __inline __m512i
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
582 {
583   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
584                                                   (__v16si) __Y,
585                                                   (__v16si) __W,
586                                                   (__mmask16) __U);
587 }
588
589 extern __inline __m512i
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
592 {
593   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
594                                                   (__v16si) __Y,
595                                                   (__v16si)
596                                                   _mm512_setzero_si512 (),
597                                                   (__mmask16) __U);
598 }
599
600 extern __inline __m512i
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
603 {
604   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605                                                   (__v16si) __Y,
606                                                   (__v16si)
607                                                   _mm512_undefined_epi32 (),
608                                                   (__mmask16) -1);
609 }
610
611 extern __inline __m512i
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
614 {
615   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
616                                                   (__v16si) __Y,
617                                                   (__v16si) __W,
618                                                   (__mmask16) __U);
619 }
620
621 extern __inline __m512i
622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
624 {
625   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
626                                                   (__v16si) __Y,
627                                                   (__v16si)
628                                                   _mm512_setzero_si512 (),
629                                                   (__mmask16) __U);
630 }
631
632 extern __inline __m512i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
635 {
636   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637                                                   (__v16si) __Y,
638                                                   (__v16si)
639                                                   _mm512_undefined_epi32 (),
640                                                   (__mmask16) -1);
641 }
642
643 extern __inline __m512i
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
646 {
647   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
648                                                   (__v16si) __Y,
649                                                   (__v16si) __W,
650                                                   (__mmask16) __U);
651 }
652
653 extern __inline __m512i
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
656 {
657   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
658                                                   (__v16si) __Y,
659                                                   (__v16si)
660                                                   _mm512_setzero_si512 (),
661                                                   (__mmask16) __U);
662 }
663
664 extern __inline __m512i
665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666 _mm512_add_epi64 (__m512i __A, __m512i __B)
667 {
668   return (__m512i) ((__v8du) __A + (__v8du) __B);
669 }
670
671 extern __inline __m512i
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
674 {
675   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
676                                                  (__v8di) __B,
677                                                  (__v8di) __W,
678                                                  (__mmask8) __U);
679 }
680
681 extern __inline __m512i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
684 {
685   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
686                                                  (__v8di) __B,
687                                                  (__v8di)
688                                                  _mm512_setzero_si512 (),
689                                                  (__mmask8) __U);
690 }
691
692 extern __inline __m512i
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 _mm512_sub_epi64 (__m512i __A, __m512i __B)
695 {
696   return (__m512i) ((__v8du) __A - (__v8du) __B);
697 }
698
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702 {
703   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
704                                                  (__v8di) __B,
705                                                  (__v8di) __W,
706                                                  (__mmask8) __U);
707 }
708
709 extern __inline __m512i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
712 {
713   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
714                                                  (__v8di) __B,
715                                                  (__v8di)
716                                                  _mm512_setzero_si512 (),
717                                                  (__mmask8) __U);
718 }
719
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
723 {
724   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725                                                  (__v8di) __Y,
726                                                  (__v8di)
727                                                  _mm512_undefined_pd (),
728                                                  (__mmask8) -1);
729 }
730
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
734 {
735   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
736                                                  (__v8di) __Y,
737                                                  (__v8di) __W,
738                                                  (__mmask8) __U);
739 }
740
741 extern __inline __m512i
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
744 {
745   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
746                                                  (__v8di) __Y,
747                                                  (__v8di)
748                                                  _mm512_setzero_si512 (),
749                                                  (__mmask8) __U);
750 }
751
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
755 {
756   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757                                                  (__v8di) __Y,
758                                                  (__v8di)
759                                                  _mm512_undefined_epi32 (),
760                                                  (__mmask8) -1);
761 }
762
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
766 {
767   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
768                                                  (__v8di) __Y,
769                                                  (__v8di) __W,
770                                                  (__mmask8) __U);
771 }
772
773 extern __inline __m512i
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
776 {
777   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
778                                                  (__v8di) __Y,
779                                                  (__v8di)
780                                                  _mm512_setzero_si512 (),
781                                                  (__mmask8) __U);
782 }
783
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
787 {
788   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789                                                  (__v8di) __Y,
790                                                  (__v8di)
791                                                  _mm512_undefined_epi32 (),
792                                                  (__mmask8) -1);
793 }
794
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
798 {
799   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
800                                                  (__v8di) __Y,
801                                                  (__v8di) __W,
802                                                  (__mmask8) __U);
803 }
804
805 extern __inline __m512i
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
808 {
809   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
810                                                  (__v8di) __Y,
811                                                  (__v8di)
812                                                  _mm512_setzero_si512 (),
813                                                  (__mmask8) __U);
814 }
815
816 extern __inline __m512i
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 _mm512_add_epi32 (__m512i __A, __m512i __B)
819 {
820   return (__m512i) ((__v16su) __A + (__v16su) __B);
821 }
822
823 extern __inline __m512i
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
826 {
827   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
828                                                  (__v16si) __B,
829                                                  (__v16si) __W,
830                                                  (__mmask16) __U);
831 }
832
833 extern __inline __m512i
834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
836 {
837   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
838                                                  (__v16si) __B,
839                                                  (__v16si)
840                                                  _mm512_setzero_si512 (),
841                                                  (__mmask16) __U);
842 }
843
844 extern __inline __m512i
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
847 {
848   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849                                                   (__v16si) __Y,
850                                                   (__v8di)
851                                                   _mm512_undefined_epi32 (),
852                                                   (__mmask8) -1);
853 }
854
855 extern __inline __m512i
856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
858 {
859   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
860                                                   (__v16si) __Y,
861                                                   (__v8di) __W, __M);
862 }
863
864 extern __inline __m512i
865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
867 {
868   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
869                                                   (__v16si) __Y,
870                                                   (__v8di)
871                                                   _mm512_setzero_si512 (),
872                                                   __M);
873 }
874
875 extern __inline __m512i
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm512_sub_epi32 (__m512i __A, __m512i __B)
878 {
879   return (__m512i) ((__v16su) __A - (__v16su) __B);
880 }
881
882 extern __inline __m512i
883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
885 {
886   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
887                                                  (__v16si) __B,
888                                                  (__v16si) __W,
889                                                  (__mmask16) __U);
890 }
891
892 extern __inline __m512i
893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
895 {
896   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
897                                                  (__v16si) __B,
898                                                  (__v16si)
899                                                  _mm512_setzero_si512 (),
900                                                  (__mmask16) __U);
901 }
902
903 extern __inline __m512i
904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
906 {
907   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
908                                                    (__v16si) __Y,
909                                                    (__v8di)
910                                                    _mm512_undefined_epi32 (),
911                                                    (__mmask8) -1);
912 }
913
914 extern __inline __m512i
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
917 {
918   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
919                                                    (__v16si) __Y,
920                                                    (__v8di) __W, __M);
921 }
922
923 extern __inline __m512i
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
926 {
927   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
928                                                    (__v16si) __Y,
929                                                    (__v8di)
930                                                    _mm512_setzero_si512 (),
931                                                    __M);
932 }
933
934 #ifdef __OPTIMIZE__
935 extern __inline __m512i
936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
938 {
939   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
940                                                   (__v8di)
941                                                   _mm512_undefined_epi32 (),
942                                                   (__mmask8) -1);
943 }
944
945 extern __inline __m512i
946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
948                         unsigned int __B)
949 {
950   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
951                                                   (__v8di) __W,
952                                                   (__mmask8) __U);
953 }
954
955 extern __inline __m512i
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
958 {
959   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
960                                                   (__v8di)
961                                                   _mm512_setzero_si512 (),
962                                                   (__mmask8) __U);
963 }
964 #else
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psllqi512 builtin call.  */
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask8)-1))
969
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psllqi512 builtin call; W and U follow.  */
#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
974
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psllqi512 builtin call with a zero third operand.  */
#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
979 #endif
980
981 extern __inline __m512i
982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983 _mm512_sll_epi64 (__m512i __A, __m128i __B)
984 {
985   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986                                                  (__v2di) __B,
987                                                  (__v8di)
988                                                  _mm512_undefined_epi32 (),
989                                                  (__mmask8) -1);
990 }
991
992 extern __inline __m512i
993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
995 {
996   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
997                                                  (__v2di) __B,
998                                                  (__v8di) __W,
999                                                  (__mmask8) __U);
1000 }
1001
1002 extern __inline __m512i
1003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1004 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1005 {
1006   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1007                                                  (__v2di) __B,
1008                                                  (__v8di)
1009                                                  _mm512_setzero_si512 (),
1010                                                  (__mmask8) __U);
1011 }
1012
1013 #ifdef __OPTIMIZE__
1014 extern __inline __m512i
1015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
1017 {
1018   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1019                                                   (__v8di)
1020                                                   _mm512_undefined_epi32 (),
1021                                                   (__mmask8) -1);
1022 }
1023
1024 extern __inline __m512i
1025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1027                         __m512i __A, unsigned int __B)
1028 {
1029   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1030                                                   (__v8di) __W,
1031                                                   (__mmask8) __U);
1032 }
1033
1034 extern __inline __m512i
1035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1037 {
1038   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1039                                                   (__v8di)
1040                                                   _mm512_setzero_si512 (),
1041                                                   (__mmask8) __U);
1042 }
1043 #else
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psrlqi512 builtin call.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask8)-1))
1048
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psrlqi512 builtin call; W and U follow.  */
#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
1053
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psrlqi512 builtin call with a zero third operand.  */
#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
1058 #endif
1059
1060 extern __inline __m512i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1063 {
1064   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065                                                  (__v2di) __B,
1066                                                  (__v8di)
1067                                                  _mm512_undefined_epi32 (),
1068                                                  (__mmask8) -1);
1069 }
1070
1071 extern __inline __m512i
1072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1074 {
1075   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1076                                                  (__v2di) __B,
1077                                                  (__v8di) __W,
1078                                                  (__mmask8) __U);
1079 }
1080
1081 extern __inline __m512i
1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1084 {
1085   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1086                                                  (__v2di) __B,
1087                                                  (__v8di)
1088                                                  _mm512_setzero_si512 (),
1089                                                  (__mmask8) __U);
1090 }
1091
1092 #ifdef __OPTIMIZE__
1093 extern __inline __m512i
1094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1095 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1096 {
1097   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1098                                                   (__v8di)
1099                                                   _mm512_undefined_epi32 (),
1100                                                   (__mmask8) -1);
1101 }
1102
1103 extern __inline __m512i
1104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1105 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1106                         unsigned int __B)
1107 {
1108   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1109                                                   (__v8di) __W,
1110                                                   (__mmask8) __U);
1111 }
1112
1113 extern __inline __m512i
1114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1115 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1116 {
1117   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1118                                                   (__v8di)
1119                                                   _mm512_setzero_si512 (),
1120                                                   (__mmask8) __U);
1121 }
1122 #else
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psraqi512 builtin call.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask8)-1))
1127
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psraqi512 builtin call; W and U follow.  */
#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
1132
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psraqi512 builtin call with a zero third operand.  */
#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
1137 #endif
1138
1139 extern __inline __m512i
1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1142 {
1143   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144                                                  (__v2di) __B,
1145                                                  (__v8di)
1146                                                  _mm512_undefined_epi32 (),
1147                                                  (__mmask8) -1);
1148 }
1149
1150 extern __inline __m512i
1151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1152 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1153 {
1154   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1155                                                  (__v2di) __B,
1156                                                  (__v8di) __W,
1157                                                  (__mmask8) __U);
1158 }
1159
1160 extern __inline __m512i
1161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1162 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1163 {
1164   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1165                                                  (__v2di) __B,
1166                                                  (__v8di)
1167                                                  _mm512_setzero_si512 (),
1168                                                  (__mmask8) __U);
1169 }
1170
1171 #ifdef __OPTIMIZE__
1172 extern __inline __m512i
1173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1174 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1175 {
1176   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1177                                                   (__v16si)
1178                                                   _mm512_undefined_epi32 (),
1179                                                   (__mmask16) -1);
1180 }
1181
1182 extern __inline __m512i
1183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1184 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1185                         unsigned int __B)
1186 {
1187   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1188                                                   (__v16si) __W,
1189                                                   (__mmask16) __U);
1190 }
1191
1192 extern __inline __m512i
1193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1194 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1195 {
1196   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1197                                                   (__v16si)
1198                                                   _mm512_setzero_si512 (),
1199                                                   (__mmask16) __U);
1200 }
1201 #else
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the pslldi512 builtin call.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask16)-1))
1206
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the pslldi512 builtin call; W and U follow.  */
#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
1211
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the pslldi512 builtin call with a zero third operand.  */
#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
1216 #endif
1217
1218 extern __inline __m512i
1219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1221 {
1222   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223                                                  (__v4si) __B,
1224                                                  (__v16si)
1225                                                  _mm512_undefined_epi32 (),
1226                                                  (__mmask16) -1);
1227 }
1228
1229 extern __inline __m512i
1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1232 {
1233   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1234                                                  (__v4si) __B,
1235                                                  (__v16si) __W,
1236                                                  (__mmask16) __U);
1237 }
1238
1239 extern __inline __m512i
1240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1242 {
1243   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1244                                                  (__v4si) __B,
1245                                                  (__v16si)
1246                                                  _mm512_setzero_si512 (),
1247                                                  (__mmask16) __U);
1248 }
1249
1250 #ifdef __OPTIMIZE__
1251 extern __inline __m512i
1252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1253 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1254 {
1255   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1256                                                   (__v16si)
1257                                                   _mm512_undefined_epi32 (),
1258                                                   (__mmask16) -1);
1259 }
1260
1261 extern __inline __m512i
1262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1264                         __m512i __A, unsigned int __B)
1265 {
1266   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1267                                                   (__v16si) __W,
1268                                                   (__mmask16) __U);
1269 }
1270
1271 extern __inline __m512i
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1274 {
1275   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1276                                                   (__v16si)
1277                                                   _mm512_setzero_si512 (),
1278                                                   (__mmask16) __U);
1279 }
1280 #else
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psrldi512 builtin call.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask16)-1))
1285
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psrldi512 builtin call; W and U follow.  */
#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
1290
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psrldi512 builtin call with a zero third operand.  */
#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
1295 #endif
1296
1297 extern __inline __m512i
1298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1300 {
1301   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302                                                  (__v4si) __B,
1303                                                  (__v16si)
1304                                                  _mm512_undefined_epi32 (),
1305                                                  (__mmask16) -1);
1306 }
1307
1308 extern __inline __m512i
1309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1311 {
1312   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1313                                                  (__v4si) __B,
1314                                                  (__v16si) __W,
1315                                                  (__mmask16) __U);
1316 }
1317
1318 extern __inline __m512i
1319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1320 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1321 {
1322   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1323                                                  (__v4si) __B,
1324                                                  (__v16si)
1325                                                  _mm512_setzero_si512 (),
1326                                                  (__mmask16) __U);
1327 }
1328
1329 #ifdef __OPTIMIZE__
1330 extern __inline __m512i
1331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1332 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1333 {
1334   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1335                                                   (__v16si)
1336                                                   _mm512_undefined_epi32 (),
1337                                                   (__mmask16) -1);
1338 }
1339
1340 extern __inline __m512i
1341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1343                         unsigned int __B)
1344 {
1345   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1346                                                   (__v16si) __W,
1347                                                   (__mmask16) __U);
1348 }
1349
1350 extern __inline __m512i
1351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1353 {
1354   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1355                                                   (__v16si)
1356                                                   _mm512_setzero_si512 (),
1357                                                   (__mmask16) __U);
1358 }
1359 #else
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psradi512 builtin call.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask16)-1))
1364
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psradi512 builtin call; W and U follow.  */
#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
1369
/* Macro form, used when __OPTIMIZE__ is not defined: C is cast to int and
   placed directly in the psradi512 builtin call with a zero third operand.  */
#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
1374 #endif
1375
1376 extern __inline __m512i
1377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1379 {
1380   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381                                                  (__v4si) __B,
1382                                                  (__v16si)
1383                                                  _mm512_undefined_epi32 (),
1384                                                  (__mmask16) -1);
1385 }
1386
1387 extern __inline __m512i
1388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1390 {
1391   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1392                                                  (__v4si) __B,
1393                                                  (__v16si) __W,
1394                                                  (__mmask16) __U);
1395 }
1396
1397 extern __inline __m512i
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1400 {
1401   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1402                                                  (__v4si) __B,
1403                                                  (__v16si)
1404                                                  _mm512_setzero_si512 (),
1405                                                  (__mmask16) __U);
1406 }
1407
1408 #ifdef __OPTIMIZE__
1409 extern __inline __m128d
1410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1411 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1412 {
1413   return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1414                                                (__v2df) __B,
1415                                                __R);
1416 }
1417
1418 extern __inline __m128d
1419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1420 _mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1421                           __m128d __B, const int __R)
1422 {
1423   return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1424                                                  (__v2df) __B,
1425                                                  (__v2df) __W,
1426                                                  (__mmask8) __U, __R);
1427 }
1428
1429 extern __inline __m128d
1430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431 _mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1432                            const int __R)
1433 {
1434   return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1435                                                  (__v2df) __B,
1436                                                  (__v2df)
1437                                                  _mm_setzero_pd (),
1438                                                  (__mmask8) __U, __R);
1439 }
1440
1441 extern __inline __m128
1442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1443 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1444 {
1445   return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1446                                               (__v4sf) __B,
1447                                               __R);
1448 }
1449
1450 extern __inline __m128
1451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1452 _mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1453                           __m128 __B, const int __R)
1454 {
1455   return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1456                                                  (__v4sf) __B,
1457                                                  (__v4sf) __W,
1458                                                  (__mmask8) __U, __R);
1459 }
1460
1461 extern __inline __m128
1462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1463 _mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1464                            const int __R)
1465 {
1466   return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1467                                                  (__v4sf) __B,
1468                                                  (__v4sf)
1469                                                  _mm_setzero_ps (),
1470                                                  (__mmask8) __U, __R);
1471 }
1472
1473 extern __inline __m128d
1474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1476 {
1477   return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1478                                                (__v2df) __B,
1479                                                __R);
1480 }
1481
1482 extern __inline __m128d
1483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484 _mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1485                           __m128d __B, const int __R)
1486 {
1487   return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1488                                                  (__v2df) __B,
1489                                                  (__v2df) __W,
1490                                                  (__mmask8) __U, __R);
1491 }
1492
1493 extern __inline __m128d
1494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1495 _mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1496                            const int __R)
1497 {
1498   return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1499                                                  (__v2df) __B,
1500                                                  (__v2df)
1501                                                  _mm_setzero_pd (),
1502                                                  (__mmask8) __U, __R);
1503 }
1504
1505 extern __inline __m128
1506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1507 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1508 {
1509   return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1510                                               (__v4sf) __B,
1511                                               __R);
1512 }
1513
1514 extern __inline __m128
1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1517                           __m128 __B, const int __R)
1518 {
1519   return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1520                                                  (__v4sf) __B,
1521                                                  (__v4sf) __W,
1522                                                  (__mmask8) __U, __R);
1523 }
1524
1525 extern __inline __m128
1526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1527 _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1528                            const int __R)
1529 {
1530   return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1531                                                  (__v4sf) __B,
1532                                                  (__v4sf)
1533                                                  _mm_setzero_ps (),
1534                                                  (__mmask8) __U, __R);
1535 }
1536
1537 #else
/* Macro forms of the scalar add/sub "_round_" wrappers, used in the
   #else branch of the enclosing conditional.  Every expansion is wrapped
   in an outer pair of parentheses so that a postfix operator applied to
   the macro result binds to the whole cast expression, and every macro
   argument is parenthesized and cast to the type the inline-function
   forms use for the corresponding parameter.  */
#define _mm_add_round_sd(A, B, C)                                       \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df)(__m128d)(A),          \
    (__v2df)(__m128d)(B), (int)(C)))

#define _mm_mask_add_round_sd(W, U, A, B, C)                            \
  ((__m128d) __builtin_ia32_addsd_mask_round ((__v2df)(__m128d)(A),     \
    (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), (__mmask8)(U),          \
    (int)(C)))

#define _mm_maskz_add_round_sd(U, A, B, C)                              \
  ((__m128d) __builtin_ia32_addsd_mask_round ((__v2df)(__m128d)(A),     \
    (__v2df)(__m128d)(B), (__v2df) _mm_setzero_pd (), (__mmask8)(U),    \
    (int)(C)))

#define _mm_add_round_ss(A, B, C)                                       \
  ((__m128) __builtin_ia32_addss_round ((__v4sf)(__m128)(A),            \
    (__v4sf)(__m128)(B), (int)(C)))

#define _mm_mask_add_round_ss(W, U, A, B, C)                            \
  ((__m128) __builtin_ia32_addss_mask_round ((__v4sf)(__m128)(A),       \
    (__v4sf)(__m128)(B), (__v4sf)(__m128)(W), (__mmask8)(U),            \
    (int)(C)))

#define _mm_maskz_add_round_ss(U, A, B, C)                              \
  ((__m128) __builtin_ia32_addss_mask_round ((__v4sf)(__m128)(A),       \
    (__v4sf)(__m128)(B), (__v4sf) _mm_setzero_ps (), (__mmask8)(U),     \
    (int)(C)))

#define _mm_sub_round_sd(A, B, C)                                       \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df)(__m128d)(A),          \
    (__v2df)(__m128d)(B), (int)(C)))

#define _mm_mask_sub_round_sd(W, U, A, B, C)                            \
  ((__m128d) __builtin_ia32_subsd_mask_round ((__v2df)(__m128d)(A),     \
    (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), (__mmask8)(U),          \
    (int)(C)))

#define _mm_maskz_sub_round_sd(U, A, B, C)                              \
  ((__m128d) __builtin_ia32_subsd_mask_round ((__v2df)(__m128d)(A),     \
    (__v2df)(__m128d)(B), (__v2df) _mm_setzero_pd (), (__mmask8)(U),    \
    (int)(C)))

#define _mm_sub_round_ss(A, B, C)                                       \
  ((__m128) __builtin_ia32_subss_round ((__v4sf)(__m128)(A),            \
    (__v4sf)(__m128)(B), (int)(C)))

#define _mm_mask_sub_round_ss(W, U, A, B, C)                            \
  ((__m128) __builtin_ia32_subss_mask_round ((__v4sf)(__m128)(A),       \
    (__v4sf)(__m128)(B), (__v4sf)(__m128)(W), (__mmask8)(U),            \
    (int)(C)))

#define _mm_maskz_sub_round_ss(U, A, B, C)                              \
  ((__m128) __builtin_ia32_subss_mask_round ((__v4sf)(__m128)(A),       \
    (__v4sf)(__m128)(B), (__v4sf) _mm_setzero_ps (), (__mmask8)(U),     \
    (int)(C)))
1573
1574 #endif
1575
1576 #ifdef __OPTIMIZE__
1577 extern __inline __m512i
1578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1579 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1580                            const int __imm)
1581 {
1582   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1583                                                      (__v8di) __B,
1584                                                      (__v8di) __C, __imm,
1585                                                      (__mmask8) -1);
1586 }
1587
1588 extern __inline __m512i
1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1591                                 __m512i __C, const int __imm)
1592 {
1593   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1594                                                      (__v8di) __B,
1595                                                      (__v8di) __C, __imm,
1596                                                      (__mmask8) __U);
1597 }
1598
1599 extern __inline __m512i
1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1602                                  __m512i __C, const int __imm)
1603 {
1604   return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1605                                                       (__v8di) __B,
1606                                                       (__v8di) __C,
1607                                                       __imm, (__mmask8) __U);
1608 }
1609
1610 extern __inline __m512i
1611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1612 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1613                            const int __imm)
1614 {
1615   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1616                                                      (__v16si) __B,
1617                                                      (__v16si) __C,
1618                                                      __imm, (__mmask16) -1);
1619 }
1620
1621 extern __inline __m512i
1622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1623 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1624                                 __m512i __C, const int __imm)
1625 {
1626   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1627                                                      (__v16si) __B,
1628                                                      (__v16si) __C,
1629                                                      __imm, (__mmask16) __U);
1630 }
1631
1632 extern __inline __m512i
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1635                                  __m512i __C, const int __imm)
1636 {
1637   return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1638                                                       (__v16si) __B,
1639                                                       (__v16si) __C,
1640                                                       __imm, (__mmask16) __U);
1641 }
1642 #else
/* Macro forms of the ternarylogic wrappers, selected when __OPTIMIZE__
   is not defined.  Each one casts its vector arguments through __m512i
   to the builtin's element type, casts the immediate to int, and passes
   either an all-ones mask or the caller's mask.  */
#define _mm512_ternarylogic_epi64(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
      (__v8di)(__m512i)(A),                                             \
      (__v8di)(__m512i)(B),                                             \
      (__v8di)(__m512i)(C),                                             \
      (int)(I),                                                         \
      (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
      (__v8di)(__m512i)(A),                                             \
      (__v8di)(__m512i)(B),                                             \
      (__v8di)(__m512i)(C),                                             \
      (int)(I),                                                         \
      (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogq512_maskz (                        \
      (__v8di)(__m512i)(A),                                             \
      (__v8di)(__m512i)(B),                                             \
      (__v8di)(__m512i)(C),                                             \
      (int)(I),                                                         \
      (__mmask8)(U)))

#define _mm512_ternarylogic_epi32(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
      (__v16si)(__m512i)(A),                                            \
      (__v16si)(__m512i)(B),                                            \
      (__v16si)(__m512i)(C),                                            \
      (int)(I),                                                         \
      (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
      (__v16si)(__m512i)(A),                                            \
      (__v16si)(__m512i)(B),                                            \
      (__v16si)(__m512i)(C),                                            \
      (int)(I),                                                         \
      (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogd512_maskz (                        \
      (__v16si)(__m512i)(A),                                            \
      (__v16si)(__m512i)(B),                                            \
      (__v16si)(__m512i)(C),                                            \
      (int)(I),                                                         \
      (__mmask16)(U)))
1664 #endif
1665
1666 extern __inline __m512d
1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668 _mm512_rcp14_pd (__m512d __A)
1669 {
1670   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1671                                                    (__v8df)
1672                                                    _mm512_undefined_pd (),
1673                                                    (__mmask8) -1);
1674 }
1675
1676 extern __inline __m512d
1677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1679 {
1680   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1681                                                    (__v8df) __W,
1682                                                    (__mmask8) __U);
1683 }
1684
1685 extern __inline __m512d
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1688 {
1689   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1690                                                    (__v8df)
1691                                                    _mm512_setzero_pd (),
1692                                                    (__mmask8) __U);
1693 }
1694
1695 extern __inline __m512
1696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697 _mm512_rcp14_ps (__m512 __A)
1698 {
1699   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1700                                                   (__v16sf)
1701                                                   _mm512_undefined_ps (),
1702                                                   (__mmask16) -1);
1703 }
1704
1705 extern __inline __m512
1706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1708 {
1709   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1710                                                   (__v16sf) __W,
1711                                                   (__mmask16) __U);
1712 }
1713
1714 extern __inline __m512
1715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1716 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1717 {
1718   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1719                                                   (__v16sf)
1720                                                   _mm512_setzero_ps (),
1721                                                   (__mmask16) __U);
1722 }
1723
1724 extern __inline __m128d
1725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1726 _mm_rcp14_sd (__m128d __A, __m128d __B)
1727 {
1728   return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1729                                            (__v2df) __A);
1730 }
1731
1732 extern __inline __m128d
1733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1735 {
1736   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1737                                                 (__v2df) __A,
1738                                                 (__v2df) __W,
1739                                                 (__mmask8) __U);
1740 }
1741
1742 extern __inline __m128d
1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1745 {
1746   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1747                                                 (__v2df) __A,
1748                                                 (__v2df) _mm_setzero_ps (),
1749                                                 (__mmask8) __U);
1750 }
1751
1752 extern __inline __m128
1753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754 _mm_rcp14_ss (__m128 __A, __m128 __B)
1755 {
1756   return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1757                                           (__v4sf) __A);
1758 }
1759
1760 extern __inline __m128
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1763 {
1764   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1765                                                 (__v4sf) __A,
1766                                                 (__v4sf) __W,
1767                                                 (__mmask8) __U);
1768 }
1769
1770 extern __inline __m128
1771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1772 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1773 {
1774   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1775                                                 (__v4sf) __A,
1776                                                 (__v4sf) _mm_setzero_ps (),
1777                                                 (__mmask8) __U);
1778 }
1779
1780 extern __inline __m512d
1781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1782 _mm512_rsqrt14_pd (__m512d __A)
1783 {
1784   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1785                                                      (__v8df)
1786                                                      _mm512_undefined_pd (),
1787                                                      (__mmask8) -1);
1788 }
1789
1790 extern __inline __m512d
1791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1793 {
1794   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1795                                                      (__v8df) __W,
1796                                                      (__mmask8) __U);
1797 }
1798
1799 extern __inline __m512d
1800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1802 {
1803   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1804                                                      (__v8df)
1805                                                      _mm512_setzero_pd (),
1806                                                      (__mmask8) __U);
1807 }
1808
1809 extern __inline __m512
1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 _mm512_rsqrt14_ps (__m512 __A)
1812 {
1813   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1814                                                     (__v16sf)
1815                                                     _mm512_undefined_ps (),
1816                                                     (__mmask16) -1);
1817 }
1818
1819 extern __inline __m512
1820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1822 {
1823   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1824                                                     (__v16sf) __W,
1825                                                     (__mmask16) __U);
1826 }
1827
1828 extern __inline __m512
1829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1830 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1831 {
1832   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1833                                                     (__v16sf)
1834                                                     _mm512_setzero_ps (),
1835                                                     (__mmask16) __U);
1836 }
1837
1838 extern __inline __m128d
1839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1840 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1841 {
1842   return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1843                                              (__v2df) __A);
1844 }
1845
1846 extern __inline __m128d
1847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1848 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1849 {
1850   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1851                                                  (__v2df) __A,
1852                                                  (__v2df) __W,
1853                                                  (__mmask8) __U);
1854 }
1855
1856 extern __inline __m128d
1857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1859 {
1860   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1861                                                  (__v2df) __A,
1862                                                  (__v2df) _mm_setzero_pd (),
1863                                                  (__mmask8) __U);
1864 }
1865
1866 extern __inline __m128
1867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1868 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1869 {
1870   return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1871                                             (__v4sf) __A);
1872 }
1873
1874 extern __inline __m128
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1877 {
1878   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1879                                                  (__v4sf) __A,
1880                                                  (__v4sf) __W,
1881                                                  (__mmask8) __U);
1882 }
1883
1884 extern __inline __m128
1885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1886 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1887 {
1888   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1889                                                 (__v4sf) __A,
1890                                                 (__v4sf) _mm_setzero_ps (),
1891                                                 (__mmask8) __U);
1892 }
1893
1894 #ifdef __OPTIMIZE__
1895 extern __inline __m512d
1896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1897 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1898 {
1899   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1900                                                   (__v8df)
1901                                                   _mm512_undefined_pd (),
1902                                                   (__mmask8) -1, __R);
1903 }
1904
1905 extern __inline __m512d
1906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1907 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1908                            const int __R)
1909 {
1910   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1911                                                   (__v8df) __W,
1912                                                   (__mmask8) __U, __R);
1913 }
1914
1915 extern __inline __m512d
1916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1917 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1918 {
1919   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1920                                                   (__v8df)
1921                                                   _mm512_setzero_pd (),
1922                                                   (__mmask8) __U, __R);
1923 }
1924
1925 extern __inline __m512
1926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1927 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1928 {
1929   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1930                                                  (__v16sf)
1931                                                  _mm512_undefined_ps (),
1932                                                  (__mmask16) -1, __R);
1933 }
1934
1935 extern __inline __m512
1936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1937 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1938 {
1939   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1940                                                  (__v16sf) __W,
1941                                                  (__mmask16) __U, __R);
1942 }
1943
1944 extern __inline __m512
1945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1946 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1947 {
1948   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1949                                                  (__v16sf)
1950                                                  _mm512_setzero_ps (),
1951                                                  (__mmask16) __U, __R);
1952 }
1953
1954 extern __inline __m128d
1955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1957 {
1958   return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
1959                                                      (__v2df) __A,
1960                                                      (__v2df)
1961                                                      _mm_setzero_pd (),
1962                                                      (__mmask8) -1, __R);
1963 }
1964
1965 extern __inline __m128d
1966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1967 _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
1968                         const int __R)
1969 {
1970   return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
1971                                                      (__v2df) __A,
1972                                                      (__v2df) __W,
1973                                                      (__mmask8) __U, __R);
1974 }
1975
1976 extern __inline __m128d
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
1979 {
1980   return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
1981                                                      (__v2df) __A,
1982                                                      (__v2df)
1983                                                      _mm_setzero_pd (),
1984                                                      (__mmask8) __U, __R);
1985 }
1986
1987 extern __inline __m128
1988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1989 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1990 {
1991   return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
1992                                                     (__v4sf) __A,
1993                                                     (__v4sf)
1994                                                     _mm_setzero_ps (),
1995                                                     (__mmask8) -1, __R);
1996 }
1997
1998 extern __inline __m128
1999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2000 _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2001                         const int __R)
2002 {
2003   return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2004                                                     (__v4sf) __A,
2005                                                     (__v4sf) __W,
2006                                                     (__mmask8) __U, __R);
2007 }
2008
2009 extern __inline __m128
2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
2012 {
2013   return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2014                                                     (__v4sf) __A,
2015                                                     (__v4sf)
2016                                                     _mm_setzero_ps (),
2017                                                     (__mmask8) __U, __R);
2018 }
2019 #else
/* Macro forms of the sqrt "_round_" wrappers, selected when
   __OPTIMIZE__ is not defined.  Every expansion is wrapped in an outer
   pair of parentheses so that a postfix operator applied to the macro
   result binds to the whole cast expression, and every macro argument
   is parenthesized and cast to the type the inline-function forms use
   for the corresponding parameter.  The scalar forms pass B before A to
   the builtin, as the inline functions do.  */
#define _mm512_sqrt_round_pd(A, C)                                      \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df) _mm512_undefined_pd (), (__mmask8)-1, (int)(C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C)                           \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df)(__m512d)(W), (__mmask8)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C)                             \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df) _mm512_setzero_pd (), (__mmask8)(U), (int)(C)))

#define _mm512_sqrt_round_ps(A, C)                                      \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf) _mm512_undefined_ps (), (__mmask16)-1, (int)(C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C)                           \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf)(__m512)(W), (__mmask16)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C)                             \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (int)(C)))

#define _mm_sqrt_round_sd(A, B, C)                                      \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B),    \
    (__v2df)(__m128d)(A), (__v2df) _mm_setzero_pd (), (__mmask8)-1,     \
    (int)(C)))

#define _mm_mask_sqrt_round_sd(W, U, A, B, C)                           \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B),    \
    (__v2df)(__m128d)(A), (__v2df)(__m128d)(W), (__mmask8)(U),          \
    (int)(C)))

#define _mm_maskz_sqrt_round_sd(U, A, B, C)                             \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B),    \
    (__v2df)(__m128d)(A), (__v2df) _mm_setzero_pd (), (__mmask8)(U),    \
    (int)(C)))

#define _mm_sqrt_round_ss(A, B, C)                                      \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B),      \
    (__v4sf)(__m128)(A), (__v4sf) _mm_setzero_ps (), (__mmask8)-1,      \
    (int)(C)))

#define _mm_mask_sqrt_round_ss(W, U, A, B, C)                           \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B),      \
    (__v4sf)(__m128)(A), (__v4sf)(__m128)(W), (__mmask8)(U),            \
    (int)(C)))

#define _mm_maskz_sqrt_round_ss(U, A, B, C)                             \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B),      \
    (__v4sf)(__m128)(A), (__v4sf) _mm_setzero_ps (), (__mmask8)(U),     \
    (int)(C)))
2059 #endif
2060
2061 extern __inline __m512i
2062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063 _mm512_cvtepi8_epi32 (__m128i __A)
2064 {
2065   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2066                                                     (__v16si)
2067                                                     _mm512_undefined_epi32 (),
2068                                                     (__mmask16) -1);
2069 }
2070
2071 extern __inline __m512i
2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2074 {
2075   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2076                                                     (__v16si) __W,
2077                                                     (__mmask16) __U);
2078 }
2079
2080 extern __inline __m512i
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2083 {
2084   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2085                                                     (__v16si)
2086                                                     _mm512_setzero_si512 (),
2087                                                     (__mmask16) __U);
2088 }
2089
2090 extern __inline __m512i
2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092 _mm512_cvtepi8_epi64 (__m128i __A)
2093 {
2094   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2095                                                     (__v8di)
2096                                                     _mm512_undefined_epi32 (),
2097                                                     (__mmask8) -1);
2098 }
2099
2100 extern __inline __m512i
2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2103 {
2104   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2105                                                     (__v8di) __W,
2106                                                     (__mmask8) __U);
2107 }
2108
2109 extern __inline __m512i
2110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2111 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2112 {
2113   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2114                                                     (__v8di)
2115                                                     _mm512_setzero_si512 (),
2116                                                     (__mmask8) __U);
2117 }
2118
2119 extern __inline __m512i
2120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2121 _mm512_cvtepi16_epi32 (__m256i __A)
2122 {
2123   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2124                                                     (__v16si)
2125                                                     _mm512_undefined_epi32 (),
2126                                                     (__mmask16) -1);
2127 }
2128
2129 extern __inline __m512i
2130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2131 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2132 {
2133   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2134                                                     (__v16si) __W,
2135                                                     (__mmask16) __U);
2136 }
2137
2138 extern __inline __m512i
2139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2140 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2141 {
2142   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2143                                                     (__v16si)
2144                                                     _mm512_setzero_si512 (),
2145                                                     (__mmask16) __U);
2146 }
2147
2148 extern __inline __m512i
2149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2150 _mm512_cvtepi16_epi64 (__m128i __A)
2151 {
2152   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2153                                                     (__v8di)
2154                                                     _mm512_undefined_epi32 (),
2155                                                     (__mmask8) -1);
2156 }
2157
2158 extern __inline __m512i
2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2161 {
2162   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2163                                                     (__v8di) __W,
2164                                                     (__mmask8) __U);
2165 }
2166
2167 extern __inline __m512i
2168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2170 {
2171   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2172                                                     (__v8di)
2173                                                     _mm512_setzero_si512 (),
2174                                                     (__mmask8) __U);
2175 }
2176
2177 extern __inline __m512i
2178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179 _mm512_cvtepi32_epi64 (__m256i __X)
2180 {
2181   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2182                                                     (__v8di)
2183                                                     _mm512_undefined_epi32 (),
2184                                                     (__mmask8) -1);
2185 }
2186
2187 extern __inline __m512i
2188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2190 {
2191   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2192                                                     (__v8di) __W,
2193                                                     (__mmask8) __U);
2194 }
2195
2196 extern __inline __m512i
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2199 {
2200   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2201                                                     (__v8di)
2202                                                     _mm512_setzero_si512 (),
2203                                                     (__mmask8) __U);
2204 }
2205
2206 extern __inline __m512i
2207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2208 _mm512_cvtepu8_epi32 (__m128i __A)
2209 {
2210   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2211                                                     (__v16si)
2212                                                     _mm512_undefined_epi32 (),
2213                                                     (__mmask16) -1);
2214 }
2215
2216 extern __inline __m512i
2217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2218 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2219 {
2220   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2221                                                     (__v16si) __W,
2222                                                     (__mmask16) __U);
2223 }
2224
2225 extern __inline __m512i
2226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2227 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2228 {
2229   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2230                                                     (__v16si)
2231                                                     _mm512_setzero_si512 (),
2232                                                     (__mmask16) __U);
2233 }
2234
2235 extern __inline __m512i
2236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2237 _mm512_cvtepu8_epi64 (__m128i __A)
2238 {
2239   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2240                                                     (__v8di)
2241                                                     _mm512_undefined_epi32 (),
2242                                                     (__mmask8) -1);
2243 }
2244
2245 extern __inline __m512i
2246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2247 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2248 {
2249   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2250                                                     (__v8di) __W,
2251                                                     (__mmask8) __U);
2252 }
2253
2254 extern __inline __m512i
2255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2256 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2257 {
2258   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2259                                                     (__v8di)
2260                                                     _mm512_setzero_si512 (),
2261                                                     (__mmask8) __U);
2262 }
2263
2264 extern __inline __m512i
2265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2266 _mm512_cvtepu16_epi32 (__m256i __A)
2267 {
2268   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2269                                                     (__v16si)
2270                                                     _mm512_undefined_epi32 (),
2271                                                     (__mmask16) -1);
2272 }
2273
2274 extern __inline __m512i
2275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2277 {
2278   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2279                                                     (__v16si) __W,
2280                                                     (__mmask16) __U);
2281 }
2282
2283 extern __inline __m512i
2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2286 {
2287   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2288                                                     (__v16si)
2289                                                     _mm512_setzero_si512 (),
2290                                                     (__mmask16) __U);
2291 }
2292
2293 extern __inline __m512i
2294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2295 _mm512_cvtepu16_epi64 (__m128i __A)
2296 {
2297   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2298                                                     (__v8di)
2299                                                     _mm512_undefined_epi32 (),
2300                                                     (__mmask8) -1);
2301 }
2302
2303 extern __inline __m512i
2304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2305 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2306 {
2307   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2308                                                     (__v8di) __W,
2309                                                     (__mmask8) __U);
2310 }
2311
2312 extern __inline __m512i
2313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2314 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2315 {
2316   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2317                                                     (__v8di)
2318                                                     _mm512_setzero_si512 (),
2319                                                     (__mmask8) __U);
2320 }
2321
2322 extern __inline __m512i
2323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324 _mm512_cvtepu32_epi64 (__m256i __X)
2325 {
2326   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2327                                                     (__v8di)
2328                                                     _mm512_undefined_epi32 (),
2329                                                     (__mmask8) -1);
2330 }
2331
2332 extern __inline __m512i
2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2334 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2335 {
2336   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2337                                                     (__v8di) __W,
2338                                                     (__mmask8) __U);
2339 }
2340
2341 extern __inline __m512i
2342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2343 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2344 {
2345   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2346                                                     (__v8di)
2347                                                     _mm512_setzero_si512 (),
2348                                                     (__mmask8) __U);
2349 }
2350
2351 #ifdef __OPTIMIZE__
2352 extern __inline __m512d
2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2355 {
2356   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2357                                                  (__v8df) __B,
2358                                                  (__v8df)
2359                                                  _mm512_undefined_pd (),
2360                                                  (__mmask8) -1, __R);
2361 }
2362
2363 extern __inline __m512d
2364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2365 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2366                           __m512d __B, const int __R)
2367 {
2368   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2369                                                  (__v8df) __B,
2370                                                  (__v8df) __W,
2371                                                  (__mmask8) __U, __R);
2372 }
2373
2374 extern __inline __m512d
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2377                            const int __R)
2378 {
2379   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2380                                                  (__v8df) __B,
2381                                                  (__v8df)
2382                                                  _mm512_setzero_pd (),
2383                                                  (__mmask8) __U, __R);
2384 }
2385
2386 extern __inline __m512
2387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2388 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2389 {
2390   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2391                                                 (__v16sf) __B,
2392                                                 (__v16sf)
2393                                                 _mm512_undefined_ps (),
2394                                                 (__mmask16) -1, __R);
2395 }
2396
2397 extern __inline __m512
2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2400                           __m512 __B, const int __R)
2401 {
2402   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2403                                                 (__v16sf) __B,
2404                                                 (__v16sf) __W,
2405                                                 (__mmask16) __U, __R);
2406 }
2407
2408 extern __inline __m512
2409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2410 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2411 {
2412   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2413                                                 (__v16sf) __B,
2414                                                 (__v16sf)
2415                                                 _mm512_setzero_ps (),
2416                                                 (__mmask16) __U, __R);
2417 }
2418
2419 extern __inline __m512d
2420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2421 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2422 {
2423   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2424                                                  (__v8df) __B,
2425                                                  (__v8df)
2426                                                  _mm512_undefined_pd (),
2427                                                  (__mmask8) -1, __R);
2428 }
2429
2430 extern __inline __m512d
2431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2432 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2433                           __m512d __B, const int __R)
2434 {
2435   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2436                                                  (__v8df) __B,
2437                                                  (__v8df) __W,
2438                                                  (__mmask8) __U, __R);
2439 }
2440
2441 extern __inline __m512d
2442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2443 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2444                            const int __R)
2445 {
2446   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2447                                                  (__v8df) __B,
2448                                                  (__v8df)
2449                                                  _mm512_setzero_pd (),
2450                                                  (__mmask8) __U, __R);
2451 }
2452
2453 extern __inline __m512
2454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2455 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2456 {
2457   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2458                                                 (__v16sf) __B,
2459                                                 (__v16sf)
2460                                                 _mm512_undefined_ps (),
2461                                                 (__mmask16) -1, __R);
2462 }
2463
2464 extern __inline __m512
2465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2466 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2467                           __m512 __B, const int __R)
2468 {
2469   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2470                                                 (__v16sf) __B,
2471                                                 (__v16sf) __W,
2472                                                 (__mmask16) __U, __R);
2473 }
2474
2475 extern __inline __m512
2476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2477 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2478 {
2479   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2480                                                 (__v16sf) __B,
2481                                                 (__v16sf)
2482                                                 _mm512_setzero_ps (),
2483                                                 (__mmask16) __U, __R);
2484 }
2485 #else
/* Macro forms of the add_round_pd family, used when __OPTIMIZE__ is not
   defined.  The whole expansion and every argument are parenthesized so
   that postfix operators and sizeof applied to the macro bind to the cast
   result, exactly as they would with the inline-function variants.  */
#define _mm512_add_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_addpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (C)))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_addpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C)))
2494
/* Macro forms of the add_round_ps family, used when __OPTIMIZE__ is not
   defined.  The whole expansion and every argument are parenthesized so
   that postfix operators and sizeof applied to the macro bind to the cast
   result, exactly as they would with the inline-function variants.  */
#define _mm512_add_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_addps512_mask((A), (B), (__v16sf)_mm512_undefined_ps(), -1, (C)))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_addps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C)))
2503
/* Macro forms of the sub_round_pd family, used when __OPTIMIZE__ is not
   defined.  The whole expansion and every argument are parenthesized so
   that postfix operators and sizeof applied to the macro bind to the cast
   result, exactly as they would with the inline-function variants.  */
#define _mm512_sub_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_subpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (C)))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_subpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C)))
2512
/* Macro forms of the sub_round_ps family, used when __OPTIMIZE__ is not
   defined.  The whole expansion and every argument are parenthesized so
   that postfix operators and sizeof applied to the macro bind to the cast
   result, exactly as they would with the inline-function variants.  */
#define _mm512_sub_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_subps512_mask((A), (B), (__v16sf)_mm512_undefined_ps(), -1, (C)))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_subps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C)))
2521 #endif
2522
2523 #ifdef __OPTIMIZE__
2524 extern __inline __m512d
2525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2526 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2527 {
2528   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2529                                                  (__v8df) __B,
2530                                                  (__v8df)
2531                                                  _mm512_undefined_pd (),
2532                                                  (__mmask8) -1, __R);
2533 }
2534
2535 extern __inline __m512d
2536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2537 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2538                           __m512d __B, const int __R)
2539 {
2540   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2541                                                  (__v8df) __B,
2542                                                  (__v8df) __W,
2543                                                  (__mmask8) __U, __R);
2544 }
2545
2546 extern __inline __m512d
2547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2548 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2549                            const int __R)
2550 {
2551   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2552                                                  (__v8df) __B,
2553                                                  (__v8df)
2554                                                  _mm512_setzero_pd (),
2555                                                  (__mmask8) __U, __R);
2556 }
2557
2558 extern __inline __m512
2559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2560 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2561 {
2562   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2563                                                 (__v16sf) __B,
2564                                                 (__v16sf)
2565                                                 _mm512_undefined_ps (),
2566                                                 (__mmask16) -1, __R);
2567 }
2568
2569 extern __inline __m512
2570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2572                           __m512 __B, const int __R)
2573 {
2574   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2575                                                 (__v16sf) __B,
2576                                                 (__v16sf) __W,
2577                                                 (__mmask16) __U, __R);
2578 }
2579
2580 extern __inline __m512
2581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2582 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2583 {
2584   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2585                                                 (__v16sf) __B,
2586                                                 (__v16sf)
2587                                                 _mm512_setzero_ps (),
2588                                                 (__mmask16) __U, __R);
2589 }
2590
2591 extern __inline __m512d
2592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2593 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2594 {
2595   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2596                                                  (__v8df) __V,
2597                                                  (__v8df)
2598                                                  _mm512_undefined_pd (),
2599                                                  (__mmask8) -1, __R);
2600 }
2601
2602 extern __inline __m512d
2603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2604 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2605                           __m512d __V, const int __R)
2606 {
2607   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2608                                                  (__v8df) __V,
2609                                                  (__v8df) __W,
2610                                                  (__mmask8) __U, __R);
2611 }
2612
2613 extern __inline __m512d
2614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2615 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2616                            const int __R)
2617 {
2618   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2619                                                  (__v8df) __V,
2620                                                  (__v8df)
2621                                                  _mm512_setzero_pd (),
2622                                                  (__mmask8) __U, __R);
2623 }
2624
2625 extern __inline __m512
2626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2627 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2628 {
2629   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2630                                                 (__v16sf) __B,
2631                                                 (__v16sf)
2632                                                 _mm512_undefined_ps (),
2633                                                 (__mmask16) -1, __R);
2634 }
2635
2636 extern __inline __m512
2637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2638 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2639                           __m512 __B, const int __R)
2640 {
2641   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2642                                                 (__v16sf) __B,
2643                                                 (__v16sf) __W,
2644                                                 (__mmask16) __U, __R);
2645 }
2646
2647 extern __inline __m512
2648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2649 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2650 {
2651   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2652                                                 (__v16sf) __B,
2653                                                 (__v16sf)
2654                                                 _mm512_setzero_ps (),
2655                                                 (__mmask16) __U, __R);
2656 }
2657
2658 extern __inline __m128d
2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2661 {
2662   return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2663                                                (__v2df) __B,
2664                                                __R);
2665 }
2666
2667 extern __inline __m128d
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2670                           __m128d __B, const int __R)
2671 {
2672   return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2673                                                  (__v2df) __B,
2674                                                  (__v2df) __W,
2675                                                  (__mmask8) __U, __R);
2676 }
2677
2678 extern __inline __m128d
2679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680 _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2681                            const int __R)
2682 {
2683   return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2684                                                  (__v2df) __B,
2685                                                  (__v2df)
2686                                                  _mm_setzero_pd (),
2687                                                  (__mmask8) __U, __R);
2688 }
2689
2690 extern __inline __m128
2691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2693 {
2694   return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2695                                               (__v4sf) __B,
2696                                               __R);
2697 }
2698
2699 extern __inline __m128
2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701 _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2702                           __m128 __B, const int __R)
2703 {
2704   return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2705                                                  (__v4sf) __B,
2706                                                  (__v4sf) __W,
2707                                                  (__mmask8) __U, __R);
2708 }
2709
2710 extern __inline __m128
2711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2712 _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2713                            const int __R)
2714 {
2715   return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2716                                                  (__v4sf) __B,
2717                                                  (__v4sf)
2718                                                  _mm_setzero_ps (),
2719                                                  (__mmask8) __U, __R);
2720 }
2721
2722 extern __inline __m128d
2723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2724 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2725 {
2726   return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2727                                                (__v2df) __B,
2728                                                __R);
2729 }
2730
2731 extern __inline __m128d
2732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2733 _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2734                           __m128d __B, const int __R)
2735 {
2736   return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2737                                                  (__v2df) __B,
2738                                                  (__v2df) __W,
2739                                                  (__mmask8) __U, __R);
2740 }
2741
2742 extern __inline __m128d
2743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2744 _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2745                            const int __R)
2746 {
2747   return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2748                                                  (__v2df) __B,
2749                                                  (__v2df)
2750                                                  _mm_setzero_pd (),
2751                                                  (__mmask8) __U, __R);
2752 }
2753
2754 extern __inline __m128
2755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2756 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2757 {
2758   return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2759                                               (__v4sf) __B,
2760                                               __R);
2761 }
2762
2763 extern __inline __m128
2764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2765 _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2766                           __m128 __B, const int __R)
2767 {
2768   return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2769                                                  (__v4sf) __B,
2770                                                  (__v4sf) __W,
2771                                                  (__mmask8) __U, __R);
2772 }
2773
2774 extern __inline __m128
2775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2776 _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2777                            const int __R)
2778 {
2779   return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2780                                                  (__v4sf) __B,
2781                                                  (__v4sf)
2782                                                  _mm_setzero_ps (),
2783                                                  (__mmask8) __U, __R);
2784 }
2785
2786 #else
/* Macro forms of the mul_round_pd family, used when __OPTIMIZE__ is not
   defined.  The whole expansion and every argument are parenthesized so
   that postfix operators and sizeof applied to the macro bind to the cast
   result, exactly as they would with the inline-function variants.  */
#define _mm512_mul_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_mulpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (C)))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_mulpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C)))
2795
/* Macro forms of the mul_round_ps family, used when __OPTIMIZE__ is not
   defined.  The whole expansion and every argument are parenthesized so
   that postfix operators and sizeof applied to the macro bind to the cast
   result, exactly as they would with the inline-function variants.  */
#define _mm512_mul_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_mulps512_mask((A), (B), (__v16sf)_mm512_undefined_ps(), -1, (C)))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_mulps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C)))
2804
/* Macro forms of the div_round_pd family, used when __OPTIMIZE__ is not
   defined.  The whole expansion and every argument are parenthesized so
   that postfix operators and sizeof applied to the macro bind to the cast
   result, exactly as they would with the inline-function variants.  */
#define _mm512_div_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_divpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (C)))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_divpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C)))
2813
2814 #define _mm512_div_round_ps(A, B, C)            \
2815     (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2816
2817 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2818     (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
2819
/* Macro form: calls __builtin_ia32_divps512_mask with a zero vector as the
   third operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_maskz_div_round_ps(U, A, B, C)   \
    ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
        (__v16sf) _mm512_setzero_ps (), (U), (C)))
2822
/* Macro form: calls __builtin_ia32_mulsd_round on A and B with rounding
   argument C.  Fully parenthesized so the cast covers the whole call.  */
#define _mm_mul_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_mulsd_round ((A), (B), (C)))
2825
/* Macro form: calls __builtin_ia32_mulsd_mask_round with W as the third
   operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm_mask_mul_round_sd(W, U, A, B, C) \
    ((__m128d) __builtin_ia32_mulsd_mask_round ((A), (B), (W), (U), (C)))
2828
/* Macro form: calls __builtin_ia32_mulsd_mask_round with a zero vector as
   the third operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm_maskz_mul_round_sd(U, A, B, C)   \
    ((__m128d) __builtin_ia32_mulsd_mask_round ((A), (B), \
        (__v2df) _mm_setzero_pd (), (U), (C)))
2831
/* Macro form: calls __builtin_ia32_mulss_round on A and B with rounding
   argument C.  Fully parenthesized so the cast covers the whole call.  */
#define _mm_mul_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_mulss_round ((A), (B), (C)))
2834
/* Macro form: calls __builtin_ia32_mulss_mask_round with W as the third
   operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm_mask_mul_round_ss(W, U, A, B, C) \
    ((__m128) __builtin_ia32_mulss_mask_round ((A), (B), (W), (U), (C)))
2837
/* Macro form: calls __builtin_ia32_mulss_mask_round with a zero vector as
   the third operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm_maskz_mul_round_ss(U, A, B, C)   \
    ((__m128) __builtin_ia32_mulss_mask_round ((A), (B), \
        (__v4sf) _mm_setzero_ps (), (U), (C)))
2840
/* Macro form: calls __builtin_ia32_divsd_round on A and B with rounding
   argument C.  Fully parenthesized so the cast covers the whole call.  */
#define _mm_div_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_divsd_round ((A), (B), (C)))
2843
/* Macro form: calls __builtin_ia32_divsd_mask_round with W as the third
   operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm_mask_div_round_sd(W, U, A, B, C) \
    ((__m128d) __builtin_ia32_divsd_mask_round ((A), (B), (W), (U), (C)))
2846
/* Macro form: calls __builtin_ia32_divsd_mask_round with a zero vector as
   the third operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm_maskz_div_round_sd(U, A, B, C)   \
    ((__m128d) __builtin_ia32_divsd_mask_round ((A), (B), \
        (__v2df) _mm_setzero_pd (), (U), (C)))
2849
/* Macro form: calls __builtin_ia32_divss_round on A and B with rounding
   argument C.  Fully parenthesized so the cast covers the whole call.  */
#define _mm_div_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_divss_round ((A), (B), (C)))
2852
/* Macro form: calls __builtin_ia32_divss_mask_round with W as the third
   operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm_mask_div_round_ss(W, U, A, B, C) \
    ((__m128) __builtin_ia32_divss_mask_round ((A), (B), (W), (U), (C)))
2855
/* Macro form: calls __builtin_ia32_divss_mask_round with a zero vector as
   the third operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm_maskz_div_round_ss(U, A, B, C)   \
    ((__m128) __builtin_ia32_divss_mask_round ((A), (B), \
        (__v4sf) _mm_setzero_ps (), (U), (C)))
2858
2859 #endif
2860
2861 #ifdef __OPTIMIZE__
2862 extern __inline __m512d
2863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2864 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2865 {
2866   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2867                                                  (__v8df) __B,
2868                                                  (__v8df)
2869                                                  _mm512_undefined_pd (),
2870                                                  (__mmask8) -1, __R);
2871 }
2872
2873 extern __inline __m512d
2874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2875 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2876                           __m512d __B, const int __R)
2877 {
2878   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2879                                                  (__v8df) __B,
2880                                                  (__v8df) __W,
2881                                                  (__mmask8) __U, __R);
2882 }
2883
2884 extern __inline __m512d
2885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2886 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2887                            const int __R)
2888 {
2889   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2890                                                  (__v8df) __B,
2891                                                  (__v8df)
2892                                                  _mm512_setzero_pd (),
2893                                                  (__mmask8) __U, __R);
2894 }
2895
2896 extern __inline __m512
2897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2898 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2899 {
2900   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2901                                                 (__v16sf) __B,
2902                                                 (__v16sf)
2903                                                 _mm512_undefined_ps (),
2904                                                 (__mmask16) -1, __R);
2905 }
2906
2907 extern __inline __m512
2908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2909 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2910                           __m512 __B, const int __R)
2911 {
2912   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2913                                                 (__v16sf) __B,
2914                                                 (__v16sf) __W,
2915                                                 (__mmask16) __U, __R);
2916 }
2917
2918 extern __inline __m512
2919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2920 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2921 {
2922   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2923                                                 (__v16sf) __B,
2924                                                 (__v16sf)
2925                                                 _mm512_setzero_ps (),
2926                                                 (__mmask16) __U, __R);
2927 }
2928
2929 extern __inline __m512d
2930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2931 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2932 {
2933   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2934                                                  (__v8df) __B,
2935                                                  (__v8df)
2936                                                  _mm512_undefined_pd (),
2937                                                  (__mmask8) -1, __R);
2938 }
2939
2940 extern __inline __m512d
2941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2942 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2943                           __m512d __B, const int __R)
2944 {
2945   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2946                                                  (__v8df) __B,
2947                                                  (__v8df) __W,
2948                                                  (__mmask8) __U, __R);
2949 }
2950
2951 extern __inline __m512d
2952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2953 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2954                            const int __R)
2955 {
2956   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2957                                                  (__v8df) __B,
2958                                                  (__v8df)
2959                                                  _mm512_setzero_pd (),
2960                                                  (__mmask8) __U, __R);
2961 }
2962
2963 extern __inline __m512
2964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2965 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2966 {
2967   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2968                                                 (__v16sf) __B,
2969                                                 (__v16sf)
2970                                                 _mm512_undefined_ps (),
2971                                                 (__mmask16) -1, __R);
2972 }
2973
2974 extern __inline __m512
2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2976 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2977                           __m512 __B, const int __R)
2978 {
2979   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2980                                                 (__v16sf) __B,
2981                                                 (__v16sf) __W,
2982                                                 (__mmask16) __U, __R);
2983 }
2984
2985 extern __inline __m512
2986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2988 {
2989   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2990                                                 (__v16sf) __B,
2991                                                 (__v16sf)
2992                                                 _mm512_setzero_ps (),
2993                                                 (__mmask16) __U, __R);
2994 }
2995 #else
/* Macro form: calls __builtin_ia32_maxpd512_mask on A and B with an
   undefined third operand, a -1 mask and rounding argument R.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_max_round_pd(A, B,  R) \
    ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
        (__v8df) _mm512_undefined_pd (), -1, (R)))
2998
/* Macro form: calls __builtin_ia32_maxpd512_mask with W as the third
   operand, U as the mask and R as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_mask_max_round_pd(W, U,  A, B, R) \
    ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))
3001
/* Macro form: calls __builtin_ia32_maxpd512_mask with a zero vector as the
   third operand, U as the mask and R as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_maskz_max_round_pd(U, A,  B, R) \
    ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
        (__v8df) _mm512_setzero_pd (), (U), (R)))
3004
/* Macro form: calls __builtin_ia32_maxps512_mask on A and B with an
   undefined third operand, a -1 mask and rounding argument R.  The
   placeholder operand comes from _mm512_undefined_ps (), the
   single-precision helper, rather than the double-precision
   _mm512_undefined_pd () used previously.  Fully parenthesized so the
   cast covers the whole call.  */
#define _mm512_max_round_ps(A, B,  R) \
    ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
        (__v16sf) _mm512_undefined_ps (), -1, (R)))
3007
/* Macro form: calls __builtin_ia32_maxps512_mask with W as the third
   operand, U as the mask and R as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_mask_max_round_ps(W, U,  A, B, R) \
    ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))
3010
/* Macro form: calls __builtin_ia32_maxps512_mask with a zero vector as the
   third operand, U as the mask and R as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_maskz_max_round_ps(U, A,  B, R) \
    ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
        (__v16sf) _mm512_setzero_ps (), (U), (R)))
3013
/* Macro form: calls __builtin_ia32_minpd512_mask on A and B with an
   undefined third operand, a -1 mask and rounding argument R.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_min_round_pd(A, B,  R) \
    ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
        (__v8df) _mm512_undefined_pd (), -1, (R)))
3016
/* Macro form: calls __builtin_ia32_minpd512_mask with W as the third
   operand, U as the mask and R as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_mask_min_round_pd(W, U,  A, B, R) \
    ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))
3019
/* Macro form: calls __builtin_ia32_minpd512_mask with a zero vector as the
   third operand, U as the mask and R as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_maskz_min_round_pd(U, A,  B, R) \
    ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
        (__v8df) _mm512_setzero_pd (), (U), (R)))
3022
/* Macro form: calls __builtin_ia32_minps512_mask on A and B with an
   undefined third operand, a -1 mask and rounding argument R.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_min_round_ps(A, B, R) \
    ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
        (__v16sf) _mm512_undefined_ps (), -1, (R)))
3025
/* Macro form: calls __builtin_ia32_minps512_mask with W as the third
   operand, U as the mask and R as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_mask_min_round_ps(W, U,  A, B, R) \
    ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))
3028
/* Macro form: calls __builtin_ia32_minps512_mask with a zero vector as the
   third operand, U as the mask and R as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_maskz_min_round_ps(U, A,  B, R) \
    ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
        (__v16sf) _mm512_setzero_ps (), (U), (R)))
3031 #endif
3032
3033 #ifdef __OPTIMIZE__
3034 extern __inline __m512d
3035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3036 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
3037 {
3038   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3039                                                     (__v8df) __B,
3040                                                     (__v8df)
3041                                                     _mm512_undefined_pd (),
3042                                                     (__mmask8) -1, __R);
3043 }
3044
3045 extern __inline __m512d
3046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3047 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3048                              __m512d __B, const int __R)
3049 {
3050   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3051                                                     (__v8df) __B,
3052                                                     (__v8df) __W,
3053                                                     (__mmask8) __U, __R);
3054 }
3055
3056 extern __inline __m512d
3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3059                               const int __R)
3060 {
3061   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3062                                                     (__v8df) __B,
3063                                                     (__v8df)
3064                                                     _mm512_setzero_pd (),
3065                                                     (__mmask8) __U, __R);
3066 }
3067
3068 extern __inline __m512
3069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3070 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3071 {
3072   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3073                                                    (__v16sf) __B,
3074                                                    (__v16sf)
3075                                                    _mm512_undefined_ps (),
3076                                                    (__mmask16) -1, __R);
3077 }
3078
3079 extern __inline __m512
3080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3082                              __m512 __B, const int __R)
3083 {
3084   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3085                                                    (__v16sf) __B,
3086                                                    (__v16sf) __W,
3087                                                    (__mmask16) __U, __R);
3088 }
3089
3090 extern __inline __m512
3091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3093                               const int __R)
3094 {
3095   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3096                                                    (__v16sf) __B,
3097                                                    (__v16sf)
3098                                                    _mm512_setzero_ps (),
3099                                                    (__mmask16) __U, __R);
3100 }
3101
3102 extern __inline __m128d
3103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3104 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3105 {
3106   return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3107                                                        (__v2df) __B,
3108                                                        (__v2df)
3109                                                        _mm_setzero_pd (),
3110                                                        (__mmask8) -1, __R);
3111 }
3112
3113 extern __inline __m128d
3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3115 _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3116                           const int __R)
3117 {
3118   return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3119                                                        (__v2df) __B,
3120                                                        (__v2df) __W,
3121                                                        (__mmask8) __U, __R);
3122 }
3123
3124 extern __inline __m128d
3125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3126 _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
3127                            const int __R)
3128 {
3129   return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3130                                                        (__v2df) __B,
3131                                                        (__v2df)
3132                                                        _mm_setzero_pd (),
3133                                                        (__mmask8) __U, __R);
3134 }
3135
3136 extern __inline __m128
3137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3138 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3139 {
3140   return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3141                                                       (__v4sf) __B,
3142                                                       (__v4sf)
3143                                                       _mm_setzero_ps (),
3144                                                       (__mmask8) -1, __R);
3145 }
3146
3147 extern __inline __m128
3148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3149 _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3150                          const int __R)
3151 {
3152   return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3153                                                       (__v4sf) __B,
3154                                                       (__v4sf) __W,
3155                                                       (__mmask8) __U, __R);
3156 }
3157
3158 extern __inline __m128
3159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3160 _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
3161 {
3162   return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3163                                                       (__v4sf) __B,
3164                                                       (__v4sf)
3165                                                       _mm_setzero_ps (),
3166                                                       (__mmask8) __U, __R);
3167 }
3168 #else
/* Macro form: calls __builtin_ia32_scalefpd512_mask on A and B with an
   undefined third operand, a -1 mask and rounding argument C.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_scalef_round_pd(A, B, C)            \
    ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
        (__v8df) _mm512_undefined_pd (), -1, (C)))
3171
/* Macro form: calls __builtin_ia32_scalefpd512_mask with W as the third
   operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))
3174
/* Macro form: calls __builtin_ia32_scalefpd512_mask with a zero vector as
   the third operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
    ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
        (__v8df) _mm512_setzero_pd (), (U), (C)))
3177
/* Macro form: calls __builtin_ia32_scalefps512_mask on A and B with an
   undefined third operand, a -1 mask and rounding argument C.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_scalef_round_ps(A, B, C)            \
    ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
        (__v16sf) _mm512_undefined_ps (), -1, (C)))
3180
/* Macro form: calls __builtin_ia32_scalefps512_mask with W as the third
   operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))
3183
/* Macro form: calls __builtin_ia32_scalefps512_mask with a zero vector as
   the third operand, U as the mask and C as the rounding argument.  Fully
   parenthesized so the cast covers the whole call.  */
#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
    ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
        (__v16sf) _mm512_setzero_ps (), (U), (C)))
3186
/* Macro form: calls __builtin_ia32_scalefsd_mask_round on A and B with a
   zero vector as the third operand, a -1 mask and rounding argument C.
   Fully parenthesized so the cast covers the whole call.  */
#define _mm_scalef_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), \
        (__v2df) _mm_setzero_pd (), -1, (C)))

/* Masked macro forms.  The __OPTIMIZE__ branch of this conditional defines
   _mm_mask_scalef_round_sd and _mm_maskz_scalef_round_sd as inline
   functions; these macros make the same names available in this branch,
   passing the same operands to the same builtin.  */
#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
    ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_scalef_round_sd(U, A, B, C)   \
    ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), \
        (__v2df) _mm_setzero_pd (), (U), (C)))
3190
/* Macro form: calls __builtin_ia32_scalefss_mask_round on A and B with a
   zero vector as the third operand, a -1 mask and rounding argument C.
   Fully parenthesized so the cast covers the whole call.  */
#define _mm_scalef_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), \
        (__v4sf) _mm_setzero_ps (), -1, (C)))

/* Masked macro forms.  The __OPTIMIZE__ branch of this conditional defines
   _mm_mask_scalef_round_ss and _mm_maskz_scalef_round_ss as inline
   functions; these macros make the same names available in this branch,
   passing the same operands to the same builtin.  */
#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
    ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_scalef_round_ss(U, A, B, C)   \
    ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), \
        (__v4sf) _mm_setzero_ps (), (U), (C)))
3194 #endif
3195
3196 #ifdef __OPTIMIZE__
3197 extern __inline __m512d
3198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3199 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3200 {
3201   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3202                                                     (__v8df) __B,
3203                                                     (__v8df) __C,
3204                                                     (__mmask8) -1, __R);
3205 }
3206
3207 extern __inline __m512d
3208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3209 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3210                             __m512d __C, const int __R)
3211 {
3212   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3213                                                     (__v8df) __B,
3214                                                     (__v8df) __C,
3215                                                     (__mmask8) __U, __R);
3216 }
3217
3218 extern __inline __m512d
3219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3220 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3221                              __mmask8 __U, const int __R)
3222 {
3223   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3224                                                      (__v8df) __B,
3225                                                      (__v8df) __C,
3226                                                      (__mmask8) __U, __R);
3227 }
3228
3229 extern __inline __m512d
3230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3231 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3232                              __m512d __C, const int __R)
3233 {
3234   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3235                                                      (__v8df) __B,
3236                                                      (__v8df) __C,
3237                                                      (__mmask8) __U, __R);
3238 }
3239
3240 extern __inline __m512
3241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3242 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3243 {
3244   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3245                                                    (__v16sf) __B,
3246                                                    (__v16sf) __C,
3247                                                    (__mmask16) -1, __R);
3248 }
3249
3250 extern __inline __m512
3251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3252 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3253                             __m512 __C, const int __R)
3254 {
3255   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3256                                                    (__v16sf) __B,
3257                                                    (__v16sf) __C,
3258                                                    (__mmask16) __U, __R);
3259 }
3260
3261 extern __inline __m512
3262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3263 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3264                              __mmask16 __U, const int __R)
3265 {
3266   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3267                                                     (__v16sf) __B,
3268                                                     (__v16sf) __C,
3269                                                     (__mmask16) __U, __R);
3270 }
3271
3272 extern __inline __m512
3273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3274 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3275                              __m512 __C, const int __R)
3276 {
3277   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3278                                                     (__v16sf) __B,
3279                                                     (__v16sf) __C,
3280                                                     (__mmask16) __U, __R);
3281 }
3282
3283 extern __inline __m512d
3284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3285 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3286 {
3287   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3288                                                     (__v8df) __B,
3289                                                     -(__v8df) __C,
3290                                                     (__mmask8) -1, __R);
3291 }
3292
3293 extern __inline __m512d
3294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3295 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3296                             __m512d __C, const int __R)
3297 {
3298   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3299                                                     (__v8df) __B,
3300                                                     -(__v8df) __C,
3301                                                     (__mmask8) __U, __R);
3302 }
3303
3304 extern __inline __m512d
3305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3306 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3307                              __mmask8 __U, const int __R)
3308 {
3309   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3310                                                      (__v8df) __B,
3311                                                      (__v8df) __C,
3312                                                      (__mmask8) __U, __R);
3313 }
3314
3315 extern __inline __m512d
3316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3317 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3318                              __m512d __C, const int __R)
3319 {
3320   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3321                                                      (__v8df) __B,
3322                                                      -(__v8df) __C,
3323                                                      (__mmask8) __U, __R);
3324 }
3325
3326 extern __inline __m512
3327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3328 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3329 {
3330   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3331                                                    (__v16sf) __B,
3332                                                    -(__v16sf) __C,
3333                                                    (__mmask16) -1, __R);
3334 }
3335
3336 extern __inline __m512
3337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3338 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3339                             __m512 __C, const int __R)
3340 {
3341   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3342                                                    (__v16sf) __B,
3343                                                    -(__v16sf) __C,
3344                                                    (__mmask16) __U, __R);
3345 }
3346
3347 extern __inline __m512
3348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3349 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3350                              __mmask16 __U, const int __R)
3351 {
3352   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3353                                                     (__v16sf) __B,
3354                                                     (__v16sf) __C,
3355                                                     (__mmask16) __U, __R);
3356 }
3357
3358 extern __inline __m512
3359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3361                              __m512 __C, const int __R)
3362 {
3363   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3364                                                     (__v16sf) __B,
3365                                                     -(__v16sf) __C,
3366                                                     (__mmask16) __U, __R);
3367 }
3368
3369 extern __inline __m512d
3370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3371 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3372 {
3373   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3374                                                        (__v8df) __B,
3375                                                        (__v8df) __C,
3376                                                        (__mmask8) -1, __R);
3377 }
3378
3379 extern __inline __m512d
3380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3381 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3382                                __m512d __C, const int __R)
3383 {
3384   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3385                                                        (__v8df) __B,
3386                                                        (__v8df) __C,
3387                                                        (__mmask8) __U, __R);
3388 }
3389
3390 extern __inline __m512d
3391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3392 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3393                                 __mmask8 __U, const int __R)
3394 {
3395   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3396                                                         (__v8df) __B,
3397                                                         (__v8df) __C,
3398                                                         (__mmask8) __U, __R);
3399 }
3400
3401 extern __inline __m512d
3402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3403 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3404                                 __m512d __C, const int __R)
3405 {
3406   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3407                                                         (__v8df) __B,
3408                                                         (__v8df) __C,
3409                                                         (__mmask8) __U, __R);
3410 }
3411
3412 extern __inline __m512
3413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3414 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3415 {
3416   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3417                                                       (__v16sf) __B,
3418                                                       (__v16sf) __C,
3419                                                       (__mmask16) -1, __R);
3420 }
3421
3422 extern __inline __m512
3423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3424 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3425                                __m512 __C, const int __R)
3426 {
3427   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3428                                                       (__v16sf) __B,
3429                                                       (__v16sf) __C,
3430                                                       (__mmask16) __U, __R);
3431 }
3432
3433 extern __inline __m512
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3436                                 __mmask16 __U, const int __R)
3437 {
3438   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3439                                                        (__v16sf) __B,
3440                                                        (__v16sf) __C,
3441                                                        (__mmask16) __U, __R);
3442 }
3443
3444 extern __inline __m512
3445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3446 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3447                                 __m512 __C, const int __R)
3448 {
3449   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3450                                                        (__v16sf) __B,
3451                                                        (__v16sf) __C,
3452                                                        (__mmask16) __U, __R);
3453 }
3454
3455 extern __inline __m512d
3456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3458 {
3459   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3460                                                        (__v8df) __B,
3461                                                        -(__v8df) __C,
3462                                                        (__mmask8) -1, __R);
3463 }
3464
3465 extern __inline __m512d
3466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3467 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3468                                __m512d __C, const int __R)
3469 {
3470   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3471                                                        (__v8df) __B,
3472                                                        -(__v8df) __C,
3473                                                        (__mmask8) __U, __R);
3474 }
3475
3476 extern __inline __m512d
3477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3478 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3479                                 __mmask8 __U, const int __R)
3480 {
3481   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3482                                                         (__v8df) __B,
3483                                                         (__v8df) __C,
3484                                                         (__mmask8) __U, __R);
3485 }
3486
3487 extern __inline __m512d
3488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3489 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3490                                 __m512d __C, const int __R)
3491 {
3492   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3493                                                         (__v8df) __B,
3494                                                         -(__v8df) __C,
3495                                                         (__mmask8) __U, __R);
3496 }
3497
3498 extern __inline __m512
3499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3500 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3501 {
3502   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3503                                                       (__v16sf) __B,
3504                                                       -(__v16sf) __C,
3505                                                       (__mmask16) -1, __R);
3506 }
3507
3508 extern __inline __m512
3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3511                                __m512 __C, const int __R)
3512 {
3513   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3514                                                       (__v16sf) __B,
3515                                                       -(__v16sf) __C,
3516                                                       (__mmask16) __U, __R);
3517 }
3518
3519 extern __inline __m512
3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3522                                 __mmask16 __U, const int __R)
3523 {
3524   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3525                                                        (__v16sf) __B,
3526                                                        (__v16sf) __C,
3527                                                        (__mmask16) __U, __R);
3528 }
3529
3530 extern __inline __m512
3531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3532 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3533                                 __m512 __C, const int __R)
3534 {
3535   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3536                                                        (__v16sf) __B,
3537                                                        -(__v16sf) __C,
3538                                                        (__mmask16) __U, __R);
3539 }
3540
3541 extern __inline __m512d
3542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3543 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3544 {
3545   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3546                                                     (__v8df) __B,
3547                                                     (__v8df) __C,
3548                                                     (__mmask8) -1, __R);
3549 }
3550
3551 extern __inline __m512d
3552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3553 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3554                              __m512d __C, const int __R)
3555 {
3556   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3557                                                      (__v8df) __B,
3558                                                      (__v8df) __C,
3559                                                      (__mmask8) __U, __R);
3560 }
3561
3562 extern __inline __m512d
3563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3564 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3565                               __mmask8 __U, const int __R)
3566 {
3567   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3568                                                      (__v8df) __B,
3569                                                      (__v8df) __C,
3570                                                      (__mmask8) __U, __R);
3571 }
3572
3573 extern __inline __m512d
3574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3575 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3576                               __m512d __C, const int __R)
3577 {
3578   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3579                                                      (__v8df) __B,
3580                                                      (__v8df) __C,
3581                                                      (__mmask8) __U, __R);
3582 }
3583
3584 extern __inline __m512
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3587 {
3588   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3589                                                    (__v16sf) __B,
3590                                                    (__v16sf) __C,
3591                                                    (__mmask16) -1, __R);
3592 }
3593
3594 extern __inline __m512
3595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3596 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3597                              __m512 __C, const int __R)
3598 {
3599   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3600                                                     (__v16sf) __B,
3601                                                     (__v16sf) __C,
3602                                                     (__mmask16) __U, __R);
3603 }
3604
3605 extern __inline __m512
3606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3607 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3608                               __mmask16 __U, const int __R)
3609 {
3610   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3611                                                     (__v16sf) __B,
3612                                                     (__v16sf) __C,
3613                                                     (__mmask16) __U, __R);
3614 }
3615
3616 extern __inline __m512
3617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3618 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3619                               __m512 __C, const int __R)
3620 {
3621   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3622                                                     (__v16sf) __B,
3623                                                     (__v16sf) __C,
3624                                                     (__mmask16) __U, __R);
3625 }
3626
3627 extern __inline __m512d
3628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3629 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3630 {
3631   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3632                                                     (__v8df) __B,
3633                                                     -(__v8df) __C,
3634                                                     (__mmask8) -1, __R);
3635 }
3636
3637 extern __inline __m512d
3638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3639 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3640                              __m512d __C, const int __R)
3641 {
3642   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3643                                                      (__v8df) __B,
3644                                                      (__v8df) __C,
3645                                                      (__mmask8) __U, __R);
3646 }
3647
3648 extern __inline __m512d
3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3651                               __mmask8 __U, const int __R)
3652 {
3653   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3654                                                       (__v8df) __B,
3655                                                       (__v8df) __C,
3656                                                       (__mmask8) __U, __R);
3657 }
3658
3659 extern __inline __m512d
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3662                               __m512d __C, const int __R)
3663 {
3664   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3665                                                      (__v8df) __B,
3666                                                      -(__v8df) __C,
3667                                                      (__mmask8) __U, __R);
3668 }
3669
3670 extern __inline __m512
3671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3672 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3673 {
3674   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3675                                                    (__v16sf) __B,
3676                                                    -(__v16sf) __C,
3677                                                    (__mmask16) -1, __R);
3678 }
3679
3680 extern __inline __m512
3681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3682 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3683                              __m512 __C, const int __R)
3684 {
3685   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3686                                                     (__v16sf) __B,
3687                                                     (__v16sf) __C,
3688                                                     (__mmask16) __U, __R);
3689 }
3690
3691 extern __inline __m512
3692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3693 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3694                               __mmask16 __U, const int __R)
3695 {
3696   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3697                                                      (__v16sf) __B,
3698                                                      (__v16sf) __C,
3699                                                      (__mmask16) __U, __R);
3700 }
3701
3702 extern __inline __m512
3703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3704 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3705                               __m512 __C, const int __R)
3706 {
3707   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3708                                                     (__v16sf) __B,
3709                                                     -(__v16sf) __C,
3710                                                     (__mmask16) __U, __R);
3711 }
3712 #else
/* Macro forms of the _mm512_*fmadd_round_pd family.  Each expansion is
   wrapped in parentheses so that the cast result is a single primary
   expression (a bare `(type)call(...)` mis-parses when followed by a
   postfix operator such as a vector subscript).  */
#define _mm512_fmadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R))
3724
/* Macro forms of the _mm512_*fmadd_round_ps family.  Each expansion is
   wrapped in parentheses so that the cast result is a single primary
   expression (a bare `(type)call(...)` mis-parses when followed by a
   postfix operator such as a vector subscript).  */
#define _mm512_fmadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R))
3736
/* Macro forms of the _mm512_*fmsub_round_pd family.  All but the mask3
   form go through the fmadd builtins with C negated; mask3 uses the
   dedicated vfmsub builtin.  Each expansion is wrapped in parentheses so
   that the cast result is a single primary expression.  */
#define _mm512_fmsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R))
3748
/* Macro forms of the _mm512_*fmsub_round_ps family.  All but the mask3
   form go through the fmadd builtins with C negated; mask3 uses the
   dedicated vfmsub builtin.  Each expansion is wrapped in parentheses so
   that the cast result is a single primary expression.  */
#define _mm512_fmsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R))
3760
/* Unmasked macro form (mask operand -1).  The expansion is wrapped in
   parentheses so that the cast result is a single primary expression.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R))
3763
/* Masked fmaddsub must call the vfmaddsub builtin: the previous expansion
   used the plain vfmaddpd512_mask builtin, i.e. a different operation
   from the one this macro is named for and from what the _ps counterpart
   of this macro calls.  The expansion is also parenthesized so that the
   cast result is a single primary expression.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R))
3766
/* mask3 and maskz macro forms of fmaddsub on packed doubles.  Each
   expansion is wrapped in parentheses so that the cast result is a
   single primary expression.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R))
3772
/* Macro forms of the _mm512_*fmaddsub_round_ps family.  Each expansion
   is wrapped in parentheses so that the cast result is a single primary
   expression (a bare `(type)call(...)` mis-parses when followed by a
   postfix operator such as a vector subscript).  */
#define _mm512_fmaddsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R))
3784
/* Macro forms of the _mm512_*fmsubadd_round_pd family.  All but the
   mask3 form go through the fmaddsub builtins with C negated; mask3 uses
   the dedicated vfmsubadd builtin.  Each expansion is wrapped in
   parentheses so that the cast result is a single primary expression.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R))
3796
/* Macro forms of the _mm512_*fmsubadd_round_ps family.  All but the
   mask3 form go through the fmaddsub builtins with C negated; mask3 uses
   the dedicated vfmsubadd builtin.  Each expansion is wrapped in
   parentheses so that the cast result is a single primary expression.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R))
3808
/* Unmasked macro form: fmadd builtin with A negated and mask operand -1.
   The expansion is wrapped in parentheses so that the cast result is a
   single primary expression.  */
#define _mm512_fnmadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R))
3811
/* A is passed unmodified, exactly as the always-inline definition of
   this intrinsic does.  The previous expansion passed -(A) to the
   dedicated vfnmadd builtin, stacking an explicit negation on top of the
   builtin's own (per its name) and so disagreeing with the inline form.
   The expansion is also parenthesized so that the cast result is a
   single primary expression.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R))
3814
/* mask3 and maskz macro forms of fnmadd on packed doubles: fmadd
   builtins with A negated.  Each expansion is wrapped in parentheses so
   that the cast result is a single primary expression.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R))
3820
/* Unmasked macro form: fmadd builtin with A negated and mask operand -1.
   The expansion is wrapped in parentheses so that the cast result is a
   single primary expression.  */
#define _mm512_fnmadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R))
3823
/* A is passed unmodified, exactly as the always-inline definition of
   this intrinsic does.  The previous expansion passed -(A) to the
   dedicated vfnmadd builtin, stacking an explicit negation on top of the
   builtin's own (per its name) and so disagreeing with the inline form.
   The expansion is also parenthesized so that the cast result is a
   single primary expression.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R))
3826
/* mask3 and maskz macro forms of fnmadd on packed floats: fmadd builtins
   with A negated.  Each expansion is wrapped in parentheses so that the
   cast result is a single primary expression.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R))
3832
/* Macro forms of the _mm512_*fnmsub_round_pd family.  The unmasked and
   maskz forms use the fmadd builtins with A and C negated; the mask and
   mask3 forms use the dedicated vfnmsub builtins with operands passed
   as-is.  Each expansion is wrapped in parentheses so that the cast
   result is a single primary expression.  */
#define _mm512_fnmsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R))
3844
/* Macro forms of the _mm512_*fnmsub_round_ps family.  The unmasked and
   maskz forms use the fmadd builtins with A and C negated; the mask and
   mask3 forms use the dedicated vfnmsub builtins with operands passed
   as-is.  Each expansion is wrapped in parentheses so that the cast
   result is a single primary expression.  */
#define _mm512_fnmsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R))
3856 #endif
3857
3858 extern __inline __m512i
3859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3860 _mm512_abs_epi64 (__m512i __A)
3861 {
3862   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3863                                                  (__v8di)
3864                                                  _mm512_undefined_epi32 (),
3865                                                  (__mmask8) -1);
3866 }
3867
3868 extern __inline __m512i
3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3871 {
3872   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3873                                                  (__v8di) __W,
3874                                                  (__mmask8) __U);
3875 }
3876
3877 extern __inline __m512i
3878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3879 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3880 {
3881   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3882                                                  (__v8di)
3883                                                  _mm512_setzero_si512 (),
3884                                                  (__mmask8) __U);
3885 }
3886
3887 extern __inline __m512i
3888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3889 _mm512_abs_epi32 (__m512i __A)
3890 {
3891   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3892                                                  (__v16si)
3893                                                  _mm512_undefined_epi32 (),
3894                                                  (__mmask16) -1);
3895 }
3896
3897 extern __inline __m512i
3898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3899 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3900 {
3901   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3902                                                  (__v16si) __W,
3903                                                  (__mmask16) __U);
3904 }
3905
3906 extern __inline __m512i
3907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3908 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3909 {
3910   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3911                                                  (__v16si)
3912                                                  _mm512_setzero_si512 (),
3913                                                  (__mmask16) __U);
3914 }
3915
3916 extern __inline __m512
3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918 _mm512_broadcastss_ps (__m128 __A)
3919 {
3920   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3921                                                  (__v16sf)
3922                                                  _mm512_undefined_ps (),
3923                                                  (__mmask16) -1);
3924 }
3925
3926 extern __inline __m512
3927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3928 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3929 {
3930   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3931                                                  (__v16sf) __O, __M);
3932 }
3933
3934 extern __inline __m512
3935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3937 {
3938   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3939                                                  (__v16sf)
3940                                                  _mm512_setzero_ps (),
3941                                                  __M);
3942 }
3943
3944 extern __inline __m512d
3945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3946 _mm512_broadcastsd_pd (__m128d __A)
3947 {
3948   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3949                                                   (__v8df)
3950                                                   _mm512_undefined_pd (),
3951                                                   (__mmask8) -1);
3952 }
3953
3954 extern __inline __m512d
3955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3956 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3957 {
3958   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3959                                                   (__v8df) __O, __M);
3960 }
3961
3962 extern __inline __m512d
3963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3964 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3965 {
3966   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3967                                                   (__v8df)
3968                                                   _mm512_setzero_pd (),
3969                                                   __M);
3970 }
3971
3972 extern __inline __m512i
3973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3974 _mm512_broadcastd_epi32 (__m128i __A)
3975 {
3976   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3977                                                   (__v16si)
3978                                                   _mm512_undefined_epi32 (),
3979                                                   (__mmask16) -1);
3980 }
3981
3982 extern __inline __m512i
3983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3984 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3985 {
3986   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3987                                                   (__v16si) __O, __M);
3988 }
3989
3990 extern __inline __m512i
3991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3992 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3993 {
3994   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3995                                                   (__v16si)
3996                                                   _mm512_setzero_si512 (),
3997                                                   __M);
3998 }
3999
4000 extern __inline __m512i
4001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4002 _mm512_set1_epi32 (int __A)
4003 {
4004   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4005                                                            (__v16si)
4006                                                            _mm512_undefined_epi32 (),
4007                                                            (__mmask16)(-1));
4008 }
4009
4010 extern __inline __m512i
4011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
4013 {
4014   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
4015                                                            __M);
4016 }
4017
4018 extern __inline __m512i
4019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4020 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
4021 {
4022   return (__m512i)
4023          __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4024                                                  (__v16si) _mm512_setzero_si512 (),
4025                                                  __M);
4026 }
4027
4028 extern __inline __m512i
4029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4030 _mm512_broadcastq_epi64 (__m128i __A)
4031 {
4032   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4033                                                   (__v8di)
4034                                                   _mm512_undefined_epi32 (),
4035                                                   (__mmask8) -1);
4036 }
4037
4038 extern __inline __m512i
4039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4040 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
4041 {
4042   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4043                                                   (__v8di) __O, __M);
4044 }
4045
4046 extern __inline __m512i
4047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4048 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
4049 {
4050   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4051                                                   (__v8di)
4052                                                   _mm512_setzero_si512 (),
4053                                                   __M);
4054 }
4055
4056 extern __inline __m512i
4057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058 _mm512_set1_epi64 (long long __A)
4059 {
4060   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4061                                                            (__v8di)
4062                                                            _mm512_undefined_epi32 (),
4063                                                            (__mmask8)(-1));
4064 }
4065
4066 extern __inline __m512i
4067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4068 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
4069 {
4070   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
4071                                                            __M);
4072 }
4073
4074 extern __inline __m512i
4075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
4077 {
4078   return (__m512i)
4079          __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4080                                                  (__v8di) _mm512_setzero_si512 (),
4081                                                  __M);
4082 }
4083
4084 extern __inline __m512
4085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4086 _mm512_broadcast_f32x4 (__m128 __A)
4087 {
4088   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4089                                                      (__v16sf)
4090                                                      _mm512_undefined_ps (),
4091                                                      (__mmask16) -1);
4092 }
4093
4094 extern __inline __m512
4095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
4097 {
4098   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4099                                                      (__v16sf) __O,
4100                                                      __M);
4101 }
4102
4103 extern __inline __m512
4104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4105 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
4106 {
4107   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4108                                                      (__v16sf)
4109                                                      _mm512_setzero_ps (),
4110                                                      __M);
4111 }
4112
4113 extern __inline __m512i
4114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4115 _mm512_broadcast_i32x4 (__m128i __A)
4116 {
4117   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4118                                                       (__v16si)
4119                                                       _mm512_undefined_epi32 (),
4120                                                       (__mmask16) -1);
4121 }
4122
4123 extern __inline __m512i
4124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4125 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4126 {
4127   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4128                                                       (__v16si) __O,
4129                                                       __M);
4130 }
4131
4132 extern __inline __m512i
4133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4134 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4135 {
4136   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4137                                                       (__v16si)
4138                                                       _mm512_setzero_si512 (),
4139                                                       __M);
4140 }
4141
4142 extern __inline __m512d
4143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4144 _mm512_broadcast_f64x4 (__m256d __A)
4145 {
4146   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4147                                                       (__v8df)
4148                                                       _mm512_undefined_pd (),
4149                                                       (__mmask8) -1);
4150 }
4151
4152 extern __inline __m512d
4153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4154 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4155 {
4156   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4157                                                       (__v8df) __O,
4158                                                       __M);
4159 }
4160
4161 extern __inline __m512d
4162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4163 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4164 {
4165   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4166                                                       (__v8df)
4167                                                       _mm512_setzero_pd (),
4168                                                       __M);
4169 }
4170
4171 extern __inline __m512i
4172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4173 _mm512_broadcast_i64x4 (__m256i __A)
4174 {
4175   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4176                                                       (__v8di)
4177                                                       _mm512_undefined_epi32 (),
4178                                                       (__mmask8) -1);
4179 }
4180
4181 extern __inline __m512i
4182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4183 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4184 {
4185   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4186                                                       (__v8di) __O,
4187                                                       __M);
4188 }
4189
4190 extern __inline __m512i
4191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4193 {
4194   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4195                                                       (__v8di)
4196                                                       _mm512_setzero_si512 (),
4197                                                       __M);
4198 }
4199
/* Control constants accepted by _mm512_shuffle_epi32 and its masked
   variants, whose control argument has type _MM_PERM_ENUM.
   Each name ends in four letters and each letter stands for a 2-bit
   field (A = 0, B = 1, C = 2, D = 3); the leftmost letter occupies the
   two most significant bits, so _MM_PERM_DCBA == 0xE4.  The table
   enumerates every value from 0x00 to 0xFF.  */
4200 typedef enum
4201 {
4202   _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
4203   _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
4204   _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
4205   _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
4206   _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
4207   _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
4208   _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
4209   _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
4210   _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
4211   _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
4212   _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
4213   _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
4214   _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
4215   _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
4216   _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
4217   _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
4218   _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
4219   _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
4220   _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
4221   _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
4222   _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
4223   _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
4224   _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
4225   _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
4226   _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
4227   _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
4228   _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
4229   _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
4230   _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
4231   _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
4232   _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
4233   _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
4234   _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
4235   _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
4236   _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
4237   _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
4238   _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
4239   _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
4240   _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
4241   _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
4242   _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
4243   _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
4244   _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
4245   _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
4246   _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
4247   _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
4248   _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
4249   _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
4250   _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
4251   _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
4252   _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
4253   _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
4254   _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
4255   _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
4256   _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
4257   _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
4258   _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
4259   _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
4260   _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
4261   _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
4262   _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
4263   _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
4264   _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
4265   _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
4266   _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
4267   _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
4268   _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
4269   _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
4270   _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
4271   _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
4272   _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
4273   _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
4274   _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
4275   _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
4276   _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
4277   _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
4278   _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
4279   _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
4280   _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
4281   _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
4282   _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
4283   _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
4284   _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
4285   _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
4286   _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
4287   _MM_PERM_DDDD = 0xFF
4288 } _MM_PERM_ENUM;
4289
4290 #ifdef __OPTIMIZE__
4291 extern __inline __m512i
4292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4293 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4294 {
4295   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4296                                                   __mask,
4297                                                   (__v16si)
4298                                                   _mm512_undefined_epi32 (),
4299                                                   (__mmask16) -1);
4300 }
4301
4302 extern __inline __m512i
4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4305                            _MM_PERM_ENUM __mask)
4306 {
4307   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4308                                                   __mask,
4309                                                   (__v16si) __W,
4310                                                   (__mmask16) __U);
4311 }
4312
4313 extern __inline __m512i
4314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4315 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4316 {
4317   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4318                                                   __mask,
4319                                                   (__v16si)
4320                                                   _mm512_setzero_si512 (),
4321                                                   (__mmask16) __U);
4322 }
4323
4324 extern __inline __m512i
4325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4326 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4327 {
4328   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4329                                                    (__v8di) __B, __imm,
4330                                                    (__v8di)
4331                                                    _mm512_undefined_epi32 (),
4332                                                    (__mmask8) -1);
4333 }
4334
4335 extern __inline __m512i
4336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4337 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4338                            __m512i __B, const int __imm)
4339 {
4340   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4341                                                    (__v8di) __B, __imm,
4342                                                    (__v8di) __W,
4343                                                    (__mmask8) __U);
4344 }
4345
4346 extern __inline __m512i
4347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4348 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4349                             const int __imm)
4350 {
4351   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4352                                                    (__v8di) __B, __imm,
4353                                                    (__v8di)
4354                                                    _mm512_setzero_si512 (),
4355                                                    (__mmask8) __U);
4356 }
4357
4358 extern __inline __m512i
4359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4360 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4361 {
4362   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4363                                                    (__v16si) __B,
4364                                                    __imm,
4365                                                    (__v16si)
4366                                                    _mm512_undefined_epi32 (),
4367                                                    (__mmask16) -1);
4368 }
4369
4370 extern __inline __m512i
4371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4372 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4373                            __m512i __B, const int __imm)
4374 {
4375   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4376                                                    (__v16si) __B,
4377                                                    __imm,
4378                                                    (__v16si) __W,
4379                                                    (__mmask16) __U);
4380 }
4381
4382 extern __inline __m512i
4383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4385                             const int __imm)
4386 {
4387   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4388                                                    (__v16si) __B,
4389                                                    __imm,
4390                                                    (__v16si)
4391                                                    _mm512_setzero_si512 (),
4392                                                    (__mmask16) __U);
4393 }
4394
4395 extern __inline __m512d
4396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4397 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4398 {
4399   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4400                                                    (__v8df) __B, __imm,
4401                                                    (__v8df)
4402                                                    _mm512_undefined_pd (),
4403                                                    (__mmask8) -1);
4404 }
4405
4406 extern __inline __m512d
4407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4409                            __m512d __B, const int __imm)
4410 {
4411   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4412                                                    (__v8df) __B, __imm,
4413                                                    (__v8df) __W,
4414                                                    (__mmask8) __U);
4415 }
4416
4417 extern __inline __m512d
4418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4419 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4420                             const int __imm)
4421 {
4422   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4423                                                    (__v8df) __B, __imm,
4424                                                    (__v8df)
4425                                                    _mm512_setzero_pd (),
4426                                                    (__mmask8) __U);
4427 }
4428
4429 extern __inline __m512
4430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4431 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4432 {
4433   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4434                                                   (__v16sf) __B, __imm,
4435                                                   (__v16sf)
4436                                                   _mm512_undefined_ps (),
4437                                                   (__mmask16) -1);
4438 }
4439
4440 extern __inline __m512
4441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4442 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4443                            __m512 __B, const int __imm)
4444 {
4445   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4446                                                   (__v16sf) __B, __imm,
4447                                                   (__v16sf) __W,
4448                                                   (__mmask16) __U);
4449 }
4450
4451 extern __inline __m512
4452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4453 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4454                             const int __imm)
4455 {
4456   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4457                                                   (__v16sf) __B, __imm,
4458                                                   (__v16sf)
4459                                                   _mm512_setzero_ps (),
4460                                                   (__mmask16) __U);
4461 }
4462
4463 #else
/* Macro form of _mm512_shuffle_epi32, selected when __OPTIMIZE__ is not
   defined.  Expands to a __builtin_ia32_pshufd512_mask call with IMM cast
   to int, an undefined-value vector and a (__mmask16) -1 mask.  */
#define _mm512_shuffle_epi32(A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si) (__m512i) (A), \
                                            (int) (IMM), \
                                            (__v16si) (__m512i) _mm512_undefined_epi32 (), \
                                            (__mmask16) -1))
4468
/* Macro form of _mm512_mask_shuffle_epi32, selected when __OPTIMIZE__ is
   not defined.  W and U become the builtin's extra vector operand and
   mask.  */
#define _mm512_mask_shuffle_epi32(W, U, A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si) (__m512i) (A), \
                                            (int) (IMM), \
                                            (__v16si) (__m512i) (W), \
                                            (__mmask16) (U)))
4473
/* Macro form of _mm512_maskz_shuffle_epi32, selected when __OPTIMIZE__ is
   not defined.  The extra vector operand is _mm512_setzero_si512 ().  */
#define _mm512_maskz_shuffle_epi32(U, A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si) (__m512i) (A), \
                                            (int) (IMM), \
                                            (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                            (__mmask16) (U)))
4478
/* Macro form of _mm512_shuffle_i64x2, selected when __OPTIMIZE__ is not
   defined.  Expands to a __builtin_ia32_shuf_i64x2_mask call with IMM
   cast to int, an undefined-value vector and a (__mmask8) -1 mask.  */
#define _mm512_shuffle_i64x2(A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) (__m512i) (A), \
                                             (__v8di) (__m512i) (B), \
                                             (int) (IMM), \
                                             (__v8di) (__m512i) _mm512_undefined_epi32 (), \
                                             (__mmask8) -1))
4484
/* Macro form of _mm512_mask_shuffle_i64x2, selected when __OPTIMIZE__ is
   not defined.  W and U become the builtin's extra vector operand and
   mask.  */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) (__m512i) (A), \
                                             (__v8di) (__m512i) (B), \
                                             (int) (IMM), \
                                             (__v8di) (__m512i) (W), \
                                             (__mmask8) (U)))
4490
/* Macro form of _mm512_maskz_shuffle_i64x2, selected when __OPTIMIZE__ is
   not defined.  The extra vector operand is _mm512_setzero_si512 ().  */
#define _mm512_maskz_shuffle_i64x2(U, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) (__m512i) (A), \
                                             (__v8di) (__m512i) (B), \
                                             (int) (IMM), \
                                             (__v8di) (__m512i) _mm512_setzero_si512 (), \
                                             (__mmask8) (U)))
4496
/* Macro form of _mm512_shuffle_i32x4, selected when __OPTIMIZE__ is not
   defined.  Expands to a __builtin_ia32_shuf_i32x4_mask call with IMM
   cast to int, an undefined-value vector and a (__mmask16) -1 mask.  */
#define _mm512_shuffle_i32x4(A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) (__m512i) (A), \
                                             (__v16si) (__m512i) (B), \
                                             (int) (IMM), \
                                             (__v16si) (__m512i) _mm512_undefined_epi32 (), \
                                             (__mmask16) -1))
4502
/* Macro form of _mm512_mask_shuffle_i32x4, selected when __OPTIMIZE__ is
   not defined.  W and U become the builtin's extra vector operand and
   mask.  */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) (__m512i) (A), \
                                             (__v16si) (__m512i) (B), \
                                             (int) (IMM), \
                                             (__v16si) (__m512i) (W), \
                                             (__mmask16) (U)))
4508
/* Macro form of _mm512_maskz_shuffle_i32x4, selected when __OPTIMIZE__ is
   not defined.  The extra vector operand is _mm512_setzero_si512 ().  */
#define _mm512_maskz_shuffle_i32x4(U, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) (__m512i) (A), \
                                             (__v16si) (__m512i) (B), \
                                             (int) (IMM), \
                                             (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                             (__mmask16) (U)))
4514
/* Macro form of _mm512_shuffle_f64x2, selected when __OPTIMIZE__ is not
   defined.  Expands to a __builtin_ia32_shuf_f64x2_mask call with IMM
   cast to int, an undefined-value vector and a (__mmask8) -1 mask.  */
#define _mm512_shuffle_f64x2(A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) (__m512d) (A), \
                                             (__v8df) (__m512d) (B), \
                                             (int) (IMM), \
                                             (__v8df) (__m512d) _mm512_undefined_pd (), \
                                             (__mmask8) -1))
4520
/* Macro form of _mm512_mask_shuffle_f64x2, selected when __OPTIMIZE__ is
   not defined.  W and U become the builtin's extra vector operand and
   mask.  */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) (__m512d) (A), \
                                             (__v8df) (__m512d) (B), \
                                             (int) (IMM), \
                                             (__v8df) (__m512d) (W), \
                                             (__mmask8) (U)))
4526
/* Macro form of _mm512_maskz_shuffle_f64x2, selected when __OPTIMIZE__ is
   not defined.  The extra vector operand is _mm512_setzero_pd ().  */
#define _mm512_maskz_shuffle_f64x2(U, A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) (__m512d) (A), \
                                             (__v8df) (__m512d) (B), \
                                             (int) (IMM), \
                                             (__v8df) (__m512d) _mm512_setzero_pd (), \
                                             (__mmask8) (U)))
4532
/* Macro form of _mm512_shuffle_f32x4, selected when __OPTIMIZE__ is not
   defined.  Expands to a __builtin_ia32_shuf_f32x4_mask call with IMM
   cast to int, an undefined-value vector and a (__mmask16) -1 mask.  */
#define _mm512_shuffle_f32x4(A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) (__m512) (A), \
                                            (__v16sf) (__m512) (B), \
                                            (int) (IMM), \
                                            (__v16sf) (__m512) _mm512_undefined_ps (), \
                                            (__mmask16) -1))
4538
/* Macro form of _mm512_mask_shuffle_f32x4, selected when __OPTIMIZE__ is
   not defined.  W and U become the builtin's extra vector operand and
   mask.  */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) (__m512) (A), \
                                            (__v16sf) (__m512) (B), \
                                            (int) (IMM), \
                                            (__v16sf) (__m512) (W), \
                                            (__mmask16) (U)))
4544
/* Macro form of _mm512_maskz_shuffle_f32x4, selected when __OPTIMIZE__ is
   not defined.  The extra vector operand is _mm512_setzero_ps ().  */
#define _mm512_maskz_shuffle_f32x4(U, A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) (__m512) (A), \
                                            (__v16sf) (__m512) (B), \
                                            (int) (IMM), \
                                            (__v16sf) (__m512) _mm512_setzero_ps (), \
                                            (__mmask16) (U)))
4550 #endif
4551
4552 extern __inline __m512i
4553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4554 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4555 {
4556   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4557                                                   (__v16si) __B,
4558                                                   (__v16si)
4559                                                   _mm512_undefined_epi32 (),
4560                                                   (__mmask16) -1);
4561 }
4562
4563 extern __inline __m512i
4564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4565 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4566 {
4567   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4568                                                   (__v16si) __B,
4569                                                   (__v16si) __W,
4570                                                   (__mmask16) __U);
4571 }
4572
4573 extern __inline __m512i
4574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4575 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4576 {
4577   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4578                                                   (__v16si) __B,
4579                                                   (__v16si)
4580                                                   _mm512_setzero_si512 (),
4581                                                   (__mmask16) __U);
4582 }
4583
4584 extern __inline __m512i
4585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4586 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4587 {
4588   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4589                                                   (__v16si) __B,
4590                                                   (__v16si)
4591                                                   _mm512_undefined_epi32 (),
4592                                                   (__mmask16) -1);
4593 }
4594
4595 extern __inline __m512i
4596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4597 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4598 {
4599   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4600                                                   (__v16si) __B,
4601                                                   (__v16si) __W,
4602                                                   (__mmask16) __U);
4603 }
4604
4605 extern __inline __m512i
4606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4607 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4608 {
4609   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4610                                                   (__v16si) __B,
4611                                                   (__v16si)
4612                                                   _mm512_setzero_si512 (),
4613                                                   (__mmask16) __U);
4614 }
4615
4616 extern __inline __m512i
4617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4618 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4619 {
4620   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4621                                                   (__v8di) __B,
4622                                                   (__v8di)
4623                                                   _mm512_undefined_epi32 (),
4624                                                   (__mmask8) -1);
4625 }
4626
4627 extern __inline __m512i
4628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4629 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4630 {
4631   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4632                                                   (__v8di) __B,
4633                                                   (__v8di) __W,
4634                                                   (__mmask8) __U);
4635 }
4636
4637 extern __inline __m512i
4638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4639 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4640 {
4641   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4642                                                   (__v8di) __B,
4643                                                   (__v8di)
4644                                                   _mm512_setzero_si512 (),
4645                                                   (__mmask8) __U);
4646 }
4647
4648 extern __inline __m512i
4649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4650 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4651 {
4652   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4653                                                   (__v8di) __B,
4654                                                   (__v8di)
4655                                                   _mm512_undefined_epi32 (),
4656                                                   (__mmask8) -1);
4657 }
4658
4659 extern __inline __m512i
4660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4661 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4662 {
4663   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4664                                                   (__v8di) __B,
4665                                                   (__v8di) __W,
4666                                                   (__mmask8) __U);
4667 }
4668
4669 extern __inline __m512i
4670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4671 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4672 {
4673   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4674                                                   (__v8di) __B,
4675                                                   (__v8di)
4676                                                   _mm512_setzero_si512 (),
4677                                                   (__mmask8) __U);
4678 }
4679
4680 #ifdef __OPTIMIZE__
4681 extern __inline __m256i
4682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4683 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4684 {
4685   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4686                                                      (__v8si)
4687                                                      _mm256_undefined_si256 (),
4688                                                      (__mmask8) -1, __R);
4689 }
4690
4691 extern __inline __m256i
4692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4693 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4694                                 const int __R)
4695 {
4696   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4697                                                      (__v8si) __W,
4698                                                      (__mmask8) __U, __R);
4699 }
4700
4701 extern __inline __m256i
4702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4703 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4704 {
4705   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4706                                                      (__v8si)
4707                                                      _mm256_setzero_si256 (),
4708                                                      (__mmask8) __U, __R);
4709 }
4710
4711 extern __inline __m256i
4712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4713 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4714 {
4715   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4716                                                       (__v8si)
4717                                                       _mm256_undefined_si256 (),
4718                                                       (__mmask8) -1, __R);
4719 }
4720
4721 extern __inline __m256i
4722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4723 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4724                                 const int __R)
4725 {
4726   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4727                                                       (__v8si) __W,
4728                                                       (__mmask8) __U, __R);
4729 }
4730
4731 extern __inline __m256i
4732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4733 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4734 {
4735   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4736                                                       (__v8si)
4737                                                       _mm256_setzero_si256 (),
4738                                                       (__mmask8) __U, __R);
4739 }
4740 #else
/* Macro form of _mm512_cvtt_roundpd_epi32, selected when __OPTIMIZE__ is
   not defined.  SRC and RND reach the builtin without any cast.  */
#define _mm512_cvtt_roundpd_epi32(SRC, RND) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (SRC, \
                                               (__v8si) _mm256_undefined_si256 (), \
                                               -1, RND))
4743
/* Macro form of _mm512_mask_cvtt_roundpd_epi32, selected when __OPTIMIZE__
   is not defined.  Only W is cast (to __v8si); MASK, SRC and RND reach the
   builtin as written.  */
#define _mm512_mask_cvtt_roundpd_epi32(W, MASK, SRC, RND) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (SRC, (__v8si) (W), MASK, RND))
4746
/* Macro form of _mm512_maskz_cvtt_roundpd_epi32, selected when
   __OPTIMIZE__ is not defined.  The extra vector operand is
   _mm256_setzero_si256 (); MASK, SRC and RND are passed as written.  */
#define _mm512_maskz_cvtt_roundpd_epi32(MASK, SRC, RND) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (SRC, \
                                               (__v8si) _mm256_setzero_si256 (), \
                                               MASK, RND))
4749
/* Macro form of _mm512_cvtt_roundpd_epu32, selected when __OPTIMIZE__ is
   not defined.  SRC and RND reach the builtin without any cast.  */
#define _mm512_cvtt_roundpd_epu32(SRC, RND) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (SRC, \
                                                (__v8si) _mm256_undefined_si256 (), \
                                                -1, RND))
4752
/* Macro form of _mm512_mask_cvtt_roundpd_epu32, selected when __OPTIMIZE__
   is not defined.  Only W is cast (to __v8si); MASK, SRC and RND reach the
   builtin as written.  */
#define _mm512_mask_cvtt_roundpd_epu32(W, MASK, SRC, RND) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (SRC, (__v8si) (W), MASK, RND))
4755
/* Macro form of _mm512_maskz_cvtt_roundpd_epu32, selected when
   __OPTIMIZE__ is not defined.  The extra vector operand is
   _mm256_setzero_si256 (); MASK, SRC and RND are passed as written.  */
#define _mm512_maskz_cvtt_roundpd_epu32(MASK, SRC, RND) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (SRC, \
                                                (__v8si) _mm256_setzero_si256 (), \
                                                MASK, RND))
4758 #endif
4759
4760 #ifdef __OPTIMIZE__
4761 extern __inline __m256i
4762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4763 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4764 {
4765   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4766                                                     (__v8si)
4767                                                     _mm256_undefined_si256 (),
4768                                                     (__mmask8) -1, __R);
4769 }
4770
4771 extern __inline __m256i
4772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4774                                const int __R)
4775 {
4776   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4777                                                     (__v8si) __W,
4778                                                     (__mmask8) __U, __R);
4779 }
4780
4781 extern __inline __m256i
4782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4784 {
4785   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4786                                                     (__v8si)
4787                                                     _mm256_setzero_si256 (),
4788                                                     (__mmask8) __U, __R);
4789 }
4790
4791 extern __inline __m256i
4792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4793 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4794 {
4795   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4796                                                      (__v8si)
4797                                                      _mm256_undefined_si256 (),
4798                                                      (__mmask8) -1, __R);
4799 }
4800
4801 extern __inline __m256i
4802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4803 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4804                                const int __R)
4805 {
4806   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4807                                                      (__v8si) __W,
4808                                                      (__mmask8) __U, __R);
4809 }
4810
4811 extern __inline __m256i
4812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4813 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4814 {
4815   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4816                                                      (__v8si)
4817                                                      _mm256_setzero_si256 (),
4818                                                      (__mmask8) __U, __R);
4819 }
4820 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to one
   call of __builtin_ia32_cvtpd2dq512_mask or
   __builtin_ia32_cvtpd2udq512_mask: A is the source, W the second
   (vector) operand, U the mask and B the final integer argument.  */
#define _mm512_cvt_roundpd_epi32(A, B)                                    \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (A,                          \
      (__v8si) _mm256_undefined_si256 (), -1, B))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B)                         \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (A, (__v8si) (W), U, B))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B)                           \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (A,                          \
      (__v8si) _mm256_setzero_si256 (), U, B))

#define _mm512_cvt_roundpd_epu32(A, B)                                    \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (A,                         \
      (__v8si) _mm256_undefined_si256 (), -1, B))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B)                         \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (A, (__v8si) (W), U, B))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B)                           \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (A,                         \
      (__v8si) _mm256_setzero_si256 (), U, B))
4838 #endif
4839
4840 #ifdef __OPTIMIZE__
4841 extern __inline __m512i
4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4844 {
4845   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4846                                                      (__v16si)
4847                                                      _mm512_undefined_epi32 (),
4848                                                      (__mmask16) -1, __R);
4849 }
4850
4851 extern __inline __m512i
4852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4853 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4854                                 const int __R)
4855 {
4856   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4857                                                      (__v16si) __W,
4858                                                      (__mmask16) __U, __R);
4859 }
4860
4861 extern __inline __m512i
4862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4863 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4864 {
4865   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4866                                                      (__v16si)
4867                                                      _mm512_setzero_si512 (),
4868                                                      (__mmask16) __U, __R);
4869 }
4870
4871 extern __inline __m512i
4872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4873 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4874 {
4875   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4876                                                       (__v16si)
4877                                                       _mm512_undefined_epi32 (),
4878                                                       (__mmask16) -1, __R);
4879 }
4880
4881 extern __inline __m512i
4882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4883 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4884                                 const int __R)
4885 {
4886   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4887                                                       (__v16si) __W,
4888                                                       (__mmask16) __U, __R);
4889 }
4890
4891 extern __inline __m512i
4892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4893 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4894 {
4895   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4896                                                       (__v16si)
4897                                                       _mm512_setzero_si512 (),
4898                                                       (__mmask16) __U, __R);
4899 }
4900 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to one
   call of __builtin_ia32_cvttps2dq512_mask or
   __builtin_ia32_cvttps2udq512_mask: A is the source, W the second
   (vector) operand, U the mask and B the final integer argument.  */
#define _mm512_cvtt_roundps_epi32(A, B)                                   \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (A,                         \
      (__v16si) _mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (A, (__v16si) (W), U, B))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B)                          \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (A,                         \
      (__v16si) _mm512_setzero_si512 (), U, B))

#define _mm512_cvtt_roundps_epu32(A, B)                                   \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (A,                        \
      (__v16si) _mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (A, (__v16si) (W), U, B))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B)                          \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (A,                        \
      (__v16si) _mm512_setzero_si512 (), U, B))
4918 #endif
4919
4920 #ifdef __OPTIMIZE__
4921 extern __inline __m512i
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4924 {
4925   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4926                                                     (__v16si)
4927                                                     _mm512_undefined_epi32 (),
4928                                                     (__mmask16) -1, __R);
4929 }
4930
4931 extern __inline __m512i
4932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4933 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4934                                const int __R)
4935 {
4936   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4937                                                     (__v16si) __W,
4938                                                     (__mmask16) __U, __R);
4939 }
4940
4941 extern __inline __m512i
4942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4943 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4944 {
4945   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4946                                                     (__v16si)
4947                                                     _mm512_setzero_si512 (),
4948                                                     (__mmask16) __U, __R);
4949 }
4950
4951 extern __inline __m512i
4952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4953 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4954 {
4955   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4956                                                      (__v16si)
4957                                                      _mm512_undefined_epi32 (),
4958                                                      (__mmask16) -1, __R);
4959 }
4960
4961 extern __inline __m512i
4962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4964                                const int __R)
4965 {
4966   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4967                                                      (__v16si) __W,
4968                                                      (__mmask16) __U, __R);
4969 }
4970
4971 extern __inline __m512i
4972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4973 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4974 {
4975   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4976                                                      (__v16si)
4977                                                      _mm512_setzero_si512 (),
4978                                                      (__mmask16) __U, __R);
4979 }
4980 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to one
   call of __builtin_ia32_cvtps2dq512_mask or
   __builtin_ia32_cvtps2udq512_mask: A is the source, W the second
   (vector) operand, U the mask and B the final integer argument.  */
#define _mm512_cvt_roundps_epi32(A, B)                                    \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (A,                          \
      (__v16si) _mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (A, (__v16si) (W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B)                           \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (A,                          \
      (__v16si) _mm512_setzero_si512 (), U, B))

#define _mm512_cvt_roundps_epu32(A, B)                                    \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (A,                         \
      (__v16si) _mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (A, (__v16si) (W), U, B))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B)                           \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (A,                         \
      (__v16si) _mm512_setzero_si512 (), U, B))
4998 #endif
4999
5000 extern __inline __m128d
5001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5002 _mm_cvtu32_sd (__m128d __A, unsigned __B)
5003 {
5004   return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
5005 }
5006
5007 #ifdef __x86_64__
5008 #ifdef __OPTIMIZE__
5009 extern __inline __m128d
5010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5011 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
5012 {
5013   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
5014 }
5015
5016 extern __inline __m128d
5017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5018 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
5019 {
5020   return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5021 }
5022
5023 extern __inline __m128d
5024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5025 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
5026 {
5027   return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5028 }
5029 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  A is the __m128d
   operand, B the integer to convert and C the final integer argument of
   the builtin.  The whole expansion is parenthesized so that the
   (__m128d) cast applies to the complete call even when the macro is
   followed by a postfix operator such as a vector subscript; without the
   outer parentheses the subscript would bind to the call before the
   cast, unlike the inline-function forms.  */
#define _mm_cvt_roundu64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
5038 #endif
5039
5040 #endif
5041
5042 #ifdef __OPTIMIZE__
5043 extern __inline __m128
5044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
5046 {
5047   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
5048 }
5049
5050 extern __inline __m128
5051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5052 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
5053 {
5054   return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5055 }
5056
5057 extern __inline __m128
5058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5059 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
5060 {
5061   return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5062 }
5063 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  A is the __m128
   operand, B the integer to convert and C the final integer argument of
   the builtin.  The whole expansion is parenthesized so that the (__m128)
   cast applies to the complete call even when the macro is followed by a
   postfix operator such as a vector subscript; without the outer
   parentheses the subscript would bind to the call before the cast,
   unlike the inline-function forms.  */
#define _mm_cvt_roundu32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
5072 #endif
5073
5074 #ifdef __x86_64__
5075 #ifdef __OPTIMIZE__
5076 extern __inline __m128
5077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5078 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
5079 {
5080   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
5081 }
5082
5083 extern __inline __m128
5084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5085 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
5086 {
5087   return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5088 }
5089
5090 extern __inline __m128
5091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
5093 {
5094   return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5095 }
5096 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  A is the __m128
   operand, B the 64-bit integer to convert and C the final integer
   argument of the builtin.  The whole expansion is parenthesized so that
   the (__m128) cast applies to the complete call even when the macro is
   followed by a postfix operator such as a vector subscript; without the
   outer parentheses the subscript would bind to the call before the
   cast, unlike the inline-function forms.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
5105 #endif
5106
5107 #endif
5108
5109 extern __inline __m128i
5110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111 _mm512_cvtepi32_epi8 (__m512i __A)
5112 {
5113   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5114                                                   (__v16qi)
5115                                                   _mm_undefined_si128 (),
5116                                                   (__mmask16) -1);
5117 }
5118
5119 extern __inline void
5120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5122 {
5123   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5124 }
5125
5126 extern __inline __m128i
5127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5128 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5129 {
5130   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5131                                                   (__v16qi) __O, __M);
5132 }
5133
5134 extern __inline __m128i
5135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5136 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5137 {
5138   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5139                                                   (__v16qi)
5140                                                   _mm_setzero_si128 (),
5141                                                   __M);
5142 }
5143
5144 extern __inline __m128i
5145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5146 _mm512_cvtsepi32_epi8 (__m512i __A)
5147 {
5148   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5149                                                    (__v16qi)
5150                                                    _mm_undefined_si128 (),
5151                                                    (__mmask16) -1);
5152 }
5153
5154 extern __inline void
5155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5156 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5157 {
5158   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5159 }
5160
5161 extern __inline __m128i
5162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5163 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5164 {
5165   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5166                                                    (__v16qi) __O, __M);
5167 }
5168
5169 extern __inline __m128i
5170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5171 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5172 {
5173   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5174                                                    (__v16qi)
5175                                                    _mm_setzero_si128 (),
5176                                                    __M);
5177 }
5178
5179 extern __inline __m128i
5180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5181 _mm512_cvtusepi32_epi8 (__m512i __A)
5182 {
5183   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5184                                                     (__v16qi)
5185                                                     _mm_undefined_si128 (),
5186                                                     (__mmask16) -1);
5187 }
5188
5189 extern __inline void
5190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5191 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5192 {
5193   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5194 }
5195
5196 extern __inline __m128i
5197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5198 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5199 {
5200   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5201                                                     (__v16qi) __O,
5202                                                     __M);
5203 }
5204
5205 extern __inline __m128i
5206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5207 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5208 {
5209   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5210                                                     (__v16qi)
5211                                                     _mm_setzero_si128 (),
5212                                                     __M);
5213 }
5214
5215 extern __inline __m256i
5216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217 _mm512_cvtepi32_epi16 (__m512i __A)
5218 {
5219   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5220                                                   (__v16hi)
5221                                                   _mm256_undefined_si256 (),
5222                                                   (__mmask16) -1);
5223 }
5224
5225 extern __inline void
5226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5227 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5228 {
5229   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5230 }
5231
5232 extern __inline __m256i
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5235 {
5236   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5237                                                   (__v16hi) __O, __M);
5238 }
5239
5240 extern __inline __m256i
5241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5243 {
5244   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5245                                                   (__v16hi)
5246                                                   _mm256_setzero_si256 (),
5247                                                   __M);
5248 }
5249
5250 extern __inline __m256i
5251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252 _mm512_cvtsepi32_epi16 (__m512i __A)
5253 {
5254   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5255                                                    (__v16hi)
5256                                                    _mm256_undefined_si256 (),
5257                                                    (__mmask16) -1);
5258 }
5259
5260 extern __inline void
5261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5262 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5263 {
5264   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5265 }
5266
5267 extern __inline __m256i
5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5270 {
5271   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5272                                                    (__v16hi) __O, __M);
5273 }
5274
5275 extern __inline __m256i
5276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5277 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5278 {
5279   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5280                                                    (__v16hi)
5281                                                    _mm256_setzero_si256 (),
5282                                                    __M);
5283 }
5284
5285 extern __inline __m256i
5286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5287 _mm512_cvtusepi32_epi16 (__m512i __A)
5288 {
5289   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5290                                                     (__v16hi)
5291                                                     _mm256_undefined_si256 (),
5292                                                     (__mmask16) -1);
5293 }
5294
5295 extern __inline void
5296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5297 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5298 {
5299   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5300 }
5301
5302 extern __inline __m256i
5303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5304 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5305 {
5306   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5307                                                     (__v16hi) __O,
5308                                                     __M);
5309 }
5310
5311 extern __inline __m256i
5312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5314 {
5315   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5316                                                     (__v16hi)
5317                                                     _mm256_setzero_si256 (),
5318                                                     __M);
5319 }
5320
5321 extern __inline __m256i
5322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5323 _mm512_cvtepi64_epi32 (__m512i __A)
5324 {
5325   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5326                                                   (__v8si)
5327                                                   _mm256_undefined_si256 (),
5328                                                   (__mmask8) -1);
5329 }
5330
5331 extern __inline void
5332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5333 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5334 {
5335   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5336 }
5337
5338 extern __inline __m256i
5339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5341 {
5342   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5343                                                   (__v8si) __O, __M);
5344 }
5345
5346 extern __inline __m256i
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5349 {
5350   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5351                                                   (__v8si)
5352                                                   _mm256_setzero_si256 (),
5353                                                   __M);
5354 }
5355
5356 extern __inline __m256i
5357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358 _mm512_cvtsepi64_epi32 (__m512i __A)
5359 {
5360   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5361                                                    (__v8si)
5362                                                    _mm256_undefined_si256 (),
5363                                                    (__mmask8) -1);
5364 }
5365
5366 extern __inline void
5367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5368 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5369 {
5370   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5371 }
5372
5373 extern __inline __m256i
5374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5376 {
5377   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5378                                                    (__v8si) __O, __M);
5379 }
5380
5381 extern __inline __m256i
5382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5384 {
5385   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5386                                                    (__v8si)
5387                                                    _mm256_setzero_si256 (),
5388                                                    __M);
5389 }
5390
5391 extern __inline __m256i
5392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5393 _mm512_cvtusepi64_epi32 (__m512i __A)
5394 {
5395   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5396                                                     (__v8si)
5397                                                     _mm256_undefined_si256 (),
5398                                                     (__mmask8) -1);
5399 }
5400
5401 extern __inline void
5402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5403 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5404 {
5405   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5406 }
5407
5408 extern __inline __m256i
5409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5410 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5411 {
5412   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5413                                                     (__v8si) __O, __M);
5414 }
5415
5416 extern __inline __m256i
5417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5418 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5419 {
5420   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5421                                                     (__v8si)
5422                                                     _mm256_setzero_si256 (),
5423                                                     __M);
5424 }
5425
5426 extern __inline __m128i
5427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5428 _mm512_cvtepi64_epi16 (__m512i __A)
5429 {
5430   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5431                                                   (__v8hi)
5432                                                   _mm_undefined_si128 (),
5433                                                   (__mmask8) -1);
5434 }
5435
5436 extern __inline void
5437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5438 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5439 {
5440   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5441 }
5442
5443 extern __inline __m128i
5444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5445 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5446 {
5447   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5448                                                   (__v8hi) __O, __M);
5449 }
5450
5451 extern __inline __m128i
5452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5453 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5454 {
5455   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5456                                                   (__v8hi)
5457                                                   _mm_setzero_si128 (),
5458                                                   __M);
5459 }
5460
5461 extern __inline __m128i
5462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5463 _mm512_cvtsepi64_epi16 (__m512i __A)
5464 {
5465   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5466                                                    (__v8hi)
5467                                                    _mm_undefined_si128 (),
5468                                                    (__mmask8) -1);
5469 }
5470
5471 extern __inline void
5472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5473 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5474 {
5475   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5476 }
5477
5478 extern __inline __m128i
5479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5480 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5481 {
5482   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5483                                                    (__v8hi) __O, __M);
5484 }
5485
5486 extern __inline __m128i
5487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5488 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5489 {
5490   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5491                                                    (__v8hi)
5492                                                    _mm_setzero_si128 (),
5493                                                    __M);
5494 }
5495
5496 extern __inline __m128i
5497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5498 _mm512_cvtusepi64_epi16 (__m512i __A)
5499 {
5500   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5501                                                     (__v8hi)
5502                                                     _mm_undefined_si128 (),
5503                                                     (__mmask8) -1);
5504 }
5505
5506 extern __inline void
5507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5508 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5509 {
5510   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5511 }
5512
5513 extern __inline __m128i
5514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5515 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5516 {
5517   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5518                                                     (__v8hi) __O, __M);
5519 }
5520
5521 extern __inline __m128i
5522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5523 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5524 {
5525   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5526                                                     (__v8hi)
5527                                                     _mm_setzero_si128 (),
5528                                                     __M);
5529 }
5530
5531 extern __inline __m128i
5532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533 _mm512_cvtepi64_epi8 (__m512i __A)
5534 {
5535   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5536                                                   (__v16qi)
5537                                                   _mm_undefined_si128 (),
5538                                                   (__mmask8) -1);
5539 }
5540
5541 extern __inline void
5542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5544 {
5545   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5546 }
5547
5548 extern __inline __m128i
5549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5550 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5551 {
5552   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5553                                                   (__v16qi) __O, __M);
5554 }
5555
5556 extern __inline __m128i
5557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5558 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5559 {
5560   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5561                                                   (__v16qi)
5562                                                   _mm_setzero_si128 (),
5563                                                   __M);
5564 }
5565
5566 extern __inline __m128i
5567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5568 _mm512_cvtsepi64_epi8 (__m512i __A)
5569 {
5570   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5571                                                    (__v16qi)
5572                                                    _mm_undefined_si128 (),
5573                                                    (__mmask8) -1);
5574 }
5575
5576 extern __inline void
5577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5578 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5579 {
5580   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5581 }
5582
5583 extern __inline __m128i
5584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5586 {
5587   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5588                                                    (__v16qi) __O, __M);
5589 }
5590
5591 extern __inline __m128i
5592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5593 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5594 {
5595   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5596                                                    (__v16qi)
5597                                                    _mm_setzero_si128 (),
5598                                                    __M);
5599 }
5600
5601 extern __inline __m128i
5602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603 _mm512_cvtusepi64_epi8 (__m512i __A)
5604 {
5605   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5606                                                     (__v16qi)
5607                                                     _mm_undefined_si128 (),
5608                                                     (__mmask8) -1);
5609 }
5610
5611 extern __inline void
5612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5613 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5614 {
5615   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5616 }
5617
5618 extern __inline __m128i
5619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5620 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5621 {
5622   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5623                                                     (__v16qi) __O,
5624                                                     __M);
5625 }
5626
5627 extern __inline __m128i
5628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5630 {
5631   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5632                                                     (__v16qi)
5633                                                     _mm_setzero_si128 (),
5634                                                     __M);
5635 }
5636
5637 extern __inline __m512d
5638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639 _mm512_cvtepi32_pd (__m256i __A)
5640 {
5641   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5642                                                     (__v8df)
5643                                                     _mm512_undefined_pd (),
5644                                                     (__mmask8) -1);
5645 }
5646
5647 extern __inline __m512d
5648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5649 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5650 {
5651   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5652                                                     (__v8df) __W,
5653                                                     (__mmask8) __U);
5654 }
5655
5656 extern __inline __m512d
5657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5658 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5659 {
5660   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5661                                                     (__v8df)
5662                                                     _mm512_setzero_pd (),
5663                                                     (__mmask8) __U);
5664 }
5665
5666 extern __inline __m512d
5667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5668 _mm512_cvtepu32_pd (__m256i __A)
5669 {
5670   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5671                                                      (__v8df)
5672                                                      _mm512_undefined_pd (),
5673                                                      (__mmask8) -1);
5674 }
5675
5676 extern __inline __m512d
5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5679 {
5680   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5681                                                      (__v8df) __W,
5682                                                      (__mmask8) __U);
5683 }
5684
5685 extern __inline __m512d
5686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5688 {
5689   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5690                                                      (__v8df)
5691                                                      _mm512_setzero_pd (),
5692                                                      (__mmask8) __U);
5693 }
5694
5695 #ifdef __OPTIMIZE__
5696 extern __inline __m512
5697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5698 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5699 {
5700   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5701                                                    (__v16sf)
5702                                                    _mm512_undefined_ps (),
5703                                                    (__mmask16) -1, __R);
5704 }
5705
5706 extern __inline __m512
5707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5708 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5709                                const int __R)
5710 {
5711   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5712                                                    (__v16sf) __W,
5713                                                    (__mmask16) __U, __R);
5714 }
5715
5716 extern __inline __m512
5717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5718 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5719 {
5720   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5721                                                    (__v16sf)
5722                                                    _mm512_setzero_ps (),
5723                                                    (__mmask16) __U, __R);
5724 }
5725
5726 extern __inline __m512
5727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5728 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5729 {
5730   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5731                                                     (__v16sf)
5732                                                     _mm512_undefined_ps (),
5733                                                     (__mmask16) -1, __R);
5734 }
5735
5736 extern __inline __m512
5737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5738 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5739                                const int __R)
5740 {
5741   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5742                                                     (__v16sf) __W,
5743                                                     (__mmask16) __U, __R);
5744 }
5745
5746 extern __inline __m512
5747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5748 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5749 {
5750   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5751                                                     (__v16sf)
5752                                                     _mm512_setzero_ps (),
5753                                                     (__mmask16) __U, __R);
5754 }
5755
5756 #else
/* Macro form of _mm512_cvt_roundepi32_ps: expands to
   __builtin_ia32_cvtdq2ps512_mask on A (cast to __v16si) with an undefined
   __v16sf second operand, an all-ones __mmask16 and B as the last
   argument.  The whole expansion and B are parenthesized so that a
   postfix or binary operator written after the macro applies to the
   __m512 result instead of binding inside the expansion.  */
#define _mm512_cvt_roundepi32_ps(A, B)                                  \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),              \
    (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (B)))
5759
/* Macro form of _mm512_mask_cvt_roundepi32_ps: expands to
   __builtin_ia32_cvtdq2ps512_mask on A (cast to __v16si) with W cast to
   __v16sf, U cast to __mmask16 and B as the last argument.  Every
   argument and the whole expansion are parenthesized, and W/U get the same
   casts the inline-function form applies, so the macro parses the same
   way in any expression context.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)                       \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),              \
    (__v16sf)(W), (__mmask16)(U), (B)))
5762
/* Macro form of _mm512_maskz_cvt_roundepi32_ps: expands to
   __builtin_ia32_cvtdq2ps512_mask on A (cast to __v16si) with a zeroed
   __v16sf second operand, U cast to __mmask16 and B as the last argument.
   Arguments and the whole expansion are parenthesized so the macro parses
   the same way in any expression context.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)                         \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),              \
    (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (B)))
5765
/* Macro form of _mm512_cvt_roundepu32_ps: expands to
   __builtin_ia32_cvtudq2ps512_mask on A (cast to __v16si) with an
   undefined __v16sf second operand, an all-ones __mmask16 and B as the
   last argument.  The whole expansion and B are parenthesized so that a
   postfix or binary operator written after the macro applies to the
   __m512 result instead of binding inside the expansion.  */
#define _mm512_cvt_roundepu32_ps(A, B)                                  \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),             \
    (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (B)))
5768
/* Macro form of _mm512_mask_cvt_roundepu32_ps: expands to
   __builtin_ia32_cvtudq2ps512_mask on A (cast to __v16si) with W cast to
   __v16sf, U cast to __mmask16 and B as the last argument.  Every
   argument and the whole expansion are parenthesized, and W/U get the same
   casts the inline-function form applies, so the macro parses the same
   way in any expression context.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)                       \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),             \
    (__v16sf)(W), (__mmask16)(U), (B)))
5771
/* Macro form of _mm512_maskz_cvt_roundepu32_ps: expands to
   __builtin_ia32_cvtudq2ps512_mask on A (cast to __v16si) with a zeroed
   __v16sf second operand, U cast to __mmask16 and B as the last argument.
   Arguments and the whole expansion are parenthesized so the macro parses
   the same way in any expression context.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)                         \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),             \
    (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (B)))
5774 #endif
5775
5776 #ifdef __OPTIMIZE__
5777 extern __inline __m256d
5778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5779 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5780 {
5781   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5782                                                      __imm,
5783                                                      (__v4df)
5784                                                      _mm256_undefined_pd (),
5785                                                      (__mmask8) -1);
5786 }
5787
5788 extern __inline __m256d
5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5791                              const int __imm)
5792 {
5793   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5794                                                      __imm,
5795                                                      (__v4df) __W,
5796                                                      (__mmask8) __U);
5797 }
5798
5799 extern __inline __m256d
5800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5801 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5802 {
5803   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5804                                                      __imm,
5805                                                      (__v4df)
5806                                                      _mm256_setzero_pd (),
5807                                                      (__mmask8) __U);
5808 }
5809
5810 extern __inline __m128
5811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5812 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5813 {
5814   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5815                                                     __imm,
5816                                                     (__v4sf)
5817                                                     _mm_undefined_ps (),
5818                                                     (__mmask8) -1);
5819 }
5820
5821 extern __inline __m128
5822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5823 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5824                              const int __imm)
5825 {
5826   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5827                                                     __imm,
5828                                                     (__v4sf) __W,
5829                                                     (__mmask8) __U);
5830 }
5831
5832 extern __inline __m128
5833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5835 {
5836   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5837                                                     __imm,
5838                                                     (__v4sf)
5839                                                     _mm_setzero_ps (),
5840                                                     (__mmask8) __U);
5841 }
5842
5843 extern __inline __m256i
5844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5846 {
5847   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5848                                                      __imm,
5849                                                      (__v4di)
5850                                                      _mm256_undefined_si256 (),
5851                                                      (__mmask8) -1);
5852 }
5853
5854 extern __inline __m256i
5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5857                                 const int __imm)
5858 {
5859   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5860                                                      __imm,
5861                                                      (__v4di) __W,
5862                                                      (__mmask8) __U);
5863 }
5864
5865 extern __inline __m256i
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5868 {
5869   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5870                                                      __imm,
5871                                                      (__v4di)
5872                                                      _mm256_setzero_si256 (),
5873                                                      (__mmask8) __U);
5874 }
5875
5876 extern __inline __m128i
5877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5879 {
5880   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5881                                                      __imm,
5882                                                      (__v4si)
5883                                                      _mm_undefined_si128 (),
5884                                                      (__mmask8) -1);
5885 }
5886
5887 extern __inline __m128i
5888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5889 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5890                                 const int __imm)
5891 {
5892   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5893                                                      __imm,
5894                                                      (__v4si) __W,
5895                                                      (__mmask8) __U);
5896 }
5897
5898 extern __inline __m128i
5899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5900 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5901 {
5902   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5903                                                      __imm,
5904                                                      (__v4si)
5905                                                      _mm_setzero_si128 (),
5906                                                      (__mmask8) __U);
5907 }
5908 #else
5909
/* Macro form of _mm512_extractf64x4_pd: expands to
   __builtin_ia32_extractf64x4_mask with V cast to __m512d then __v8df,
   I cast to int, an undefined __v4df operand and an all-ones __mmask8.  */
#define _mm512_extractf64x4_pd(V, I)                                    \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (V),            \
                                     (int) (I),                         \
                                     (__v4df) (__m256d)                 \
                                     _mm256_undefined_pd (),            \
                                     (__mmask8) -1))
5915
/* Macro form of _mm512_mask_extractf64x4_pd: expands to
   __builtin_ia32_extractf64x4_mask with V cast to __m512d then __v8df,
   I cast to int, W cast to __m256d then __v4df, and U cast to __mmask8.  */
#define _mm512_mask_extractf64x4_pd(W, U, V, I)                         \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (V),            \
                                     (int) (I),                         \
                                     (__v4df) (__m256d) (W),            \
                                     (__mmask8) (U)))
5921
/* Macro form of _mm512_maskz_extractf64x4_pd: expands to
   __builtin_ia32_extractf64x4_mask with V cast to __m512d then __v8df,
   I cast to int, a zeroed __v4df operand and U cast to __mmask8.  */
#define _mm512_maskz_extractf64x4_pd(U, V, I)                           \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (V),            \
                                     (int) (I),                         \
                                     (__v4df) (__m256d)                 \
                                     _mm256_setzero_pd (),              \
                                     (__mmask8) (U)))
5927
/* Macro form of _mm512_extractf32x4_ps: expands to
   __builtin_ia32_extractf32x4_mask with V cast to __m512 then __v16sf,
   I cast to int, an undefined __v4sf operand and an all-ones __mmask8.  */
#define _mm512_extractf32x4_ps(V, I)                                    \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (V),            \
                                     (int) (I),                         \
                                     (__v4sf) (__m128)                  \
                                     _mm_undefined_ps (),               \
                                     (__mmask8) -1))
5933
/* Macro form of _mm512_mask_extractf32x4_ps: expands to
   __builtin_ia32_extractf32x4_mask with V cast to __m512 then __v16sf,
   I cast to int, W cast to __m128 then __v4sf, and U cast to __mmask8.  */
#define _mm512_mask_extractf32x4_ps(W, U, V, I)                         \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (V),            \
                                     (int) (I),                         \
                                     (__v4sf) (__m128) (W),             \
                                     (__mmask8) (U)))
5939
/* Macro form of _mm512_maskz_extractf32x4_ps: expands to
   __builtin_ia32_extractf32x4_mask with V cast to __m512 then __v16sf,
   I cast to int, a zeroed __v4sf operand and U cast to __mmask8.  */
#define _mm512_maskz_extractf32x4_ps(U, V, I)                           \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (V),            \
                                     (int) (I),                         \
                                     (__v4sf) (__m128)                  \
                                     _mm_setzero_ps (),                 \
                                     (__mmask8) (U)))
5945
/* Macro form of _mm512_extracti64x4_epi64: expands to
   __builtin_ia32_extracti64x4_mask with V cast to __m512i then __v8di,
   I cast to int, an undefined __v4di operand and an all-ones __mmask8.  */
#define _mm512_extracti64x4_epi64(V, I)                                 \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (V),            \
                                     (int) (I),                         \
                                     (__v4di) (__m256i)                 \
                                     _mm256_undefined_si256 (),         \
                                     (__mmask8) -1))
5951
/* Macro form of _mm512_mask_extracti64x4_epi64: expands to
   __builtin_ia32_extracti64x4_mask with V cast to __m512i then __v8di,
   I cast to int, W cast to __m256i then __v4di, and U cast to __mmask8.  */
#define _mm512_mask_extracti64x4_epi64(W, U, V, I)                      \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (V),            \
                                     (int) (I),                         \
                                     (__v4di) (__m256i) (W),            \
                                     (__mmask8) (U)))
5957
/* Macro form of _mm512_maskz_extracti64x4_epi64: expands to
   __builtin_ia32_extracti64x4_mask with V cast to __m512i then __v8di,
   I cast to int, a zeroed __v4di operand and U cast to __mmask8.  */
#define _mm512_maskz_extracti64x4_epi64(U, V, I)                        \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (V),            \
                                     (int) (I),                         \
                                     (__v4di) (__m256i)                 \
                                     _mm256_setzero_si256 (),           \
                                     (__mmask8) (U)))
5963
/* Macro form of _mm512_extracti32x4_epi32: expands to
   __builtin_ia32_extracti32x4_mask with V cast to __m512i then __v16si,
   I cast to int, an undefined __v4si operand and an all-ones __mmask8.  */
#define _mm512_extracti32x4_epi32(V, I)                                 \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (V),           \
                                     (int) (I),                         \
                                     (__v4si) (__m128i)                 \
                                     _mm_undefined_si128 (),            \
                                     (__mmask8) -1))
5969
/* Macro form of _mm512_mask_extracti32x4_epi32: expands to
   __builtin_ia32_extracti32x4_mask with V cast to __m512i then __v16si,
   I cast to int, W cast to __m128i then __v4si, and U cast to __mmask8.  */
#define _mm512_mask_extracti32x4_epi32(W, U, V, I)                      \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (V),           \
                                     (int) (I),                         \
                                     (__v4si) (__m128i) (W),            \
                                     (__mmask8) (U)))
5975
/* Macro form of _mm512_maskz_extracti32x4_epi32: expands to
   __builtin_ia32_extracti32x4_mask with V cast to __m512i then __v16si,
   I cast to int, a zeroed __v4si operand and U cast to __mmask8.  */
#define _mm512_maskz_extracti32x4_epi32(U, V, I)                        \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (V),           \
                                     (int) (I),                         \
                                     (__v4si) (__m128i)                 \
                                     _mm_setzero_si128 (),              \
                                     (__mmask8) (U)))
5981 #endif
5982
5983 #ifdef __OPTIMIZE__
5984 extern __inline __m512i
5985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5987 {
5988   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5989                                                     (__v4si) __B,
5990                                                     __imm,
5991                                                     (__v16si) __A, -1);
5992 }
5993
5994 extern __inline __m512
5995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5997 {
5998   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5999                                                    (__v4sf) __B,
6000                                                    __imm,
6001                                                    (__v16sf) __A, -1);
6002 }
6003
6004 extern __inline __m512i
6005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6006 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
6007 {
6008   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6009                                                     (__v4di) __B,
6010                                                     __imm,
6011                                                     (__v8di)
6012                                                     _mm512_undefined_epi32 (),
6013                                                     (__mmask8) -1);
6014 }
6015
6016 extern __inline __m512i
6017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6018 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
6019                          __m256i __B, const int __imm)
6020 {
6021   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6022                                                     (__v4di) __B,
6023                                                     __imm,
6024                                                     (__v8di) __W,
6025                                                     (__mmask8) __U);
6026 }
6027
6028 extern __inline __m512i
6029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6030 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
6031                           const int __imm)
6032 {
6033   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6034                                                     (__v4di) __B,
6035                                                     __imm,
6036                                                     (__v8di)
6037                                                     _mm512_setzero_si512 (),
6038                                                     (__mmask8) __U);
6039 }
6040
6041 extern __inline __m512d
6042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6043 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
6044 {
6045   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6046                                                     (__v4df) __B,
6047                                                     __imm,
6048                                                     (__v8df)
6049                                                     _mm512_undefined_pd (),
6050                                                     (__mmask8) -1);
6051 }
6052
6053 extern __inline __m512d
6054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6055 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
6056                          __m256d __B, const int __imm)
6057 {
6058   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6059                                                     (__v4df) __B,
6060                                                     __imm,
6061                                                     (__v8df) __W,
6062                                                     (__mmask8) __U);
6063 }
6064
6065 extern __inline __m512d
6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
6068                           const int __imm)
6069 {
6070   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6071                                                     (__v4df) __B,
6072                                                     __imm,
6073                                                     (__v8df)
6074                                                     _mm512_setzero_pd (),
6075                                                     (__mmask8) __U);
6076 }
6077 #else
/* Macro form of _mm512_insertf32x4: expands to
   __builtin_ia32_insertf32x4_mask with X cast to __m512 then __v16sf,
   Y cast to __m128 then __v4sf, C cast to int and an all-ones __mmask16.
   The fourth operand is an undefined vector rather than a second copy of
   X, so X is expanded (and therefore evaluated) exactly once, as in the
   inline-function form.  With every mask bit set the fourth operand is
   not expected to contribute to the result; the _mm512_insertf64x4 macro
   in this file relies on the same convention.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C),                                    \
    (__v16sf)(__m512)_mm512_undefined_ps(),                             \
    (__mmask16)(-1)))
6081
/* Macro form of _mm512_inserti32x4: expands to
   __builtin_ia32_inserti32x4_mask with X cast to __m512i then __v16si,
   Y cast to __m128i then __v4si, C cast to int and an all-ones __mmask16.
   The fourth operand is an undefined vector rather than a second copy of
   X, so X is expanded (and therefore evaluated) exactly once, as in the
   inline-function form.  With every mask bit set the fourth operand is
   not expected to contribute to the result; the _mm512_inserti64x4 macro
   in this file relies on the same convention.  */
#define _mm512_inserti32x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C),                                   \
    (__v16si)(__m512i)_mm512_undefined_epi32 (),                        \
    (__mmask16)(-1)))
6085
/* Macro form of _mm512_insertf64x4: expands to
   __builtin_ia32_insertf64x4_mask with V cast to __m512d then __v8df,
   S cast to __m256d then __v4df, I cast to int, an undefined __v8df
   fourth operand and an all-ones __mmask8.  */
#define _mm512_insertf64x4(V, S, I)                                     \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (V),             \
                                    (__v4df) (__m256d) (S),             \
                                    (int) (I),                          \
                                    (__v8df) (__m512d)                  \
                                    _mm512_undefined_pd (),             \
                                    (__mmask8) -1))
6091
/* Macro form of _mm512_mask_insertf64x4: expands to
   __builtin_ia32_insertf64x4_mask with V cast to __m512d then __v8df,
   S cast to __m256d then __v4df, I cast to int, W cast to __m512d then
   __v8df, and U cast to __mmask8.  */
#define _mm512_mask_insertf64x4(W, U, V, S, I)                          \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (V),             \
                                    (__v4df) (__m256d) (S),             \
                                    (int) (I),                          \
                                    (__v8df) (__m512d) (W),             \
                                    (__mmask8) (U)))
6097
/* Macro form of _mm512_maskz_insertf64x4: expands to
   __builtin_ia32_insertf64x4_mask with V cast to __m512d then __v8df,
   S cast to __m256d then __v4df, I cast to int, a zeroed __v8df fourth
   operand and U cast to __mmask8.  */
#define _mm512_maskz_insertf64x4(U, V, S, I)                            \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (V),             \
                                    (__v4df) (__m256d) (S),             \
                                    (int) (I),                          \
                                    (__v8df) (__m512d)                  \
                                    _mm512_setzero_pd (),               \
                                    (__mmask8) (U)))
6103
/* Macro form of _mm512_inserti64x4: expands to
   __builtin_ia32_inserti64x4_mask with V cast to __m512i then __v8di,
   S cast to __m256i then __v4di, I cast to int, an undefined __v8di
   fourth operand and an all-ones __mmask8.  */
#define _mm512_inserti64x4(V, S, I)                                     \
  ((__m512i)                                                            \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (V),             \
                                    (__v4di) (__m256i) (S),             \
                                    (int) (I),                          \
                                    (__v8di) (__m512i)                  \
                                    _mm512_undefined_epi32 (),          \
                                    (__mmask8) -1))
6109
/* Macro form of _mm512_mask_inserti64x4: expands to
   __builtin_ia32_inserti64x4_mask with V cast to __m512i then __v8di,
   S cast to __m256i then __v4di, I cast to int, W cast to __m512i then
   __v8di, and U cast to __mmask8.  */
#define _mm512_mask_inserti64x4(W, U, V, S, I)                          \
  ((__m512i)                                                            \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (V),             \
                                    (__v4di) (__m256i) (S),             \
                                    (int) (I),                          \
                                    (__v8di) (__m512i) (W),             \
                                    (__mmask8) (U)))
6115
/* Macro form of _mm512_maskz_inserti64x4: expands to
   __builtin_ia32_inserti64x4_mask with V cast to __m512i then __v8di,
   S cast to __m256i then __v4di, I cast to int, a zeroed __v8di fourth
   operand and U cast to __mmask8.  */
#define _mm512_maskz_inserti64x4(U, V, S, I)                            \
  ((__m512i)                                                            \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (V),             \
                                    (__v4di) (__m256i) (S),             \
                                    (int) (I),                          \
                                    (__v8di) (__m512i)                  \
                                    _mm512_setzero_si512 (),            \
                                    (__mmask8) (U)))
6121 #endif
6122
6123 extern __inline __m512d
6124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6125 _mm512_loadu_pd (void const *__P)
6126 {
6127   return *(__m512d_u *)__P;
6128 }
6129
6130 extern __inline __m512d
6131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6132 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6133 {
6134   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6135                                                    (__v8df) __W,
6136                                                    (__mmask8) __U);
6137 }
6138
6139 extern __inline __m512d
6140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6141 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6142 {
6143   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6144                                                    (__v8df)
6145                                                    _mm512_setzero_pd (),
6146                                                    (__mmask8) __U);
6147 }
6148
6149 extern __inline void
6150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6151 _mm512_storeu_pd (void *__P, __m512d __A)
6152 {
6153   *(__m512d_u *)__P = __A;
6154 }
6155
6156 extern __inline void
6157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6158 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6159 {
6160   __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
6161                                    (__mmask8) __U);
6162 }
6163
6164 extern __inline __m512
6165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6166 _mm512_loadu_ps (void const *__P)
6167 {
6168   return *(__m512_u *)__P;
6169 }
6170
6171 extern __inline __m512
6172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6173 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6174 {
6175   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6176                                                   (__v16sf) __W,
6177                                                   (__mmask16) __U);
6178 }
6179
6180 extern __inline __m512
6181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6183 {
6184   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6185                                                   (__v16sf)
6186                                                   _mm512_setzero_ps (),
6187                                                   (__mmask16) __U);
6188 }
6189
6190 extern __inline void
6191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192 _mm512_storeu_ps (void *__P, __m512 __A)
6193 {
6194   *(__m512_u *)__P = __A;
6195 }
6196
6197 extern __inline void
6198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6199 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6200 {
6201   __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
6202                                    (__mmask16) __U);
6203 }
6204
6205 extern __inline __m512i
6206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6207 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6208 {
6209   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6210                                                      (__v8di) __W,
6211                                                      (__mmask8) __U);
6212 }
6213
6214 extern __inline __m512i
6215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6217 {
6218   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6219                                                      (__v8di)
6220                                                      _mm512_setzero_si512 (),
6221                                                      (__mmask8) __U);
6222 }
6223
6224 extern __inline void
6225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6226 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6227 {
6228   __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
6229                                      (__mmask8) __U);
6230 }
6231
6232 extern __inline __m512i
6233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6234 _mm512_loadu_si512 (void const *__P)
6235 {
6236   return *(__m512i_u *)__P;
6237 }
6238
6239 extern __inline __m512i
6240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6241 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6242 {
6243   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6244                                                      (__v16si) __W,
6245                                                      (__mmask16) __U);
6246 }
6247
6248 extern __inline __m512i
6249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6251 {
6252   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6253                                                      (__v16si)
6254                                                      _mm512_setzero_si512 (),
6255                                                      (__mmask16) __U);
6256 }
6257
6258 extern __inline void
6259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6260 _mm512_storeu_si512 (void *__P, __m512i __A)
6261 {
6262   *(__m512i_u *)__P = __A;
6263 }
6264
6265 extern __inline void
6266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6267 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6268 {
6269   __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
6270                                      (__mmask16) __U);
6271 }
6272
6273 extern __inline __m512d
6274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6275 _mm512_permutevar_pd (__m512d __A, __m512i __C)
6276 {
6277   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6278                                                         (__v8di) __C,
6279                                                         (__v8df)
6280                                                         _mm512_undefined_pd (),
6281                                                         (__mmask8) -1);
6282 }
6283
6284 extern __inline __m512d
6285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6286 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6287 {
6288   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6289                                                         (__v8di) __C,
6290                                                         (__v8df) __W,
6291                                                         (__mmask8) __U);
6292 }
6293
6294 extern __inline __m512d
6295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6296 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6297 {
6298   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6299                                                         (__v8di) __C,
6300                                                         (__v8df)
6301                                                         _mm512_setzero_pd (),
6302                                                         (__mmask8) __U);
6303 }
6304
6305 extern __inline __m512
6306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6307 _mm512_permutevar_ps (__m512 __A, __m512i __C)
6308 {
6309   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6310                                                        (__v16si) __C,
6311                                                        (__v16sf)
6312                                                        _mm512_undefined_ps (),
6313                                                        (__mmask16) -1);
6314 }
6315
6316 extern __inline __m512
6317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6318 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6319 {
6320   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6321                                                        (__v16si) __C,
6322                                                        (__v16sf) __W,
6323                                                        (__mmask16) __U);
6324 }
6325
6326 extern __inline __m512
6327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6328 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6329 {
6330   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6331                                                        (__v16si) __C,
6332                                                        (__v16sf)
6333                                                        _mm512_setzero_ps (),
6334                                                        (__mmask16) __U);
6335 }
6336
6337 extern __inline __m512i
6338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6339 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6340 {
6341   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6342                                                        /* idx */ ,
6343                                                        (__v8di) __A,
6344                                                        (__v8di) __B,
6345                                                        (__mmask8) -1);
6346 }
6347
6348 extern __inline __m512i
6349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6350 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6351                                 __m512i __B)
6352 {
6353   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6354                                                        /* idx */ ,
6355                                                        (__v8di) __A,
6356                                                        (__v8di) __B,
6357                                                        (__mmask8) __U);
6358 }
6359
6360 extern __inline __m512i
6361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6362 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6363                                  __mmask8 __U, __m512i __B)
6364 {
6365   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6366                                                        (__v8di) __I
6367                                                        /* idx */ ,
6368                                                        (__v8di) __B,
6369                                                        (__mmask8) __U);
6370 }
6371
6372 extern __inline __m512i
6373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6374 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6375                                  __m512i __I, __m512i __B)
6376 {
6377   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6378                                                         /* idx */ ,
6379                                                         (__v8di) __A,
6380                                                         (__v8di) __B,
6381                                                         (__mmask8) __U);
6382 }
6383
6384 extern __inline __m512i
6385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6387 {
6388   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6389                                                        /* idx */ ,
6390                                                        (__v16si) __A,
6391                                                        (__v16si) __B,
6392                                                        (__mmask16) -1);
6393 }
6394
6395 extern __inline __m512i
6396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6397 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6398                                 __m512i __I, __m512i __B)
6399 {
6400   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6401                                                        /* idx */ ,
6402                                                        (__v16si) __A,
6403                                                        (__v16si) __B,
6404                                                        (__mmask16) __U);
6405 }
6406
6407 extern __inline __m512i
6408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6409 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6410                                  __mmask16 __U, __m512i __B)
6411 {
6412   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6413                                                        (__v16si) __I
6414                                                        /* idx */ ,
6415                                                        (__v16si) __B,
6416                                                        (__mmask16) __U);
6417 }
6418
6419 extern __inline __m512i
6420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6421 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6422                                  __m512i __I, __m512i __B)
6423 {
6424   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6425                                                         /* idx */ ,
6426                                                         (__v16si) __A,
6427                                                         (__v16si) __B,
6428                                                         (__mmask16) __U);
6429 }
6430
6431 extern __inline __m512d
6432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6433 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6434 {
6435   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6436                                                         /* idx */ ,
6437                                                         (__v8df) __A,
6438                                                         (__v8df) __B,
6439                                                         (__mmask8) -1);
6440 }
6441
6442 extern __inline __m512d
6443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6444 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6445                              __m512d __B)
6446 {
6447   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6448                                                         /* idx */ ,
6449                                                         (__v8df) __A,
6450                                                         (__v8df) __B,
6451                                                         (__mmask8) __U);
6452 }
6453
6454 extern __inline __m512d
6455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6456 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6457                               __m512d __B)
6458 {
6459   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6460                                                         (__v8di) __I
6461                                                         /* idx */ ,
6462                                                         (__v8df) __B,
6463                                                         (__mmask8) __U);
6464 }
6465
6466 extern __inline __m512d
6467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6469                               __m512d __B)
6470 {
6471   return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6472                                                          /* idx */ ,
6473                                                          (__v8df) __A,
6474                                                          (__v8df) __B,
6475                                                          (__mmask8) __U);
6476 }
6477
6478 extern __inline __m512
6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6481 {
6482   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6483                                                        /* idx */ ,
6484                                                        (__v16sf) __A,
6485                                                        (__v16sf) __B,
6486                                                        (__mmask16) -1);
6487 }
6488
6489 extern __inline __m512
6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6491 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6492 {
6493   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6494                                                        /* idx */ ,
6495                                                        (__v16sf) __A,
6496                                                        (__v16sf) __B,
6497                                                        (__mmask16) __U);
6498 }
6499
6500 extern __inline __m512
6501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6502 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6503                               __m512 __B)
6504 {
6505   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6506                                                        (__v16si) __I
6507                                                        /* idx */ ,
6508                                                        (__v16sf) __B,
6509                                                        (__mmask16) __U);
6510 }
6511
6512 extern __inline __m512
6513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6514 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6515                               __m512 __B)
6516 {
6517   return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6518                                                         /* idx */ ,
6519                                                         (__v16sf) __A,
6520                                                         (__v16sf) __B,
6521                                                         (__mmask16) __U);
6522 }
6523
6524 #ifdef __OPTIMIZE__
6525 extern __inline __m512d
6526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6527 _mm512_permute_pd (__m512d __X, const int __C)
6528 {
6529   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6530                                                      (__v8df)
6531                                                      _mm512_undefined_pd (),
6532                                                      (__mmask8) -1);
6533 }
6534
6535 extern __inline __m512d
6536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6537 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6538 {
6539   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6540                                                      (__v8df) __W,
6541                                                      (__mmask8) __U);
6542 }
6543
6544 extern __inline __m512d
6545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6546 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6547 {
6548   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6549                                                      (__v8df)
6550                                                      _mm512_setzero_pd (),
6551                                                      (__mmask8) __U);
6552 }
6553
6554 extern __inline __m512
6555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6556 _mm512_permute_ps (__m512 __X, const int __C)
6557 {
6558   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6559                                                     (__v16sf)
6560                                                     _mm512_undefined_ps (),
6561                                                     (__mmask16) -1);
6562 }
6563
6564 extern __inline __m512
6565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6566 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6567 {
6568   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6569                                                     (__v16sf) __W,
6570                                                     (__mmask16) __U);
6571 }
6572
6573 extern __inline __m512
6574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6575 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6576 {
6577   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6578                                                     (__v16sf)
6579                                                     _mm512_setzero_ps (),
6580                                                     (__mmask16) __U);
6581 }
6582 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands straight
   to the vpermilpd512 mask builtin with an undefined third operand and
   an all-ones mask.  */
#define _mm512_permute_pd(X, C)                                         \
  ((__m512d)                                                            \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X),              \
                                     (int)(C),                          \
                                     (__v8df)(__m512d)                  \
                                     _mm512_undefined_pd(),             \
                                     (__mmask8)(-1)))
6587
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   vpermilpd512 mask builtin with W as third operand and U as mask.  */
#define _mm512_mask_permute_pd(W, U, X, C)                              \
  ((__m512d)                                                            \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X),              \
                                     (int)(C),                          \
                                     (__v8df)(__m512d)(W),              \
                                     (__mmask8)(U)))
6592
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   vpermilpd512 mask builtin with a zero vector as third operand and U
   as mask.  */
#define _mm512_maskz_permute_pd(U, X, C)                                \
  ((__m512d)                                                            \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X),              \
                                     (int)(C),                          \
                                     (__v8df)(__m512d)                  \
                                     _mm512_setzero_pd(),               \
                                     (__mmask8)(U)))
6597
/* Macro form used when __OPTIMIZE__ is not defined: expands straight
   to the vpermilps512 mask builtin with an undefined third operand and
   an all-ones mask.  */
#define _mm512_permute_ps(X, C)                                         \
  ((__m512)                                                             \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X),              \
                                     (int)(C),                          \
                                     (__v16sf)(__m512)                  \
                                     _mm512_undefined_ps(),             \
                                     (__mmask16)(-1)))
6602
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   vpermilps512 mask builtin with W as third operand and U as mask.  */
#define _mm512_mask_permute_ps(W, U, X, C)                              \
  ((__m512)                                                             \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X),              \
                                     (int)(C),                          \
                                     (__v16sf)(__m512)(W),              \
                                     (__mmask16)(U)))
6607
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   vpermilps512 mask builtin with a zero vector as third operand and U
   as mask.  */
#define _mm512_maskz_permute_ps(U, X, C)                                \
  ((__m512)                                                             \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X),              \
                                     (int)(C),                          \
                                     (__v16sf)(__m512)                  \
                                     _mm512_setzero_ps(),               \
                                     (__mmask16)(U)))
6612 #endif
6613
6614 #ifdef __OPTIMIZE__
6615 extern __inline __m512i
6616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6617 _mm512_permutex_epi64 (__m512i __X, const int __I)
6618 {
6619   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6620                                                   (__v8di)
6621                                                   _mm512_undefined_epi32 (),
6622                                                   (__mmask8) (-1));
6623 }
6624
6625 extern __inline __m512i
6626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6627 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6628                             __m512i __X, const int __I)
6629 {
6630   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6631                                                   (__v8di) __W,
6632                                                   (__mmask8) __M);
6633 }
6634
6635 extern __inline __m512i
6636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6637 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6638 {
6639   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6640                                                   (__v8di)
6641                                                   _mm512_setzero_si512 (),
6642                                                   (__mmask8) __M);
6643 }
6644
6645 extern __inline __m512d
6646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6647 _mm512_permutex_pd (__m512d __X, const int __M)
6648 {
6649   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6650                                                   (__v8df)
6651                                                   _mm512_undefined_pd (),
6652                                                   (__mmask8) -1);
6653 }
6654
6655 extern __inline __m512d
6656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6657 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6658 {
6659   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6660                                                   (__v8df) __W,
6661                                                   (__mmask8) __U);
6662 }
6663
6664 extern __inline __m512d
6665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6666 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6667 {
6668   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6669                                                   (__v8df)
6670                                                   _mm512_setzero_pd (),
6671                                                   (__mmask8) __U);
6672 }
6673 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   permdf512 mask builtin with an undefined third operand and an
   all-ones mask.  */
#define _mm512_permutex_pd(X, M)                                        \
  ((__m512d)                                                            \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X),                 \
                                  (int)(M),                             \
                                  (__v8df)(__m512d)                     \
                                  _mm512_undefined_pd(),                \
                                  (__mmask8)-1))
6678
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   permdf512 mask builtin with W as third operand and U as mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M)                             \
  ((__m512d)                                                            \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X),                 \
                                  (int)(M),                             \
                                  (__v8df)(__m512d)(W),                 \
                                  (__mmask8)(U)))
6682
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   permdf512 mask builtin with a zero vector as third operand and U as
   mask.  */
#define _mm512_maskz_permutex_pd(U, X, M)                               \
  ((__m512d)                                                            \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X),                 \
                                  (int)(M),                             \
                                  (__v8df)(__m512d)                     \
                                  _mm512_setzero_pd(),                  \
                                  (__mmask8)(U)))
6687
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   permdi512 mask builtin with an undefined third operand and an
   all-ones mask.  */
#define _mm512_permutex_epi64(X, I)                                     \
  ((__m512i)                                                            \
   __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), (int)(I),       \
                                  (__v8di)(__m512i)                     \
                                  (_mm512_undefined_epi32 ()),          \
                                  (__mmask8)(-1)))
6694
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   permdi512 mask builtin with a zero vector as third operand and M as
   mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I)                            \
  ((__m512i)                                                            \
   __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), (int)(I),       \
                                  (__v8di)(__m512i)                     \
                                  (_mm512_setzero_si512 ()),            \
                                  (__mmask8)(M)))
6701
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   permdi512 mask builtin with W as third operand and M as mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I)                          \
  ((__m512i)                                                            \
   __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), (int)(I),       \
                                  (__v8di)(__m512i)(W),                 \
                                  (__mmask8)(M)))
6707 #endif
6708
6709 extern __inline __m512i
6710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6711 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6712 {
6713   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6714                                                      (__v8di) __X,
6715                                                      (__v8di)
6716                                                      _mm512_setzero_si512 (),
6717                                                      __M);
6718 }
6719
6720 extern __inline __m512i
6721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6722 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6723 {
6724   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6725                                                      (__v8di) __X,
6726                                                      (__v8di)
6727                                                      _mm512_undefined_epi32 (),
6728                                                      (__mmask8) -1);
6729 }
6730
6731 extern __inline __m512i
6732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6733 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6734                                __m512i __Y)
6735 {
6736   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6737                                                      (__v8di) __X,
6738                                                      (__v8di) __W,
6739                                                      __M);
6740 }
6741
6742 extern __inline __m512i
6743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6745 {
6746   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6747                                                      (__v16si) __X,
6748                                                      (__v16si)
6749                                                      _mm512_setzero_si512 (),
6750                                                      __M);
6751 }
6752
6753 extern __inline __m512i
6754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6755 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6756 {
6757   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6758                                                      (__v16si) __X,
6759                                                      (__v16si)
6760                                                      _mm512_undefined_epi32 (),
6761                                                      (__mmask16) -1);
6762 }
6763
6764 extern __inline __m512i
6765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6767                                __m512i __Y)
6768 {
6769   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6770                                                      (__v16si) __X,
6771                                                      (__v16si) __W,
6772                                                      __M);
6773 }
6774
6775 extern __inline __m512d
6776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6778 {
6779   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6780                                                      (__v8di) __X,
6781                                                      (__v8df)
6782                                                      _mm512_undefined_pd (),
6783                                                      (__mmask8) -1);
6784 }
6785
6786 extern __inline __m512d
6787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6788 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6789 {
6790   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6791                                                      (__v8di) __X,
6792                                                      (__v8df) __W,
6793                                                      (__mmask8) __U);
6794 }
6795
6796 extern __inline __m512d
6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6798 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6799 {
6800   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6801                                                      (__v8di) __X,
6802                                                      (__v8df)
6803                                                      _mm512_setzero_pd (),
6804                                                      (__mmask8) __U);
6805 }
6806
6807 extern __inline __m512
6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6809 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6810 {
6811   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6812                                                     (__v16si) __X,
6813                                                     (__v16sf)
6814                                                     _mm512_undefined_ps (),
6815                                                     (__mmask16) -1);
6816 }
6817
6818 extern __inline __m512
6819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6820 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6821 {
6822   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6823                                                     (__v16si) __X,
6824                                                     (__v16sf) __W,
6825                                                     (__mmask16) __U);
6826 }
6827
6828 extern __inline __m512
6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6830 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6831 {
6832   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6833                                                     (__v16si) __X,
6834                                                     (__v16sf)
6835                                                     _mm512_setzero_ps (),
6836                                                     (__mmask16) __U);
6837 }
6838
6839 #ifdef __OPTIMIZE__
6840 extern __inline __m512
6841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6842 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6843 {
6844   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6845                                                  (__v16sf) __V, __imm,
6846                                                  (__v16sf)
6847                                                  _mm512_undefined_ps (),
6848                                                  (__mmask16) -1);
6849 }
6850
6851 extern __inline __m512
6852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6853 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6854                         __m512 __V, const int __imm)
6855 {
6856   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6857                                                  (__v16sf) __V, __imm,
6858                                                  (__v16sf) __W,
6859                                                  (__mmask16) __U);
6860 }
6861
6862 extern __inline __m512
6863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6864 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6865 {
6866   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6867                                                  (__v16sf) __V, __imm,
6868                                                  (__v16sf)
6869                                                  _mm512_setzero_ps (),
6870                                                  (__mmask16) __U);
6871 }
6872
6873 extern __inline __m512d
6874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6875 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6876 {
6877   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6878                                                   (__v8df) __V, __imm,
6879                                                   (__v8df)
6880                                                   _mm512_undefined_pd (),
6881                                                   (__mmask8) -1);
6882 }
6883
6884 extern __inline __m512d
6885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6886 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6887                         __m512d __V, const int __imm)
6888 {
6889   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6890                                                   (__v8df) __V, __imm,
6891                                                   (__v8df) __W,
6892                                                   (__mmask8) __U);
6893 }
6894
6895 extern __inline __m512d
6896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6897 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6898                          const int __imm)
6899 {
6900   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6901                                                   (__v8df) __V, __imm,
6902                                                   (__v8df)
6903                                                   _mm512_setzero_pd (),
6904                                                   (__mmask8) __U);
6905 }
6906
6907 extern __inline __m512d
6908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6909 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6910                           const int __imm, const int __R)
6911 {
6912   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6913                                                       (__v8df) __B,
6914                                                       (__v8di) __C,
6915                                                       __imm,
6916                                                       (__mmask8) -1, __R);
6917 }
6918
6919 extern __inline __m512d
6920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6921 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6922                                __m512i __C, const int __imm, const int __R)
6923 {
6924   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6925                                                       (__v8df) __B,
6926                                                       (__v8di) __C,
6927                                                       __imm,
6928                                                       (__mmask8) __U, __R);
6929 }
6930
6931 extern __inline __m512d
6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6934                                 __m512i __C, const int __imm, const int __R)
6935 {
6936   return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6937                                                        (__v8df) __B,
6938                                                        (__v8di) __C,
6939                                                        __imm,
6940                                                        (__mmask8) __U, __R);
6941 }
6942
6943 extern __inline __m512
6944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6945 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6946                           const int __imm, const int __R)
6947 {
6948   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6949                                                      (__v16sf) __B,
6950                                                      (__v16si) __C,
6951                                                      __imm,
6952                                                      (__mmask16) -1, __R);
6953 }
6954
6955 extern __inline __m512
6956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6957 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6958                                __m512i __C, const int __imm, const int __R)
6959 {
6960   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6961                                                      (__v16sf) __B,
6962                                                      (__v16si) __C,
6963                                                      __imm,
6964                                                      (__mmask16) __U, __R);
6965 }
6966
6967 extern __inline __m512
6968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6969 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6970                                 __m512i __C, const int __imm, const int __R)
6971 {
6972   return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6973                                                       (__v16sf) __B,
6974                                                       (__v16si) __C,
6975                                                       __imm,
6976                                                       (__mmask16) __U, __R);
6977 }
6978
6979 extern __inline __m128d
6980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6981 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6982                        const int __imm, const int __R)
6983 {
6984   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6985                                                    (__v2df) __B,
6986                                                    (__v2di) __C, __imm,
6987                                                    (__mmask8) -1, __R);
6988 }
6989
6990 extern __inline __m128d
6991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6992 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6993                             __m128i __C, const int __imm, const int __R)
6994 {
6995   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6996                                                    (__v2df) __B,
6997                                                    (__v2di) __C, __imm,
6998                                                    (__mmask8) __U, __R);
6999 }
7000
7001 extern __inline __m128d
7002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7003 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
7004                              __m128i __C, const int __imm, const int __R)
7005 {
7006   return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
7007                                                     (__v2df) __B,
7008                                                     (__v2di) __C,
7009                                                     __imm,
7010                                                     (__mmask8) __U, __R);
7011 }
7012
7013 extern __inline __m128
7014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7015 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
7016                        const int __imm, const int __R)
7017 {
7018   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7019                                                   (__v4sf) __B,
7020                                                   (__v4si) __C, __imm,
7021                                                   (__mmask8) -1, __R);
7022 }
7023
7024 extern __inline __m128
7025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7026 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
7027                             __m128i __C, const int __imm, const int __R)
7028 {
7029   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7030                                                   (__v4sf) __B,
7031                                                   (__v4si) __C, __imm,
7032                                                   (__mmask8) __U, __R);
7033 }
7034
7035 extern __inline __m128
7036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7037 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
7038                              __m128i __C, const int __imm, const int __R)
7039 {
7040   return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
7041                                                    (__v4sf) __B,
7042                                                    (__v4si) __C, __imm,
7043                                                    (__mmask8) __U, __R);
7044 }
7045
7046 #else
/* Macro form of _mm512_shuffle_pd, used when __OPTIMIZE__ is not
   defined.  Undefined pass-through vector, all-ones write mask.  */
#define _mm512_shuffle_pd(A, B, IMM)                                    \
  ((__m512d) __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (A),     \
                                            (__v8df) (__m512d) (B),     \
                                            (int) (IMM),                \
                                            (__v8df) (__m512d)          \
                                            _mm512_undefined_pd (),     \
                                            (__mmask8) -1))
7052
/* Macro form of _mm512_mask_shuffle_pd, used when __OPTIMIZE__ is not
   defined.  W is the pass-through vector, U the write mask.  */
#define _mm512_mask_shuffle_pd(W, U, A, B, IMM)                         \
  ((__m512d) __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (A),     \
                                            (__v8df) (__m512d) (B),     \
                                            (int) (IMM),                \
                                            (__v8df) (__m512d) (W),     \
                                            (__mmask8) (U)))
7058
/* Macro form of _mm512_maskz_shuffle_pd, used when __OPTIMIZE__ is not
   defined.  All-zero pass-through vector, U is the write mask.  */
#define _mm512_maskz_shuffle_pd(U, A, B, IMM)                           \
  ((__m512d) __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (A),     \
                                            (__v8df) (__m512d) (B),     \
                                            (int) (IMM),                \
                                            (__v8df) (__m512d)          \
                                            _mm512_setzero_pd (),       \
                                            (__mmask8) (U)))
7064
/* Macro form of _mm512_shuffle_ps, used when __OPTIMIZE__ is not
   defined.  Undefined pass-through vector, all-ones write mask.  */
#define _mm512_shuffle_ps(A, B, IMM)                                    \
  ((__m512) __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (A),      \
                                           (__v16sf) (__m512) (B),      \
                                           (int) (IMM),                 \
                                           (__v16sf) (__m512)           \
                                           _mm512_undefined_ps (),      \
                                           (__mmask16) -1))
7070
/* Macro form of _mm512_mask_shuffle_ps, used when __OPTIMIZE__ is not
   defined.  W is the pass-through vector, U the write mask.  */
#define _mm512_mask_shuffle_ps(W, U, A, B, IMM)                         \
  ((__m512) __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (A),      \
                                           (__v16sf) (__m512) (B),      \
                                           (int) (IMM),                 \
                                           (__v16sf) (__m512) (W),      \
                                           (__mmask16) (U)))
7076
/* Macro form of _mm512_maskz_shuffle_ps, used when __OPTIMIZE__ is not
   defined.  All-zero pass-through vector, U is the write mask.  */
#define _mm512_maskz_shuffle_ps(U, A, B, IMM)                           \
  ((__m512) __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (A),      \
                                           (__v16sf) (__m512) (B),      \
                                           (int) (IMM),                 \
                                           (__v16sf) (__m512)           \
                                           _mm512_setzero_ps (),        \
                                           (__mmask16) (U)))
7082
/* Macro form of _mm512_fixupimm_round_pd, used when __OPTIMIZE__ is not
   defined.  All-ones write mask.  */
#define _mm512_fixupimm_round_pd(A, B, C, IMM, R)                       \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) (__m512d) (A), \
                                                (__v8df) (__m512d) (B), \
                                                (__v8di) (__m512i) (C), \
                                                (int) (IMM),            \
                                                (__mmask8) (-1), (R)))
7087
/* Macro form of _mm512_mask_fixupimm_round_pd, used when __OPTIMIZE__ is
   not defined.  U is the write mask.  */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, IMM, R)               \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) (__m512d) (A), \
                                                (__v8df) (__m512d) (B), \
                                                (__v8di) (__m512i) (C), \
                                                (int) (IMM),            \
                                                (__mmask8) (U), (R)))
7092
/* Macro form of _mm512_maskz_fixupimm_round_pd, used when __OPTIMIZE__ is
   not defined.  Calls the _maskz builtin with U as the write mask.  */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, IMM, R)               \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) (__m512d) (A), \
                                                 (__v8df) (__m512d) (B), \
                                                 (__v8di) (__m512i) (C), \
                                                 (int) (IMM),            \
                                                 (__mmask8) (U), (R)))
7097
/* Macro form of _mm512_fixupimm_round_ps, used when __OPTIMIZE__ is not
   defined.  All-ones write mask.  */
#define _mm512_fixupimm_round_ps(A, B, C, IMM, R)                       \
  ((__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) (__m512) (A),  \
                                               (__v16sf) (__m512) (B),  \
                                               (__v16si) (__m512i) (C), \
                                               (int) (IMM),             \
                                               (__mmask16) (-1), (R)))
7102
/* Macro form of _mm512_mask_fixupimm_round_ps, used when __OPTIMIZE__ is
   not defined.  U is the write mask.  */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, IMM, R)               \
  ((__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) (__m512) (A),  \
                                               (__v16sf) (__m512) (B),  \
                                               (__v16si) (__m512i) (C), \
                                               (int) (IMM),             \
                                               (__mmask16) (U), (R)))
7107
/* Macro form of _mm512_maskz_fixupimm_round_ps, used when __OPTIMIZE__ is
   not defined.  Calls the _maskz builtin with U as the write mask.  */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, IMM, R)               \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) (__m512) (A),  \
                                                (__v16sf) (__m512) (B),  \
                                                (__v16si) (__m512i) (C), \
                                                (int) (IMM),             \
                                                (__mmask16) (U), (R)))
7112
/* Macro form of _mm_fixupimm_round_sd, used when __OPTIMIZE__ is not
   defined.  All-ones write mask.  */
#define _mm_fixupimm_round_sd(A, B, C, IMM, R)                          \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) (__m128d) (A),    \
                                             (__v2df) (__m128d) (B),    \
                                             (__v2di) (__m128i) (C),    \
                                             (int) (IMM),               \
                                             (__mmask8) (-1), (R)))
7117
/* Macro form of _mm_mask_fixupimm_round_sd, used when __OPTIMIZE__ is not
   defined.  U is the write mask.  */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, IMM, R)                  \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) (__m128d) (A),    \
                                             (__v2df) (__m128d) (B),    \
                                             (__v2di) (__m128i) (C),    \
                                             (int) (IMM),               \
                                             (__mmask8) (U), (R)))
7122
/* Macro form of _mm_maskz_fixupimm_round_sd, used when __OPTIMIZE__ is
   not defined.  Calls the _maskz builtin with U as the write mask.  */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, IMM, R)                 \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) (__m128d) (A),   \
                                              (__v2df) (__m128d) (B),   \
                                              (__v2di) (__m128i) (C),   \
                                              (int) (IMM),              \
                                              (__mmask8) (U), (R)))
7127
/* Macro form of _mm_fixupimm_round_ss, used when __OPTIMIZE__ is not
   defined.  All-ones write mask.  */
#define _mm_fixupimm_round_ss(A, B, C, IMM, R)                          \
  ((__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) (__m128) (A),      \
                                            (__v4sf) (__m128) (B),      \
                                            (__v4si) (__m128i) (C),     \
                                            (int) (IMM),                \
                                            (__mmask8) (-1), (R)))
7132
/* Macro form of _mm_mask_fixupimm_round_ss, used when __OPTIMIZE__ is not
   defined.  U is the write mask.  */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, IMM, R)                  \
  ((__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) (__m128) (A),      \
                                            (__v4sf) (__m128) (B),      \
                                            (__v4si) (__m128i) (C),     \
                                            (int) (IMM),                \
                                            (__mmask8) (U), (R)))
7137
/* Macro form of _mm_maskz_fixupimm_round_ss, used when __OPTIMIZE__ is
   not defined.  Calls the _maskz builtin with U as the write mask.  */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, IMM, R)                 \
  ((__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) (__m128) (A),     \
                                             (__v4sf) (__m128) (B),     \
                                             (__v4si) (__m128i) (C),    \
                                             (int) (IMM),               \
                                             (__mmask8) (U), (R)))
7142 #endif
7143
7144 extern __inline __m512
7145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7146 _mm512_movehdup_ps (__m512 __A)
7147 {
7148   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7149                                                    (__v16sf)
7150                                                    _mm512_undefined_ps (),
7151                                                    (__mmask16) -1);
7152 }
7153
7154 extern __inline __m512
7155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7156 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7157 {
7158   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7159                                                    (__v16sf) __W,
7160                                                    (__mmask16) __U);
7161 }
7162
7163 extern __inline __m512
7164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7165 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7166 {
7167   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7168                                                    (__v16sf)
7169                                                    _mm512_setzero_ps (),
7170                                                    (__mmask16) __U);
7171 }
7172
7173 extern __inline __m512
7174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175 _mm512_moveldup_ps (__m512 __A)
7176 {
7177   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7178                                                    (__v16sf)
7179                                                    _mm512_undefined_ps (),
7180                                                    (__mmask16) -1);
7181 }
7182
7183 extern __inline __m512
7184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7185 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7186 {
7187   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7188                                                    (__v16sf) __W,
7189                                                    (__mmask16) __U);
7190 }
7191
7192 extern __inline __m512
7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7195 {
7196   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7197                                                    (__v16sf)
7198                                                    _mm512_setzero_ps (),
7199                                                    (__mmask16) __U);
7200 }
7201
7202 extern __inline __m512i
7203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7204 _mm512_or_si512 (__m512i __A, __m512i __B)
7205 {
7206   return (__m512i) ((__v16su) __A | (__v16su) __B);
7207 }
7208
7209 extern __inline __m512i
7210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7211 _mm512_or_epi32 (__m512i __A, __m512i __B)
7212 {
7213   return (__m512i) ((__v16su) __A | (__v16su) __B);
7214 }
7215
7216 extern __inline __m512i
7217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7218 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7219 {
7220   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7221                                                 (__v16si) __B,
7222                                                 (__v16si) __W,
7223                                                 (__mmask16) __U);
7224 }
7225
7226 extern __inline __m512i
7227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7228 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7229 {
7230   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7231                                                 (__v16si) __B,
7232                                                 (__v16si)
7233                                                 _mm512_setzero_si512 (),
7234                                                 (__mmask16) __U);
7235 }
7236
7237 extern __inline __m512i
7238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7239 _mm512_or_epi64 (__m512i __A, __m512i __B)
7240 {
7241   return (__m512i) ((__v8du) __A | (__v8du) __B);
7242 }
7243
7244 extern __inline __m512i
7245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7246 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7247 {
7248   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7249                                                 (__v8di) __B,
7250                                                 (__v8di) __W,
7251                                                 (__mmask8) __U);
7252 }
7253
7254 extern __inline __m512i
7255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7257 {
7258   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7259                                                 (__v8di) __B,
7260                                                 (__v8di)
7261                                                 _mm512_setzero_si512 (),
7262                                                 (__mmask8) __U);
7263 }
7264
7265 extern __inline __m512i
7266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7267 _mm512_xor_si512 (__m512i __A, __m512i __B)
7268 {
7269   return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7270 }
7271
7272 extern __inline __m512i
7273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7274 _mm512_xor_epi32 (__m512i __A, __m512i __B)
7275 {
7276   return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7277 }
7278
7279 extern __inline __m512i
7280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7281 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7282 {
7283   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7284                                                  (__v16si) __B,
7285                                                  (__v16si) __W,
7286                                                  (__mmask16) __U);
7287 }
7288
7289 extern __inline __m512i
7290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7292 {
7293   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7294                                                  (__v16si) __B,
7295                                                  (__v16si)
7296                                                  _mm512_setzero_si512 (),
7297                                                  (__mmask16) __U);
7298 }
7299
7300 extern __inline __m512i
7301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7302 _mm512_xor_epi64 (__m512i __A, __m512i __B)
7303 {
7304   return (__m512i) ((__v8du) __A ^ (__v8du) __B);
7305 }
7306
7307 extern __inline __m512i
7308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7309 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7310 {
7311   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7312                                                  (__v8di) __B,
7313                                                  (__v8di) __W,
7314                                                  (__mmask8) __U);
7315 }
7316
7317 extern __inline __m512i
7318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7319 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
7320 {
7321   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7322                                                  (__v8di) __B,
7323                                                  (__v8di)
7324                                                  _mm512_setzero_si512 (),
7325                                                  (__mmask8) __U);
7326 }
7327
7328 #ifdef __OPTIMIZE__
7329 extern __inline __m512i
7330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7331 _mm512_rol_epi32 (__m512i __A, const int __B)
7332 {
7333   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7334                                                  (__v16si)
7335                                                  _mm512_undefined_epi32 (),
7336                                                  (__mmask16) -1);
7337 }
7338
7339 extern __inline __m512i
7340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7341 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7342 {
7343   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7344                                                  (__v16si) __W,
7345                                                  (__mmask16) __U);
7346 }
7347
7348 extern __inline __m512i
7349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7350 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7351 {
7352   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7353                                                  (__v16si)
7354                                                  _mm512_setzero_si512 (),
7355                                                  (__mmask16) __U);
7356 }
7357
7358 extern __inline __m512i
7359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7360 _mm512_ror_epi32 (__m512i __A, int __B)
7361 {
7362   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7363                                                  (__v16si)
7364                                                  _mm512_undefined_epi32 (),
7365                                                  (__mmask16) -1);
7366 }
7367
7368 extern __inline __m512i
7369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7370 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7371 {
7372   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7373                                                  (__v16si) __W,
7374                                                  (__mmask16) __U);
7375 }
7376
7377 extern __inline __m512i
7378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7380 {
7381   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7382                                                  (__v16si)
7383                                                  _mm512_setzero_si512 (),
7384                                                  (__mmask16) __U);
7385 }
7386
7387 extern __inline __m512i
7388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7389 _mm512_rol_epi64 (__m512i __A, const int __B)
7390 {
7391   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7392                                                  (__v8di)
7393                                                  _mm512_undefined_epi32 (),
7394                                                  (__mmask8) -1);
7395 }
7396
7397 extern __inline __m512i
7398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7399 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7400 {
7401   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7402                                                  (__v8di) __W,
7403                                                  (__mmask8) __U);
7404 }
7405
7406 extern __inline __m512i
7407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7408 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7409 {
7410   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7411                                                  (__v8di)
7412                                                  _mm512_setzero_si512 (),
7413                                                  (__mmask8) __U);
7414 }
7415
7416 extern __inline __m512i
7417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7418 _mm512_ror_epi64 (__m512i __A, int __B)
7419 {
7420   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7421                                                  (__v8di)
7422                                                  _mm512_undefined_epi32 (),
7423                                                  (__mmask8) -1);
7424 }
7425
7426 extern __inline __m512i
7427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7428 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7429 {
7430   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7431                                                  (__v8di) __W,
7432                                                  (__mmask8) __U);
7433 }
7434
7435 extern __inline __m512i
7436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7437 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7438 {
7439   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7440                                                  (__v8di)
7441                                                  _mm512_setzero_si512 (),
7442                                                  (__mmask8) __U);
7443 }
7444
7445 #else
/* Macro form of _mm512_rol_epi32, used when __OPTIMIZE__ is not defined.
   Undefined pass-through vector, all-ones write mask.  */
#define _mm512_rol_epi32(V, IMM)                                        \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (V),     \
                                           (int) (IMM),                 \
                                           (__v16si)                    \
                                           _mm512_undefined_epi32 (),   \
                                           (__mmask16) (-1)))
/* Macro form of _mm512_mask_rol_epi32, used when __OPTIMIZE__ is not
   defined.  W is the pass-through vector, U the write mask.  */
#define _mm512_mask_rol_epi32(W, U, V, IMM)                             \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (V),     \
                                           (int) (IMM),                 \
                                           (__v16si) (__m512i) (W),     \
                                           (__mmask16) (U)))
/* Macro form of _mm512_maskz_rol_epi32, used when __OPTIMIZE__ is not
   defined.  All-zero pass-through vector, U is the write mask.  */
#define _mm512_maskz_rol_epi32(U, V, IMM)                               \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (V),     \
                                           (int) (IMM),                 \
                                           (__v16si)                    \
                                           _mm512_setzero_si512 (),     \
                                           (__mmask16) (U)))
/* Macro form of _mm512_ror_epi32, used when __OPTIMIZE__ is not defined.
   Undefined pass-through vector, all-ones write mask.  */
#define _mm512_ror_epi32(V, IMM)                                        \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (V),     \
                                           (int) (IMM),                 \
                                           (__v16si)                    \
                                           _mm512_undefined_epi32 (),   \
                                           (__mmask16) (-1)))
/* Macro form of _mm512_mask_ror_epi32, used when __OPTIMIZE__ is not
   defined.  W is the pass-through vector, U the write mask.  */
#define _mm512_mask_ror_epi32(W, U, V, IMM)                             \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (V),     \
                                           (int) (IMM),                 \
                                           (__v16si) (__m512i) (W),     \
                                           (__mmask16) (U)))
/* Macro form of _mm512_maskz_ror_epi32, used when __OPTIMIZE__ is not
   defined.  All-zero pass-through vector, U is the write mask.  */
#define _mm512_maskz_ror_epi32(U, V, IMM)                               \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (V),     \
                                           (int) (IMM),                 \
                                           (__v16si)                    \
                                           _mm512_setzero_si512 (),     \
                                           (__mmask16) (U)))
/* Macro form of _mm512_rol_epi64, used when __OPTIMIZE__ is not defined.
   Undefined pass-through vector, all-ones write mask.  */
#define _mm512_rol_epi64(V, IMM)                                        \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (V),      \
                                           (int) (IMM),                 \
                                           (__v8di)                     \
                                           _mm512_undefined_epi32 (),   \
                                           (__mmask8) (-1)))
/* Macro form: hands A, the count B, the vector W and the mask U to
   __builtin_ia32_prolq512_mask.  */
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
/* Macro form: hands A, the count B, an all-zero vector and the mask U
   to __builtin_ia32_prolq512_mask.  */
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
7491
/* Macro form: hands A and the count B to __builtin_ia32_prorq512_mask
   with an undefined pass-through operand and a mask of (__mmask8) -1.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) (-1)))
/* Macro form: hands A, the count B, the vector W and the mask U to
   __builtin_ia32_prorq512_mask.  */
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
/* Macro form: hands A, the count B, an all-zero vector and the mask U
   to __builtin_ia32_prorq512_mask.  */
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
7507 #endif
7508
7509 extern __inline __m512i
7510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511 _mm512_and_si512 (__m512i __A, __m512i __B)
7512 {
7513   return (__m512i) ((__v16su) __A & (__v16su) __B);
7514 }
7515
7516 extern __inline __m512i
7517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7518 _mm512_and_epi32 (__m512i __A, __m512i __B)
7519 {
7520   return (__m512i) ((__v16su) __A & (__v16su) __B);
7521 }
7522
7523 extern __inline __m512i
7524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7525 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7526 {
7527   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7528                                                  (__v16si) __B,
7529                                                  (__v16si) __W,
7530                                                  (__mmask16) __U);
7531 }
7532
7533 extern __inline __m512i
7534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7535 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7536 {
7537   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7538                                                  (__v16si) __B,
7539                                                  (__v16si)
7540                                                  _mm512_setzero_si512 (),
7541                                                  (__mmask16) __U);
7542 }
7543
7544 extern __inline __m512i
7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7546 _mm512_and_epi64 (__m512i __A, __m512i __B)
7547 {
7548   return (__m512i) ((__v8du) __A & (__v8du) __B);
7549 }
7550
7551 extern __inline __m512i
7552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7553 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7554 {
7555   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7556                                                  (__v8di) __B,
7557                                                  (__v8di) __W, __U);
7558 }
7559
7560 extern __inline __m512i
7561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7562 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7563 {
7564   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7565                                                  (__v8di) __B,
7566                                                  (__v8di)
7567                                                  _mm512_setzero_pd (),
7568                                                  __U);
7569 }
7570
7571 extern __inline __m512i
7572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7573 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7574 {
7575   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7576                                                   (__v16si) __B,
7577                                                   (__v16si)
7578                                                   _mm512_undefined_epi32 (),
7579                                                   (__mmask16) -1);
7580 }
7581
7582 extern __inline __m512i
7583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7584 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7585 {
7586   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7587                                                   (__v16si) __B,
7588                                                   (__v16si)
7589                                                   _mm512_undefined_epi32 (),
7590                                                   (__mmask16) -1);
7591 }
7592
7593 extern __inline __m512i
7594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7595 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7596 {
7597   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7598                                                   (__v16si) __B,
7599                                                   (__v16si) __W,
7600                                                   (__mmask16) __U);
7601 }
7602
7603 extern __inline __m512i
7604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7605 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7606 {
7607   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7608                                                   (__v16si) __B,
7609                                                   (__v16si)
7610                                                   _mm512_setzero_si512 (),
7611                                                   (__mmask16) __U);
7612 }
7613
7614 extern __inline __m512i
7615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7616 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7617 {
7618   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7619                                                   (__v8di) __B,
7620                                                   (__v8di)
7621                                                   _mm512_undefined_epi32 (),
7622                                                   (__mmask8) -1);
7623 }
7624
7625 extern __inline __m512i
7626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7627 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7628 {
7629   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7630                                                   (__v8di) __B,
7631                                                   (__v8di) __W, __U);
7632 }
7633
7634 extern __inline __m512i
7635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7636 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7637 {
7638   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7639                                                   (__v8di) __B,
7640                                                   (__v8di)
7641                                                   _mm512_setzero_pd (),
7642                                                   __U);
7643 }
7644
7645 extern __inline __mmask16
7646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7647 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7648 {
7649   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7650                                                 (__v16si) __B,
7651                                                 (__mmask16) -1);
7652 }
7653
7654 extern __inline __mmask16
7655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7657 {
7658   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7659                                                 (__v16si) __B, __U);
7660 }
7661
7662 extern __inline __mmask8
7663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7664 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7665 {
7666   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7667                                                (__v8di) __B,
7668                                                (__mmask8) -1);
7669 }
7670
7671 extern __inline __mmask8
7672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7673 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7674 {
7675   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7676 }
7677
7678 extern __inline __mmask16
7679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7680 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7681 {
7682   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7683                                                  (__v16si) __B,
7684                                                  (__mmask16) -1);
7685 }
7686
7687 extern __inline __mmask16
7688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7689 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7690 {
7691   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7692                                                  (__v16si) __B, __U);
7693 }
7694
7695 extern __inline __mmask8
7696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7697 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7698 {
7699   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7700                                                 (__v8di) __B,
7701                                                 (__mmask8) -1);
7702 }
7703
7704 extern __inline __mmask8
7705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7706 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7707 {
7708   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7709                                                 (__v8di) __B, __U);
7710 }
7711
7712 extern __inline __m512
7713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7714 _mm512_abs_ps (__m512 __A)
7715 {
7716   return (__m512) _mm512_and_epi32 ((__m512i) __A,
7717                                     _mm512_set1_epi32 (0x7fffffff));
7718 }
7719
7720 extern __inline __m512
7721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7722 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7723 {
7724   return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7725                                          _mm512_set1_epi32 (0x7fffffff));
7726 }
7727
7728 extern __inline __m512d
7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730 _mm512_abs_pd (__m512 __A)
7731 {
7732   return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7733                                      _mm512_set1_epi64 (0x7fffffffffffffffLL));
7734 }
7735
7736 extern __inline __m512d
7737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7738 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512 __A)
7739 {
7740   return (__m512d)
7741          _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7742                                 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7743 }
7744
7745 extern __inline __m512i
7746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7747 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7748 {
7749   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7750                                                      (__v16si) __B,
7751                                                      (__v16si)
7752                                                      _mm512_undefined_epi32 (),
7753                                                      (__mmask16) -1);
7754 }
7755
7756 extern __inline __m512i
7757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7758 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7759                             __m512i __B)
7760 {
7761   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7762                                                      (__v16si) __B,
7763                                                      (__v16si) __W,
7764                                                      (__mmask16) __U);
7765 }
7766
7767 extern __inline __m512i
7768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7770 {
7771   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7772                                                      (__v16si) __B,
7773                                                      (__v16si)
7774                                                      _mm512_setzero_si512 (),
7775                                                      (__mmask16) __U);
7776 }
7777
7778 extern __inline __m512i
7779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7780 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7781 {
7782   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7783                                                       (__v8di) __B,
7784                                                       (__v8di)
7785                                                       _mm512_undefined_epi32 (),
7786                                                       (__mmask8) -1);
7787 }
7788
7789 extern __inline __m512i
7790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7791 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7792 {
7793   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7794                                                       (__v8di) __B,
7795                                                       (__v8di) __W,
7796                                                       (__mmask8) __U);
7797 }
7798
7799 extern __inline __m512i
7800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7801 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7802 {
7803   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7804                                                       (__v8di) __B,
7805                                                       (__v8di)
7806                                                       _mm512_setzero_si512 (),
7807                                                       (__mmask8) __U);
7808 }
7809
7810 extern __inline __m512i
7811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7812 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7813 {
7814   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7815                                                      (__v16si) __B,
7816                                                      (__v16si)
7817                                                      _mm512_undefined_epi32 (),
7818                                                      (__mmask16) -1);
7819 }
7820
7821 extern __inline __m512i
7822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7823 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7824                             __m512i __B)
7825 {
7826   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7827                                                      (__v16si) __B,
7828                                                      (__v16si) __W,
7829                                                      (__mmask16) __U);
7830 }
7831
7832 extern __inline __m512i
7833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7834 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7835 {
7836   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7837                                                      (__v16si) __B,
7838                                                      (__v16si)
7839                                                      _mm512_setzero_si512 (),
7840                                                      (__mmask16) __U);
7841 }
7842
7843 extern __inline __m512i
7844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7845 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7846 {
7847   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7848                                                       (__v8di) __B,
7849                                                       (__v8di)
7850                                                       _mm512_undefined_epi32 (),
7851                                                       (__mmask8) -1);
7852 }
7853
7854 extern __inline __m512i
7855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7856 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7857 {
7858   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7859                                                       (__v8di) __B,
7860                                                       (__v8di) __W,
7861                                                       (__mmask8) __U);
7862 }
7863
7864 extern __inline __m512i
7865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7866 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7867 {
7868   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7869                                                       (__v8di) __B,
7870                                                       (__v8di)
7871                                                       _mm512_setzero_si512 (),
7872                                                       (__mmask8) __U);
7873 }
7874
#ifdef __x86_64__
/* Conversions from an __m128 float operand to a 64-bit integer result
   with an explicit second argument that is forwarded unchanged to the
   builtin.  Only provided when __x86_64__ is defined.  The inline
   functions are used when __OPTIMIZE__ is defined; otherwise the
   equivalent macros further down are used.  */
#ifdef __OPTIMIZE__
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u64 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (unsigned long long) __builtin_ia32_vcvtss2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si64 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvtss2si64 (__src, __R);
}

/* Same builtin as _mm_cvt_roundss_si64.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i64 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvtss2si64 (__src, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (unsigned long long) __builtin_ia32_vcvttss2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvttss2si64 (__src, __R);
}

/* Same builtin as _mm_cvtt_roundss_i64.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvttss2si64 (__src, __R);
}
#else
/* Macro forms: A and B are passed to the builtin without any cast.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 (A, B))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 (A, B))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))
#endif
#endif
7938
/* Conversions from an __m128 float operand to a 32-bit integer result
   with an explicit second argument that is forwarded unchanged to the
   builtin.  The inline functions are used when __OPTIMIZE__ is defined;
   otherwise the equivalent macros further down are used.  */
#ifdef __OPTIMIZE__
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u32 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (unsigned) __builtin_ia32_vcvtss2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si32 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvtss2si32 (__src, __R);
}

/* Same builtin as _mm_cvt_roundss_si32.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i32 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvtss2si32 (__src, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (unsigned) __builtin_ia32_vcvttss2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvttss2si32 (__src, __R);
}

/* Same builtin as _mm_cvtt_roundss_i32.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
{
  __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvttss2si32 (__src, __R);
}
#else
/* Macro forms: A and B are passed to the builtin without any cast.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 (A, B))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 (A, B))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))
#endif
8000
#ifdef __x86_64__
/* Conversions from an __m128d double operand to a 64-bit integer result
   with an explicit second argument that is forwarded unchanged to the
   builtin.  Only provided when __x86_64__ is defined.  The inline
   functions are used when __OPTIMIZE__ is defined; otherwise the
   equivalent macros further down are used.  */
#ifdef __OPTIMIZE__
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvtsd2si64 (__src, __R);
}

/* Same builtin as _mm_cvt_roundsd_si64.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvtsd2si64 (__src, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvttsd2si64 (__src, __R);
}

/* Same builtin as _mm_cvtt_roundsd_si64.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvttsd2si64 (__src, __R);
}
#else
/* Macro forms: A and B are passed to the builtin without any cast.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 (A, B))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 (A, B))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))
#endif
#endif
8064
/* Conversions from an __m128d double operand to a 32-bit integer result
   with an explicit second argument that is forwarded unchanged to the
   builtin.  The inline functions are used when __OPTIMIZE__ is defined;
   otherwise the equivalent macros further down are used.  */
#ifdef __OPTIMIZE__
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (unsigned) __builtin_ia32_vcvtsd2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvtsd2si32 (__src, __R);
}

/* Same builtin as _mm_cvt_roundsd_si32.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvtsd2si32 (__src, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (unsigned) __builtin_ia32_vcvttsd2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvttsd2si32 (__src, __R);
}

/* Same builtin as _mm_cvtt_roundsd_i32.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
{
  __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvttsd2si32 (__src, __R);
}
#else
/* Macro forms: A and B are passed to the builtin without any cast.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 (A, B))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 (A, B))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 (A, B))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 (A, B))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 (A, B))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 (A, B))
#endif
8126
8127 extern __inline __m512d
8128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8129 _mm512_movedup_pd (__m512d __A)
8130 {
8131   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8132                                                    (__v8df)
8133                                                    _mm512_undefined_pd (),
8134                                                    (__mmask8) -1);
8135 }
8136
8137 extern __inline __m512d
8138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8139 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8140 {
8141   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8142                                                    (__v8df) __W,
8143                                                    (__mmask8) __U);
8144 }
8145
8146 extern __inline __m512d
8147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8148 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8149 {
8150   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8151                                                    (__v8df)
8152                                                    _mm512_setzero_pd (),
8153                                                    (__mmask8) __U);
8154 }
8155
8156 extern __inline __m512d
8157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8158 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
8159 {
8160   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8161                                                     (__v8df) __B,
8162                                                     (__v8df)
8163                                                     _mm512_undefined_pd (),
8164                                                     (__mmask8) -1);
8165 }
8166
8167 extern __inline __m512d
8168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8169 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8170 {
8171   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8172                                                     (__v8df) __B,
8173                                                     (__v8df) __W,
8174                                                     (__mmask8) __U);
8175 }
8176
8177 extern __inline __m512d
8178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8179 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8180 {
8181   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8182                                                     (__v8df) __B,
8183                                                     (__v8df)
8184                                                     _mm512_setzero_pd (),
8185                                                     (__mmask8) __U);
8186 }
8187
8188 extern __inline __m512d
8189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8190 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
8191 {
8192   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8193                                                     (__v8df) __B,
8194                                                     (__v8df)
8195                                                     _mm512_undefined_pd (),
8196                                                     (__mmask8) -1);
8197 }
8198
8199 extern __inline __m512d
8200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8201 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8202 {
8203   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8204                                                     (__v8df) __B,
8205                                                     (__v8df) __W,
8206                                                     (__mmask8) __U);
8207 }
8208
8209 extern __inline __m512d
8210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8211 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8212 {
8213   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8214                                                     (__v8df) __B,
8215                                                     (__v8df)
8216                                                     _mm512_setzero_pd (),
8217                                                     (__mmask8) __U);
8218 }
8219
8220 extern __inline __m512
8221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8222 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
8223 {
8224   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8225                                                    (__v16sf) __B,
8226                                                    (__v16sf)
8227                                                    _mm512_undefined_ps (),
8228                                                    (__mmask16) -1);
8229 }
8230
8231 extern __inline __m512
8232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8233 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8234 {
8235   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8236                                                    (__v16sf) __B,
8237                                                    (__v16sf) __W,
8238                                                    (__mmask16) __U);
8239 }
8240
8241 extern __inline __m512
8242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8243 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8244 {
8245   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8246                                                    (__v16sf) __B,
8247                                                    (__v16sf)
8248                                                    _mm512_setzero_ps (),
8249                                                    (__mmask16) __U);
8250 }
8251
8252 #ifdef __OPTIMIZE__
8253 extern __inline __m512d
8254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8255 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
8256 {
8257   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8258                                                     (__v8df)
8259                                                     _mm512_undefined_pd (),
8260                                                     (__mmask8) -1, __R);
8261 }
8262
8263 extern __inline __m512d
8264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8265 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
8266                             const int __R)
8267 {
8268   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8269                                                     (__v8df) __W,
8270                                                     (__mmask8) __U, __R);
8271 }
8272
8273 extern __inline __m512d
8274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8275 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
8276 {
8277   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8278                                                     (__v8df)
8279                                                     _mm512_setzero_pd (),
8280                                                     (__mmask8) __U, __R);
8281 }
8282
8283 extern __inline __m512
8284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8285 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
8286 {
8287   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8288                                                     (__v16sf)
8289                                                     _mm512_undefined_ps (),
8290                                                     (__mmask16) -1, __R);
8291 }
8292
8293 extern __inline __m512
8294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8295 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
8296                             const int __R)
8297 {
8298   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8299                                                     (__v16sf) __W,
8300                                                     (__mmask16) __U, __R);
8301 }
8302
8303 extern __inline __m512
8304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8305 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
8306 {
8307   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8308                                                     (__v16sf)
8309                                                     _mm512_setzero_ps (),
8310                                                     (__mmask16) __U, __R);
8311 }
8312
8313 extern __inline __m256i
8314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8315 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
8316 {
8317   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8318                                                      __I,
8319                                                      (__v16hi)
8320                                                      _mm256_undefined_si256 (),
8321                                                      -1);
8322 }
8323
8324 extern __inline __m256i
8325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8326 _mm512_cvtps_ph (__m512 __A, const int __I)
8327 {
8328   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8329                                                      __I,
8330                                                      (__v16hi)
8331                                                      _mm256_undefined_si256 (),
8332                                                      -1);
8333 }
8334
8335 extern __inline __m256i
8336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8337 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
8338                             const int __I)
8339 {
8340   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8341                                                      __I,
8342                                                      (__v16hi) __U,
8343                                                      (__mmask16) __W);
8344 }
8345
8346 extern __inline __m256i
8347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8348 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
8349 {
8350   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8351                                                      __I,
8352                                                      (__v16hi) __U,
8353                                                      (__mmask16) __W);
8354 }
8355
8356 extern __inline __m256i
8357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8358 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
8359 {
8360   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8361                                                      __I,
8362                                                      (__v16hi)
8363                                                      _mm256_setzero_si256 (),
8364                                                      (__mmask16) __W);
8365 }
8366
8367 extern __inline __m256i
8368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8369 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
8370 {
8371   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8372                                                      __I,
8373                                                      (__v16hi)
8374                                                      _mm256_setzero_si256 (),
8375                                                      (__mmask16) __W);
8376 }
8377 #else
/* Macro form of _mm512_cvt_roundps_pd, used when __OPTIMIZE__ is not
   defined.  The arguments and the whole expansion are parenthesized so
   that operators at the call site cannot bind into the expansion.  */
#define _mm512_cvt_roundps_pd(A, B)                                     \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A),                      \
                                              (__v8df)                  \
                                              _mm512_undefined_pd (),   \
                                              (__mmask8) -1, (B)))
8380
/* Macro form of _mm512_mask_cvt_roundps_pd, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and U the write mask.  The
   arguments and the whole expansion are parenthesized so that operators
   at the call site cannot bind into the expansion.  */
#define _mm512_mask_cvt_roundps_pd(W, U, A, B)                          \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W),        \
                                              (U), (B)))
8383
/* Macro form of _mm512_maskz_cvt_roundps_pd, used when __OPTIMIZE__ is not
   defined: the pass-through operand is an all-zero vector and U is the
   write mask.  The arguments and the whole expansion are parenthesized so
   that operators at the call site cannot bind into the expansion.  */
#define _mm512_maskz_cvt_roundps_pd(U, A, B)                            \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A),                      \
                                              (__v8df)                  \
                                              _mm512_setzero_pd (),     \
                                              (U), (B)))
8386
/* Macro form of _mm512_cvt_roundph_ps, used when __OPTIMIZE__ is not
   defined.  The arguments and the whole expansion are parenthesized so
   that operators at the call site cannot bind into the expansion.  */
#define _mm512_cvt_roundph_ps(A, B)                                     \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A),            \
                                              (__v16sf)                 \
                                              _mm512_undefined_ps (),   \
                                              (__mmask16) -1, (B)))
8389
/* Macro form of _mm512_mask_cvt_roundph_ps, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and U the write mask.  The
   arguments and the whole expansion are parenthesized so that operators
   at the call site cannot bind into the expansion.  */
#define _mm512_mask_cvt_roundph_ps(W, U, A, B)                          \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A),            \
                                              (__v16sf) (W),            \
                                              (U), (B)))
8392
/* Macro form of _mm512_maskz_cvt_roundph_ps, used when __OPTIMIZE__ is not
   defined: the pass-through operand is an all-zero vector and U is the
   write mask.  The arguments and the whole expansion are parenthesized so
   that operators at the call site cannot bind into the expansion.  */
#define _mm512_maskz_cvt_roundph_ps(U, A, B)                            \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A),            \
                                              (__v16sf)                 \
                                              _mm512_setzero_ps (),     \
                                              (U), (B)))
8395
/* Macro form of _mm512_cvt_roundps_ph, used when __OPTIMIZE__ is not
   defined.  A is parenthesized before the casts are applied so that a
   compound argument expression is converted as a whole.  */
#define _mm512_cvt_roundps_ph(A, I)                                     \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A),   \
                                               (int) (I),               \
    (__v16hi)_mm256_undefined_si256 (), -1))
/* Macro form of _mm512_cvtps_ph, used when __OPTIMIZE__ is not defined;
   it expands to the same builtin call as _mm512_cvt_roundps_ph.  A is
   parenthesized before the casts are applied so that a compound argument
   expression is converted as a whole.  */
#define _mm512_cvtps_ph(A, I)                                           \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A),   \
                                               (int) (I),               \
    (__v16hi)_mm256_undefined_si256 (), -1))
/* Macro form of _mm512_mask_cvt_roundps_ph, used when __OPTIMIZE__ is not
   defined.  Here U is the pass-through vector and W is the write mask.
   A is parenthesized before the casts are applied so that a compound
   argument expression is converted as a whole.  */
#define _mm512_mask_cvt_roundps_ph(U, W, A, I)                          \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A),   \
                                               (int) (I),               \
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
/* Macro form of _mm512_mask_cvtps_ph, used when __OPTIMIZE__ is not
   defined.  Here U is the pass-through vector and W is the write mask.
   A is parenthesized before the casts are applied so that a compound
   argument expression is converted as a whole.  */
#define _mm512_mask_cvtps_ph(U, W, A, I)                                \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A),   \
                                               (int) (I),               \
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
/* Macro form of _mm512_maskz_cvt_roundps_ph, used when __OPTIMIZE__ is not
   defined: the pass-through operand is an all-zero vector and W is the
   write mask.  A is parenthesized before the casts are applied so that a
   compound argument expression is converted as a whole.  */
#define _mm512_maskz_cvt_roundps_ph(W, A, I)                            \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A),   \
                                               (int) (I),               \
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
/* Macro form of _mm512_maskz_cvtps_ph, used when __OPTIMIZE__ is not
   defined: the pass-through operand is an all-zero vector and W is the
   write mask.  A is parenthesized before the casts are applied so that a
   compound argument expression is converted as a whole.  */
#define _mm512_maskz_cvtps_ph(W, A, I)                                  \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A),   \
                                               (int) (I),               \
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
8414 #endif
8415
8416 #ifdef __OPTIMIZE__
8417 extern __inline __m256
8418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8419 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8420 {
8421   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8422                                                    (__v8sf)
8423                                                    _mm256_undefined_ps (),
8424                                                    (__mmask8) -1, __R);
8425 }
8426
8427 extern __inline __m256
8428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8429 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8430                             const int __R)
8431 {
8432   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8433                                                    (__v8sf) __W,
8434                                                    (__mmask8) __U, __R);
8435 }
8436
8437 extern __inline __m256
8438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8439 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8440 {
8441   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8442                                                    (__v8sf)
8443                                                    _mm256_setzero_ps (),
8444                                                    (__mmask8) __U, __R);
8445 }
8446
8447 extern __inline __m128
8448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8449 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8450 {
8451   return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8452                                                  (__v2df) __B,
8453                                                  __R);
8454 }
8455
8456 extern __inline __m128d
8457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8459 {
8460   return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8461                                                   (__v4sf) __B,
8462                                                   __R);
8463 }
8464 #else
/* Macro form of _mm512_cvt_roundpd_ps, used when __OPTIMIZE__ is not
   defined.  The arguments and the whole expansion are parenthesized so
   that operators at the call site cannot bind into the expansion.  */
#define _mm512_cvt_roundpd_ps(A, B)                                     \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A),                       \
                                             (__v8sf)                   \
                                             _mm256_undefined_ps (),    \
                                             (__mmask8) -1, (B)))
8467
/* Macro form of _mm512_mask_cvt_roundpd_ps, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and U the write mask.  The
   arguments and the whole expansion are parenthesized so that operators
   at the call site cannot bind into the expansion.  */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)                          \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W),         \
                                             (U), (B)))
8470
/* Macro form of _mm512_maskz_cvt_roundpd_ps, used when __OPTIMIZE__ is not
   defined: the pass-through operand is an all-zero vector and U is the
   write mask.  The arguments and the whole expansion are parenthesized so
   that operators at the call site cannot bind into the expansion.  */
#define _mm512_maskz_cvt_roundpd_ps(U, A, B)                            \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A),                       \
                                             (__v8sf)                   \
                                             _mm256_setzero_ps (),      \
                                             (U), (B)))
8473
/* Macro form of _mm_cvt_roundsd_ss, used when __OPTIMIZE__ is not defined.
   The arguments and the whole expansion are parenthesized so that
   operators at the call site cannot bind into the expansion.  */
#define _mm_cvt_roundsd_ss(A, B, C)                                     \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))
8476
/* Macro form of _mm_cvt_roundss_sd, used when __OPTIMIZE__ is not defined.
   The arguments and the whole expansion are parenthesized so that
   operators at the call site cannot bind into the expansion.  */
#define _mm_cvt_roundss_sd(A, B, C)                                     \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
8479 #endif
8480
8481 extern __inline void
8482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8483 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8484 {
8485   __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8486 }
8487
8488 extern __inline void
8489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8490 _mm512_stream_ps (float *__P, __m512 __A)
8491 {
8492   __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8493 }
8494
8495 extern __inline void
8496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8497 _mm512_stream_pd (double *__P, __m512d __A)
8498 {
8499   __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8500 }
8501
8502 extern __inline __m512i
8503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8504 _mm512_stream_load_si512 (void *__P)
8505 {
8506   return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8507 }
8508
8509 /* Constants for mantissa extraction */
/* Normalization-interval selector.  The getmant intrinsics place this
   value in the low two bits of the selector they pass to the builtin.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,        /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2 = 1,       /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1 = 2,       /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3     /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8517
/* Sign-control selector.  The getmant intrinsics shift this value left by
   two before OR-ing it with the normalization selector.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,        /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero = 1,       /* sign = 0             */
  _MM_MANT_SIGN_nan = 2         /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8524
8525 #ifdef __OPTIMIZE__
8526 extern __inline __m128
8527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8528 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8529 {
8530   return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8531                                                     (__v4sf) __B,
8532                                                     __R);
8533 }
8534
8535 extern __inline __m128
8536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8537 _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8538                           __m128 __B, const int __R)
8539 {
8540   return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8541                                                  (__v4sf) __B,
8542                                                  (__v4sf) __W,
8543                                                  (__mmask8) __U, __R);
8544 }
8545
8546 extern __inline __m128
8547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8548 _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8549                            const int __R)
8550 {
8551   return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8552                                                  (__v4sf) __B,
8553                                                  (__v4sf)
8554                                                  _mm_setzero_ps (),
8555                                                  (__mmask8) __U, __R);
8556 }
8557
8558 extern __inline __m128d
8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8560 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8561 {
8562   return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8563                                                      (__v2df) __B,
8564                                                      __R);
8565 }
8566
8567 extern __inline __m128d
8568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8569 _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8570                           __m128d __B, const int __R)
8571 {
8572   return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8573                                                  (__v2df) __B,
8574                                                  (__v2df) __W,
8575                                                  (__mmask8) __U, __R);
8576 }
8577
8578 extern __inline __m128d
8579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8580 _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8581                            const int __R)
8582 {
8583   return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8584                                                  (__v2df) __B,
8585                                                  (__v2df)
8586                                                  _mm_setzero_pd (),
8587                                                  (__mmask8) __U, __R);
8588 }
8589
8590 extern __inline __m512
8591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8592 _mm512_getexp_round_ps (__m512 __A, const int __R)
8593 {
8594   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8595                                                    (__v16sf)
8596                                                    _mm512_undefined_ps (),
8597                                                    (__mmask16) -1, __R);
8598 }
8599
8600 extern __inline __m512
8601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8602 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8603                              const int __R)
8604 {
8605   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8606                                                    (__v16sf) __W,
8607                                                    (__mmask16) __U, __R);
8608 }
8609
8610 extern __inline __m512
8611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8612 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8613 {
8614   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8615                                                    (__v16sf)
8616                                                    _mm512_setzero_ps (),
8617                                                    (__mmask16) __U, __R);
8618 }
8619
8620 extern __inline __m512d
8621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8622 _mm512_getexp_round_pd (__m512d __A, const int __R)
8623 {
8624   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8625                                                     (__v8df)
8626                                                     _mm512_undefined_pd (),
8627                                                     (__mmask8) -1, __R);
8628 }
8629
8630 extern __inline __m512d
8631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8632 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8633                              const int __R)
8634 {
8635   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8636                                                     (__v8df) __W,
8637                                                     (__mmask8) __U, __R);
8638 }
8639
8640 extern __inline __m512d
8641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8643 {
8644   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8645                                                     (__v8df)
8646                                                     _mm512_setzero_pd (),
8647                                                     (__mmask8) __U, __R);
8648 }
8649
8650 extern __inline __m512d
8651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8652 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8653                          _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8654 {
8655   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8656                                                      (__C << 2) | __B,
8657                                                      _mm512_undefined_pd (),
8658                                                      (__mmask8) -1, __R);
8659 }
8660
8661 extern __inline __m512d
8662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8664                               _MM_MANTISSA_NORM_ENUM __B,
8665                               _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8666 {
8667   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8668                                                      (__C << 2) | __B,
8669                                                      (__v8df) __W, __U,
8670                                                      __R);
8671 }
8672
8673 extern __inline __m512d
8674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8675 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8676                                _MM_MANTISSA_NORM_ENUM __B,
8677                                _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8678 {
8679   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8680                                                      (__C << 2) | __B,
8681                                                      (__v8df)
8682                                                      _mm512_setzero_pd (),
8683                                                      __U, __R);
8684 }
8685
8686 extern __inline __m512
8687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8688 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8689                          _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8690 {
8691   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8692                                                     (__C << 2) | __B,
8693                                                     _mm512_undefined_ps (),
8694                                                     (__mmask16) -1, __R);
8695 }
8696
8697 extern __inline __m512
8698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8699 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8700                               _MM_MANTISSA_NORM_ENUM __B,
8701                               _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8702 {
8703   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8704                                                     (__C << 2) | __B,
8705                                                     (__v16sf) __W, __U,
8706                                                     __R);
8707 }
8708
8709 extern __inline __m512
8710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8711 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8712                                _MM_MANTISSA_NORM_ENUM __B,
8713                                _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8714 {
8715   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8716                                                     (__C << 2) | __B,
8717                                                     (__v16sf)
8718                                                     _mm512_setzero_ps (),
8719                                                     __U, __R);
8720 }
8721
8722 extern __inline __m128d
8723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8724 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8725                       _MM_MANTISSA_NORM_ENUM __C,
8726                       _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8727 {
8728   return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8729                                                   (__v2df) __B,
8730                                                   (__D << 2) | __C,
8731                                                    __R);
8732 }
8733
8734 extern __inline __m128d
8735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736 _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8737                               __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
8738                               _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8739 {
8740   return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8741                                                     (__v2df) __B,
8742                                                     (__D << 2) | __C,
8743                                                     (__v2df) __W,
8744                                                      __U, __R);
8745 }
8746
8747 extern __inline __m128d
8748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8749 _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8750                                _MM_MANTISSA_NORM_ENUM __C,
8751                                _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8752 {
8753   return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8754                                                         (__v2df) __B,
8755                                                         (__D << 2) | __C,
8756                                                         (__v2df)
8757                                                         _mm_setzero_pd(),
8758                                                         __U, __R);
8759 }
8760
8761 extern __inline __m128
8762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8763 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8764                       _MM_MANTISSA_NORM_ENUM __C,
8765                       _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8766 {
8767   return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8768                                                   (__v4sf) __B,
8769                                                   (__D << 2) | __C,
8770                                                   __R);
8771 }
8772
8773 extern __inline __m128
8774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8775 _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8776                               __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
8777                               _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8778 {
8779   return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8780                                                     (__v4sf) __B,
8781                                                     (__D << 2) | __C,
8782                                                     (__v4sf) __W,
8783                                                      __U, __R);
8784 }
8785
8786 extern __inline __m128
8787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8788 _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8789                                _MM_MANTISSA_NORM_ENUM __C,
8790                                _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8791 {
8792   return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8793                                                         (__v4sf) __B,
8794                                                         (__D << 2) | __C,
8795                                                         (__v4sf)
8796                                                         _mm_setzero_ps(),
8797                                                         __U, __R);
8798 }
8799
8800 #else
/* Macro form of _mm512_getmant_round_pd, used when __OPTIMIZE__ is not
   defined.  The selector is ((C) << 2) | (B); the pass-through operand is
   undefined and the write mask is all ones.  */
#define _mm512_getmant_round_pd(X, B, C, R)                             \
  ((__m512d)__builtin_ia32_getmantpd512_mask                            \
     ((__v8df)(__m512d)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v8df)(__m512d)_mm512_undefined_pd(),                           \
      (__mmask8)-1,                                                     \
      (R)))
8807
/* Macro form of _mm512_mask_getmant_round_pd, used when __OPTIMIZE__ is
   not defined.  The selector is ((C) << 2) | (B); W is the pass-through
   operand and U the write mask.  */
#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R)                  \
  ((__m512d)__builtin_ia32_getmantpd512_mask                            \
     ((__v8df)(__m512d)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v8df)(__m512d)(W),                                             \
      (__mmask8)(U),                                                    \
      (R)))
8814
/* Macro form of _mm512_maskz_getmant_round_pd, used when __OPTIMIZE__ is
   not defined.  The selector is ((C) << 2) | (B); the pass-through operand
   is an all-zero vector and U is the write mask.  */
#define _mm512_maskz_getmant_round_pd(U, X, B, C, R)                    \
  ((__m512d)__builtin_ia32_getmantpd512_mask                            \
     ((__v8df)(__m512d)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v8df)(__m512d)_mm512_setzero_pd(),                             \
      (__mmask8)(U),                                                    \
      (R)))
/* Macro form of _mm512_getmant_round_ps, used when __OPTIMIZE__ is not
   defined.  The selector is ((C) << 2) | (B); the pass-through operand is
   undefined and the write mask is all ones.  */
#define _mm512_getmant_round_ps(X, B, C, R)                             \
  ((__m512)__builtin_ia32_getmantps512_mask                             \
     ((__v16sf)(__m512)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v16sf)(__m512)_mm512_undefined_ps(),                           \
      (__mmask16)-1,                                                    \
      (R)))
8827
/* Macro form of _mm512_mask_getmant_round_ps, used when __OPTIMIZE__ is
   not defined.  The selector is ((C) << 2) | (B); W is the pass-through
   operand and U the write mask.  */
#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R)                  \
  ((__m512)__builtin_ia32_getmantps512_mask                             \
     ((__v16sf)(__m512)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v16sf)(__m512)(W),                                             \
      (__mmask16)(U),                                                   \
      (R)))
8834
/* Macro form of _mm512_maskz_getmant_round_ps, used when __OPTIMIZE__ is
   not defined.  The selector is ((C) << 2) | (B); the pass-through operand
   is an all-zero vector and U is the write mask.  */
#define _mm512_maskz_getmant_round_ps(U, X, B, C, R)                    \
  ((__m512)__builtin_ia32_getmantps512_mask                             \
     ((__v16sf)(__m512)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v16sf)(__m512)_mm512_setzero_ps(),                             \
      (__mmask16)(U),                                                   \
      (R)))
/* Macro form of _mm_getmant_round_sd, used when __OPTIMIZE__ is not
   defined.  The selector passed to the builtin is ((D) << 2) | (C).  */
#define _mm_getmant_round_sd(X, Y, C, D, R)                             \
  ((__m128d)__builtin_ia32_getmantsd_round                              \
     ((__v2df)(__m128d)(X),                                             \
      (__v2df)(__m128d)(Y),                                             \
      (int)(((D)<<2) | (C)),                                            \
      (R)))
8846
/* Macro form of _mm_mask_getmant_round_sd, used when __OPTIMIZE__ is not
   defined.  The selector is ((D) << 2) | (C); W is the pass-through
   operand and U the write mask.  */
#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R)                  \
  ((__m128d)__builtin_ia32_getmantsd_mask_round                         \
     ((__v2df)(__m128d)(X),                                             \
      (__v2df)(__m128d)(Y),                                             \
      (int)(((D)<<2) | (C)),                                            \
      (__v2df)(__m128d)(W),                                             \
      (__mmask8)(U),                                                    \
      (R)))
8854
8855 #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R)                                         \
8856   ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X),                  \
8857                                                  (__v2df)(__m128d)(Y),                  \
8858                                              (int)(((D)<<2) | (C)),              \
8859                                              (__v2df)(__m128d)_mm_setzero_pd(),  \
8860                                              (__mmask8)(U),\
8861                                              (R)))
8862
/* Scalar-float getmant.  X and Y are forwarded as the two vector
   operands; the immediate is C in the low bits OR-ed with D shifted left
   by two, and R is passed through unchanged.  */
#define _mm_getmant_round_ss(X, Y, C, D, R)                             \
  ((__m128) __builtin_ia32_getmantss_round                              \
     ((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y),                         \
      (int)(((D)<<2) | (C)), (R)))

/* Masked variant: uses the _mask_round builtin with W as the fourth
   operand and U as the 8-bit mask.  */
#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R)                  \
  ((__m128) __builtin_ia32_getmantss_mask_round                         \
     ((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y),                         \
      (int)(((D)<<2) | (C)),                                            \
      (__v4sf)(__m128)(W), (__mmask8)(U), (R)))

/* Zero-masked variant: as the masked form, with an all-zero vector in
   place of W.  */
#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R)                    \
  ((__m128) __builtin_ia32_getmantss_mask_round                         \
     ((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y),                         \
      (int)(((D)<<2) | (C)),                                            \
      (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U), (R)))
8884
/* Scalar-float getexp: A and B are cast to __v4sf and handed to the
   getexpss128_round builtin together with R.  */
#define _mm_getexp_round_ss(A, B, R)                                    \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A),      \
                                              (__v4sf)(__m128)(B),      \
                                              R))
8887
/* Masked scalar-float getexp.  A and B are the vector operands, W is
   forwarded as the third builtin operand, U is the 8-bit mask and C is
   passed through as the final operand.

   The whole expansion is wrapped in parentheses and every argument is
   parenthesized and cast, as _mm_getexp_round_ss already does.  Without
   the outer parentheses a postfix operator applied to the macro result
   would bind to the builtin call before the (__m128) cast.  */
#define _mm_mask_getexp_round_ss(W, U, A, B, C)                         \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf)(__m128)(A),    \
                                                (__v4sf)(__m128)(B),    \
                                                (__v4sf)(__m128)(W),    \
                                                (__mmask8)(U), (C)))
8890
/* Zero-masked scalar-float getexp.  A and B are the vector operands, an
   all-zero vector is forwarded as the third builtin operand, U is the
   8-bit mask and C is passed through as the final operand.

   The whole expansion is wrapped in parentheses and every argument is
   parenthesized and cast, as _mm_getexp_round_ss already does.  Without
   the outer parentheses a postfix operator applied to the macro result
   would bind to the builtin call before the (__m128) cast.  */
#define _mm_maskz_getexp_round_ss(U, A, B, C)                           \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf)(__m128)(A),    \
                                                (__v4sf)(__m128)(B),    \
                                                (__v4sf)_mm_setzero_ps (), \
                                                (__mmask8)(U), (C)))
8893
/* Scalar-double getexp: A and B are cast to __v2df and handed to the
   getexpsd128_round builtin together with R.  */
#define _mm_getexp_round_sd(A, B, R)                                    \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A),    \
                                               (__v2df)(__m128d)(B),    \
                                               R))
8896
/* Masked scalar-double getexp.  A and B are the vector operands, W is
   forwarded as the third builtin operand, U is the 8-bit mask and C is
   passed through as the final operand.

   The whole expansion is wrapped in parentheses and every argument is
   parenthesized and cast, as _mm_getexp_round_sd already does.  Without
   the outer parentheses a postfix operator applied to the macro result
   would bind to the builtin call before the (__m128d) cast.  */
#define _mm_mask_getexp_round_sd(W, U, A, B, C)                         \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df)(__m128d)(A),  \
                                                 (__v2df)(__m128d)(B),  \
                                                 (__v2df)(__m128d)(W),  \
                                                 (__mmask8)(U), (C)))
8899
/* Zero-masked scalar-double getexp.  A and B are the vector operands, an
   all-zero vector is forwarded as the third builtin operand, U is the
   8-bit mask and C is passed through as the final operand.

   The whole expansion is wrapped in parentheses and every argument is
   parenthesized and cast, as _mm_getexp_round_sd already does.  Without
   the outer parentheses a postfix operator applied to the macro result
   would bind to the builtin call before the (__m128d) cast.  */
#define _mm_maskz_getexp_round_sd(U, A, B, C)                           \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df)(__m128d)(A),  \
                                                 (__v2df)(__m128d)(B),  \
                                                 (__v2df)_mm_setzero_pd (), \
                                                 (__mmask8)(U), (C)))
8902
8903
/* Packed-float getexp macros.  All three call the getexpps512_mask
   builtin on A; they differ in the second operand (undefined, W or zero)
   and in the mask (all ones or U).  R is forwarded unchanged.  */
#define _mm512_getexp_round_ps(A, R)                                    \
  ((__m512) __builtin_ia32_getexpps512_mask                             \
     ((__v16sf)(__m512)(A),                                             \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))

#define _mm512_mask_getexp_round_ps(W, U, A, R)                         \
  ((__m512) __builtin_ia32_getexpps512_mask                             \
     ((__v16sf)(__m512)(A),                                             \
      (__v16sf)(__m512)(W), (__mmask16)(U), R))

#define _mm512_maskz_getexp_round_ps(U, A, R)                           \
  ((__m512) __builtin_ia32_getexpps512_mask                             \
     ((__v16sf)(__m512)(A),                                             \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8915
/* Packed-double getexp macros.  All three call the getexppd512_mask
   builtin on A; they differ in the second operand (undefined, W or zero)
   and in the mask (all ones or U).  R is forwarded unchanged.  */
#define _mm512_getexp_round_pd(A, R)                                    \
  ((__m512d) __builtin_ia32_getexppd512_mask                            \
     ((__v8df)(__m512d)(A),                                             \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))

#define _mm512_mask_getexp_round_pd(W, U, A, R)                         \
  ((__m512d) __builtin_ia32_getexppd512_mask                            \
     ((__v8df)(__m512d)(A),                                             \
      (__v8df)(__m512d)(W), (__mmask8)(U), R))

#define _mm512_maskz_getexp_round_pd(U, A, R)                           \
  ((__m512d) __builtin_ia32_getexppd512_mask                            \
     ((__v8df)(__m512d)(A),                                             \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8927 #endif
8928
8929 #ifdef __OPTIMIZE__
8930 extern __inline __m512
8931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8932 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8933 {
8934   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8935                                                   (__v16sf)
8936                                                   _mm512_undefined_ps (),
8937                                                   -1, __R);
8938 }
8939
8940 extern __inline __m512
8941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8942 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8943                                  const int __imm, const int __R)
8944 {
8945   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8946                                                   (__v16sf) __A,
8947                                                   (__mmask16) __B, __R);
8948 }
8949
8950 extern __inline __m512
8951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8952 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8953                                   const int __imm, const int __R)
8954 {
8955   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8956                                                   __imm,
8957                                                   (__v16sf)
8958                                                   _mm512_setzero_ps (),
8959                                                   (__mmask16) __A, __R);
8960 }
8961
8962 extern __inline __m512d
8963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8964 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8965 {
8966   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8967                                                    (__v8df)
8968                                                    _mm512_undefined_pd (),
8969                                                    -1, __R);
8970 }
8971
8972 extern __inline __m512d
8973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8974 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8975                                  __m512d __C, const int __imm, const int __R)
8976 {
8977   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8978                                                    (__v8df) __A,
8979                                                    (__mmask8) __B, __R);
8980 }
8981
8982 extern __inline __m512d
8983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8985                                   const int __imm, const int __R)
8986 {
8987   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8988                                                    __imm,
8989                                                    (__v8df)
8990                                                    _mm512_setzero_pd (),
8991                                                    (__mmask8) __A, __R);
8992 }
8993
8994 extern __inline __m128
8995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8996 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8997 {
8998   return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8999                                                    (__v4sf) __B, __imm, __R);
9000 }
9001
9002 extern __inline __m128d
9003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9004 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
9005                          const int __R)
9006 {
9007   return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
9008                                                     (__v2df) __B, __imm, __R);
9009 }
9010
9011 #else
/* Macro forms of the packed-float roundscale intrinsics.  Each expands
   to a call of __builtin_ia32_rndscaleps_mask; they differ in the third
   operand (undefined, A or zero) and in the mask that is passed.  */
#define _mm512_roundscale_round_ps(A, B, R)                             \
  ((__m512) __builtin_ia32_rndscaleps_mask                              \
     ((__v16sf)(__m512)(A), (int)(B),                                   \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)                  \
  ((__m512) __builtin_ia32_rndscaleps_mask                              \
     ((__v16sf)(__m512)(C), (int)(D),                                   \
      (__v16sf)(__m512)(A), (__mmask16)(B), R))
#define _mm512_maskz_roundscale_round_ps(A, B, C, R)                    \
  ((__m512) __builtin_ia32_rndscaleps_mask                              \
     ((__v16sf)(__m512)(B), (int)(C),                                   \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), R))
/* Macro forms of the packed-double roundscale intrinsics.  Each expands
   to a call of __builtin_ia32_rndscalepd_mask; they differ in the third
   operand (undefined, A or zero) and in the mask that is passed.  */
#define _mm512_roundscale_round_pd(A, B, R)                             \
  ((__m512d) __builtin_ia32_rndscalepd_mask                             \
     ((__v8df)(__m512d)(A), (int)(B),                                   \
      (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)                  \
  ((__m512d) __builtin_ia32_rndscalepd_mask                             \
     ((__v8df)(__m512d)(C), (int)(D),                                   \
      (__v8df)(__m512d)(A), (__mmask8)(B), R))
#define _mm512_maskz_roundscale_round_pd(A, B, C, R)                    \
  ((__m512d) __builtin_ia32_rndscalepd_mask                             \
     ((__v8df)(__m512d)(B), (int)(C),                                   \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(A), R))
/* Macro forms of the scalar roundscale intrinsics: A and B are the two
   vector operands, C is cast to int for the immediate and R is passed
   through as the last operand.  */
#define _mm_roundscale_round_ss(A, B, C, R)                             \
  ((__m128) __builtin_ia32_rndscaless_round                             \
     ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), R))
#define _mm_roundscale_round_sd(A, B, C, R)                             \
  ((__m128d) __builtin_ia32_rndscalesd_round                            \
     ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), R))
9044 #endif
9045
9046 extern __inline __m512
9047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9048 _mm512_floor_ps (__m512 __A)
9049 {
9050   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9051                                                   _MM_FROUND_FLOOR,
9052                                                   (__v16sf) __A, -1,
9053                                                   _MM_FROUND_CUR_DIRECTION);
9054 }
9055
9056 extern __inline __m512d
9057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9058 _mm512_floor_pd (__m512d __A)
9059 {
9060   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9061                                                    _MM_FROUND_FLOOR,
9062                                                    (__v8df) __A, -1,
9063                                                    _MM_FROUND_CUR_DIRECTION);
9064 }
9065
9066 extern __inline __m512
9067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9068 _mm512_ceil_ps (__m512 __A)
9069 {
9070   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9071                                                   _MM_FROUND_CEIL,
9072                                                   (__v16sf) __A, -1,
9073                                                   _MM_FROUND_CUR_DIRECTION);
9074 }
9075
9076 extern __inline __m512d
9077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9078 _mm512_ceil_pd (__m512d __A)
9079 {
9080   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9081                                                    _MM_FROUND_CEIL,
9082                                                    (__v8df) __A, -1,
9083                                                    _MM_FROUND_CUR_DIRECTION);
9084 }
9085
9086 extern __inline __m512
9087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9088 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
9089 {
9090   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9091                                                   _MM_FROUND_FLOOR,
9092                                                   (__v16sf) __W, __U,
9093                                                   _MM_FROUND_CUR_DIRECTION);
9094 }
9095
9096 extern __inline __m512d
9097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9098 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
9099 {
9100   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9101                                                    _MM_FROUND_FLOOR,
9102                                                    (__v8df) __W, __U,
9103                                                    _MM_FROUND_CUR_DIRECTION);
9104 }
9105
9106 extern __inline __m512
9107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
9109 {
9110   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9111                                                   _MM_FROUND_CEIL,
9112                                                   (__v16sf) __W, __U,
9113                                                   _MM_FROUND_CUR_DIRECTION);
9114 }
9115
9116 extern __inline __m512d
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9119 {
9120   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9121                                                    _MM_FROUND_CEIL,
9122                                                    (__v8df) __W, __U,
9123                                                    _MM_FROUND_CUR_DIRECTION);
9124 }
9125
9126 #ifdef __OPTIMIZE__
9127 extern __inline __m512i
9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
9130 {
9131   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9132                                                   (__v16si) __B, __imm,
9133                                                   (__v16si)
9134                                                   _mm512_undefined_epi32 (),
9135                                                   (__mmask16) -1);
9136 }
9137
9138 extern __inline __m512i
9139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9140 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
9141                           __m512i __B, const int __imm)
9142 {
9143   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9144                                                   (__v16si) __B, __imm,
9145                                                   (__v16si) __W,
9146                                                   (__mmask16) __U);
9147 }
9148
9149 extern __inline __m512i
9150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9151 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
9152                            const int __imm)
9153 {
9154   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9155                                                   (__v16si) __B, __imm,
9156                                                   (__v16si)
9157                                                   _mm512_setzero_si512 (),
9158                                                   (__mmask16) __U);
9159 }
9160
9161 extern __inline __m512i
9162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9163 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
9164 {
9165   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9166                                                   (__v8di) __B, __imm,
9167                                                   (__v8di)
9168                                                   _mm512_undefined_epi32 (),
9169                                                   (__mmask8) -1);
9170 }
9171
9172 extern __inline __m512i
9173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9174 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
9175                           __m512i __B, const int __imm)
9176 {
9177   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9178                                                   (__v8di) __B, __imm,
9179                                                   (__v8di) __W,
9180                                                   (__mmask8) __U);
9181 }
9182
9183 extern __inline __m512i
9184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9185 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
9186                            const int __imm)
9187 {
9188   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9189                                                   (__v8di) __B, __imm,
9190                                                   (__v8di)
9191                                                   _mm512_setzero_si512 (),
9192                                                   (__mmask8) __U);
9193 }
9194 #else
/* Macro forms of the 32-bit alignr intrinsics.  Each expands to a call of
   __builtin_ia32_alignd512_mask on X and Y with C cast to int; they
   differ in the fourth operand (undefined, W or zero) and in the mask.  */
#define _mm512_alignr_epi32(X, Y, C)                                    \
  ((__m512i) __builtin_ia32_alignd512_mask                              \
     ((__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(C),           \
      (__v16si)_mm512_undefined_epi32 (), (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                         \
  ((__m512i) __builtin_ia32_alignd512_mask                              \
     ((__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(C),           \
      (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C)                           \
  ((__m512i) __builtin_ia32_alignd512_mask                              \
     ((__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(C),           \
      (__v16si)_mm512_setzero_si512 (), (__mmask16)(U)))
9209
/* Macro forms of the 64-bit alignr intrinsics.  Each expands to a call of
   __builtin_ia32_alignq512_mask on X and Y with C cast to int; they
   differ in the fourth operand (undefined, W or zero) and in the mask.  */
#define _mm512_alignr_epi64(X, Y, C)                                    \
  ((__m512i) __builtin_ia32_alignq512_mask                              \
     ((__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(C),             \
      (__v8di)_mm512_undefined_epi32 (), (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                         \
  ((__m512i) __builtin_ia32_alignq512_mask                              \
     ((__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(C),             \
      (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C)                           \
  ((__m512i) __builtin_ia32_alignq512_mask                              \
     ((__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(C),             \
      (__v8di)_mm512_setzero_si512 (), (__mmask8)(U)))
9223 #endif
9224
9225 extern __inline __mmask16
9226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9227 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9228 {
9229   return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9230                                                      (__v16si) __B,
9231                                                      (__mmask16) -1);
9232 }
9233
9234 extern __inline __mmask16
9235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9237 {
9238   return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9239                                                      (__v16si) __B, __U);
9240 }
9241
9242 extern __inline __mmask8
9243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9244 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9245 {
9246   return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9247                                                     (__v8di) __B, __U);
9248 }
9249
9250 extern __inline __mmask8
9251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9252 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9253 {
9254   return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9255                                                     (__v8di) __B,
9256                                                     (__mmask8) -1);
9257 }
9258
9259 extern __inline __mmask16
9260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9261 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9262 {
9263   return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9264                                                      (__v16si) __B,
9265                                                      (__mmask16) -1);
9266 }
9267
9268 extern __inline __mmask16
9269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9270 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9271 {
9272   return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9273                                                      (__v16si) __B, __U);
9274 }
9275
9276 extern __inline __mmask8
9277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9278 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9279 {
9280   return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9281                                                     (__v8di) __B, __U);
9282 }
9283
9284 extern __inline __mmask8
9285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9286 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9287 {
9288   return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9289                                                     (__v8di) __B,
9290                                                     (__mmask8) -1);
9291 }
9292
9293 extern __inline __mmask16
9294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9295 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9296 {
9297   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9298                                                     (__v16si) __Y, 5,
9299                                                     (__mmask16) -1);
9300 }
9301
9302 extern __inline __mmask16
9303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9304 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9305 {
9306   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9307                                                     (__v16si) __Y, 5,
9308                                                     (__mmask16) __M);
9309 }
9310
9311 extern __inline __mmask16
9312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9313 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9314 {
9315   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9316                                                     (__v16si) __Y, 5,
9317                                                     (__mmask16) __M);
9318 }
9319
9320 extern __inline __mmask16
9321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9322 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9323 {
9324   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9325                                                     (__v16si) __Y, 5,
9326                                                     (__mmask16) -1);
9327 }
9328
9329 extern __inline __mmask8
9330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9331 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9332 {
9333   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9334                                                     (__v8di) __Y, 5,
9335                                                     (__mmask8) __M);
9336 }
9337
9338 extern __inline __mmask8
9339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9340 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9341 {
9342   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9343                                                     (__v8di) __Y, 5,
9344                                                     (__mmask8) -1);
9345 }
9346
9347 extern __inline __mmask8
9348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9349 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9350 {
9351   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9352                                                     (__v8di) __Y, 5,
9353                                                     (__mmask8) __M);
9354 }
9355
9356 extern __inline __mmask8
9357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9358 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9359 {
9360   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9361                                                     (__v8di) __Y, 5,
9362                                                     (__mmask8) -1);
9363 }
9364
9365 extern __inline __mmask16
9366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9367 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9368 {
9369   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9370                                                     (__v16si) __Y, 2,
9371                                                     (__mmask16) __M);
9372 }
9373
9374 extern __inline __mmask16
9375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9377 {
9378   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9379                                                     (__v16si) __Y, 2,
9380                                                     (__mmask16) -1);
9381 }
9382
9383 extern __inline __mmask16
9384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9385 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9386 {
9387   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9388                                                     (__v16si) __Y, 2,
9389                                                     (__mmask16) __M);
9390 }
9391
9392 extern __inline __mmask16
9393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9394 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9395 {
9396   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9397                                                     (__v16si) __Y, 2,
9398                                                     (__mmask16) -1);
9399 }
9400
9401 extern __inline __mmask8
9402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9403 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9404 {
9405   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9406                                                     (__v8di) __Y, 2,
9407                                                     (__mmask8) __M);
9408 }
9409
9410 extern __inline __mmask8
9411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9412 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9413 {
9414   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9415                                                     (__v8di) __Y, 2,
9416                                                     (__mmask8) -1);
9417 }
9418
9419 extern __inline __mmask8
9420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9421 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9422 {
9423   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9424                                                     (__v8di) __Y, 2,
9425                                                     (__mmask8) __M);
9426 }
9427
9428 extern __inline __mmask8
9429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9430 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9431 {
9432   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9433                                                     (__v8di) __Y, 2,
9434                                                     (__mmask8) -1);
9435 }
9436
9437 extern __inline __mmask16
9438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9439 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9440 {
9441   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9442                                                     (__v16si) __Y, 1,
9443                                                     (__mmask16) __M);
9444 }
9445
9446 extern __inline __mmask16
9447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9448 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9449 {
9450   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9451                                                     (__v16si) __Y, 1,
9452                                                     (__mmask16) -1);
9453 }
9454
9455 extern __inline __mmask16
9456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9457 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9458 {
9459   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9460                                                     (__v16si) __Y, 1,
9461                                                     (__mmask16) __M);
9462 }
9463
9464 extern __inline __mmask16
9465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9466 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9467 {
9468   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9469                                                     (__v16si) __Y, 1,
9470                                                     (__mmask16) -1);
9471 }
9472
9473 extern __inline __mmask8
9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9476 {
9477   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9478                                                     (__v8di) __Y, 1,
9479                                                     (__mmask8) __M);
9480 }
9481
9482 extern __inline __mmask8
9483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9485 {
9486   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9487                                                     (__v8di) __Y, 1,
9488                                                     (__mmask8) -1);
9489 }
9490
9491 extern __inline __mmask8
9492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9494 {
9495   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9496                                                     (__v8di) __Y, 1,
9497                                                     (__mmask8) __M);
9498 }
9499
9500 extern __inline __mmask8
9501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9503 {
9504   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9505                                                     (__v8di) __Y, 1,
9506                                                     (__mmask8) -1);
9507 }
9508
9509 extern __inline __mmask16
9510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9512 {
9513   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9514                                                     (__v16si) __Y, 4,
9515                                                     (__mmask16) -1);
9516 }
9517
9518 extern __inline __mmask16
9519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9520 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9521 {
9522   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9523                                                     (__v16si) __Y, 4,
9524                                                     (__mmask16) __M);
9525 }
9526
9527 extern __inline __mmask16
9528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9529 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9530 {
9531   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9532                                                     (__v16si) __Y, 4,
9533                                                     (__mmask16) __M);
9534 }
9535
9536 extern __inline __mmask16
9537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9538 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9539 {
9540   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9541                                                     (__v16si) __Y, 4,
9542                                                     (__mmask16) -1);
9543 }
9544
9545 extern __inline __mmask8
9546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9547 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9548 {
9549   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9550                                                     (__v8di) __Y, 4,
9551                                                     (__mmask8) __M);
9552 }
9553
9554 extern __inline __mmask8
9555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9556 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9557 {
9558   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9559                                                     (__v8di) __Y, 4,
9560                                                     (__mmask8) -1);
9561 }
9562
9563 extern __inline __mmask8
9564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9565 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9566 {
9567   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9568                                                     (__v8di) __Y, 4,
9569                                                     (__mmask8) __M);
9570 }
9571
9572 extern __inline __mmask8
9573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9574 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9575 {
9576   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9577                                                     (__v8di) __Y, 4,
9578                                                     (__mmask8) -1);
9579 }
9580
/* Named values for the predicate operand of the integer compare
   intrinsics.  The fixed-predicate wrappers in this header pass 1 for
   "cmplt" and 4 for "cmpneq", matching LT and NE below.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4

/* Two spellings share each of the remaining encodings.  */
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5

#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
9590
9591 #ifdef __OPTIMIZE__
9592 extern __inline __mmask16
9593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9594 _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9595 {
9596   return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9597                                                 (__mmask8) __B);
9598 }
9599
9600 extern __inline __mmask16
9601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9602 _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9603 {
9604   return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9605                                                 (__mmask8) __B);
9606 }
9607
9608 extern __inline __mmask8
9609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9611 {
9612   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9613                                                  (__v8di) __Y, __P,
9614                                                  (__mmask8) -1);
9615 }
9616
9617 extern __inline __mmask16
9618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9619 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9620 {
9621   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9622                                                   (__v16si) __Y, __P,
9623                                                   (__mmask16) -1);
9624 }
9625
9626 extern __inline __mmask8
9627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9628 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9629 {
9630   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9631                                                   (__v8di) __Y, __P,
9632                                                   (__mmask8) -1);
9633 }
9634
9635 extern __inline __mmask16
9636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9637 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9638 {
9639   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9640                                                    (__v16si) __Y, __P,
9641                                                    (__mmask16) -1);
9642 }
9643
9644 extern __inline __mmask8
9645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9646 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9647                           const int __R)
9648 {
9649   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9650                                                   (__v8df) __Y, __P,
9651                                                   (__mmask8) -1, __R);
9652 }
9653
9654 extern __inline __mmask16
9655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9656 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9657 {
9658   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9659                                                    (__v16sf) __Y, __P,
9660                                                    (__mmask16) -1, __R);
9661 }
9662
9663 extern __inline __mmask8
9664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9665 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9666                             const int __P)
9667 {
9668   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9669                                                  (__v8di) __Y, __P,
9670                                                  (__mmask8) __U);
9671 }
9672
9673 extern __inline __mmask16
9674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9675 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9676                             const int __P)
9677 {
9678   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9679                                                   (__v16si) __Y, __P,
9680                                                   (__mmask16) __U);
9681 }
9682
9683 extern __inline __mmask8
9684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9685 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9686                             const int __P)
9687 {
9688   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9689                                                   (__v8di) __Y, __P,
9690                                                   (__mmask8) __U);
9691 }
9692
9693 extern __inline __mmask16
9694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9695 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9696                             const int __P)
9697 {
9698   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9699                                                    (__v16si) __Y, __P,
9700                                                    (__mmask16) __U);
9701 }
9702
9703 extern __inline __mmask8
9704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9706                                const int __P, const int __R)
9707 {
9708   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9709                                                   (__v8df) __Y, __P,
9710                                                   (__mmask8) __U, __R);
9711 }
9712
9713 extern __inline __mmask16
9714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9715 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9716                                const int __P, const int __R)
9717 {
9718   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9719                                                    (__v16sf) __Y, __P,
9720                                                    (__mmask16) __U, __R);
9721 }
9722
9723 extern __inline __mmask8
9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9726 {
9727   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9728                                                (__v2df) __Y, __P,
9729                                                (__mmask8) -1, __R);
9730 }
9731
9732 extern __inline __mmask8
9733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9734 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9735                             const int __P, const int __R)
9736 {
9737   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9738                                                (__v2df) __Y, __P,
9739                                                (__mmask8) __M, __R);
9740 }
9741
9742 extern __inline __mmask8
9743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9744 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9745 {
9746   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9747                                                (__v4sf) __Y, __P,
9748                                                (__mmask8) -1, __R);
9749 }
9750
9751 extern __inline __mmask8
9752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9753 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9754                             const int __P, const int __R)
9755 {
9756   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9757                                                (__v4sf) __Y, __P,
9758                                                (__mmask8) __M, __R);
9759 }
9760
9761 #else
/* Macro form used when __OPTIMIZE__ is not defined: X is cast to
   __mmask16 and Y to __mmask8 before reaching
   __builtin_ia32_kshiftlihi.  */
#define _kshiftli_mask16(X, Y)                                          \
  ((__mmask16)                                                          \
   __builtin_ia32_kshiftlihi ((__mmask16) (X), (__mmask8) (Y)))
9764
/* Macro form used when __OPTIMIZE__ is not defined: X is cast to
   __mmask16 and Y to __mmask8 before reaching
   __builtin_ia32_kshiftrihi.  */
#define _kshiftri_mask16(X, Y)                                          \
  ((__mmask16)                                                          \
   __builtin_ia32_kshiftrihi ((__mmask16) (X), (__mmask8) (Y)))
9767
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpq512_mask on X and Y with predicate P and an all-ones
   __mmask8.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X),                 \
                                (__v8di) (__m512i) (Y),                 \
                                (int) (P), (__mmask8) -1))
9772
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpd512_mask on X and Y with predicate P and an all-ones
   __mmask16.  */
#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16)                                                          \
   __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X),                \
                                (__v16si) (__m512i) (Y),                \
                                (int) (P), (__mmask16) -1))
9777
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_ucmpq512_mask on X and Y with predicate P and an
   all-ones __mmask8.  */
#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8)                                                           \
   __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X),                \
                                 (__v8di) (__m512i) (Y),                \
                                 (int) (P), (__mmask8) -1))
9782
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_ucmpd512_mask on X and Y with predicate P and an
   all-ones __mmask16.  */
#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16)                                                          \
   __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X),               \
                                 (__v16si) (__m512i) (Y),               \
                                 (int) (P), (__mmask16) -1))
9787
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmppd512_mask on X and Y with predicate P, an all-ones
   __mmask8 and R as the final operand.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8)                                                           \
   __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),                \
                                 (__v8df) (__m512d) (Y),                \
                                 (int) (P), (__mmask8) -1, R))
9792
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpps512_mask on X and Y with predicate P, an all-ones
   __mmask16 and R as the final operand.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16)                                                          \
   __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),                \
                                 (__v16sf) (__m512) (Y),                \
                                 (int) (P), (__mmask16) -1, R))
9797
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpq512_mask on X and Y with predicate P and mask M.
   M is parenthesized before the cast so that an expression argument such
   as `m >> 8' is narrowed as a whole; without the parentheses the cast
   binds only to the expression's first operand.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y), (int)(P),\
                                           (__mmask8)(M)))
9802
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpd512_mask on X and Y with predicate P and mask M.
   M is parenthesized before the cast so that an expression argument is
   narrowed as a whole; without the parentheses the cast binds only to
   the expression's first operand.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)(M)))
9807
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_ucmpq512_mask on X and Y with predicate P and mask M.
   M is parenthesized before the cast so that an expression argument such
   as `m >> 8' is narrowed as a whole; without the parentheses the cast
   binds only to the expression's first operand.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),       \
                                            (__v8di)(__m512i)(Y), (int)(P),\
                                            (__mmask8)(M)))
9812
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_ucmpd512_mask on X and Y with predicate P and mask M.
   M is parenthesized before the cast so that an expression argument is
   narrowed as a whole; without the parentheses the cast binds only to
   the expression's first operand.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)(M)))
9817
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmppd512_mask on X and Y with predicate P, mask M and R
   as the final operand.  M is parenthesized before the cast so that an
   expression argument such as `m >> 8' is narrowed as a whole; without
   the parentheses the cast binds only to the expression's first
   operand.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)(M), (R)))
9822
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpps512_mask on X and Y with predicate P, mask M and R
   as the final operand.  M is parenthesized before the cast so that an
   expression argument is narrowed as a whole; without the parentheses
   the cast binds only to the expression's first operand.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)(M), (R)))
9827
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpsd_mask on X and Y with predicate P, an all-ones
   __mmask8 and R as the final operand.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R)                               \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),                   \
                              (__v2df) (__m128d) (Y),                   \
                              (int) (P), (__mmask8) -1, R))
9832
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpsd_mask on X and Y with predicate P, mask M (passed
   without a cast) and R as the final operand.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                       \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),                   \
                              (__v2df) (__m128d) (Y),                   \
                              (int) (P), (M), R))
9837
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpss_mask on X and Y with predicate P, an all-ones
   __mmask8 and R as the final operand.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R)                               \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),                    \
                              (__v4sf) (__m128) (Y),                    \
                              (int) (P), (__mmask8) -1, R))
9842
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpss_mask on X and Y with predicate P, mask M (passed
   without a cast) and R as the final operand.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                       \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),                    \
                              (__v4sf) (__m128) (Y),                    \
                              (int) (P), (M), R))
9847 #endif
9848
9849 #ifdef __OPTIMIZE__
9850 extern __inline __m512
9851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9852 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
9853 {
9854   __m512 __v1_old = _mm512_undefined_ps ();
9855   __mmask16 __mask = 0xFFFF;
9856
9857   return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9858                                                 __addr,
9859                                                 (__v16si) __index,
9860                                                 __mask, __scale);
9861 }
9862
9863 extern __inline __m512
9864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9865 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
9866                           __m512i __index, void const *__addr, int __scale)
9867 {
9868   return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9869                                                 __addr,
9870                                                 (__v16si) __index,
9871                                                 __mask, __scale);
9872 }
9873
9874 extern __inline __m512d
9875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9876 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
9877 {
9878   __m512d __v1_old = _mm512_undefined_pd ();
9879   __mmask8 __mask = 0xFF;
9880
9881   return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9882                                                 __addr,
9883                                                 (__v8si) __index, __mask,
9884                                                 __scale);
9885 }
9886
9887 extern __inline __m512d
9888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9889 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9890                           __m256i __index, void const *__addr, int __scale)
9891 {
9892   return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9893                                                 __addr,
9894                                                 (__v8si) __index,
9895                                                 __mask, __scale);
9896 }
9897
9898 extern __inline __m256
9899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9900 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
9901 {
9902   __m256 __v1_old = _mm256_undefined_ps ();
9903   __mmask8 __mask = 0xFF;
9904
9905   return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9906                                                 __addr,
9907                                                 (__v8di) __index, __mask,
9908                                                 __scale);
9909 }
9910
9911 extern __inline __m256
9912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9913 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9914                           __m512i __index, void const *__addr, int __scale)
9915 {
9916   return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9917                                                 __addr,
9918                                                 (__v8di) __index,
9919                                                 __mask, __scale);
9920 }
9921
9922 extern __inline __m512d
9923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9924 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
9925 {
9926   __m512d __v1_old = _mm512_undefined_pd ();
9927   __mmask8 __mask = 0xFF;
9928
9929   return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9930                                                 __addr,
9931                                                 (__v8di) __index, __mask,
9932                                                 __scale);
9933 }
9934
9935 extern __inline __m512d
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9938                           __m512i __index, void const *__addr, int __scale)
9939 {
9940   return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9941                                                 __addr,
9942                                                 (__v8di) __index,
9943                                                 __mask, __scale);
9944 }
9945
9946 extern __inline __m512i
9947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9948 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
9949 {
9950   __m512i __v1_old = _mm512_undefined_epi32 ();
9951   __mmask16 __mask = 0xFFFF;
9952
9953   return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9954                                                  __addr,
9955                                                  (__v16si) __index,
9956                                                  __mask, __scale);
9957 }
9958
9959 extern __inline __m512i
9960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9961 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9962                              __m512i __index, void const *__addr, int __scale)
9963 {
9964   return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9965                                                  __addr,
9966                                                  (__v16si) __index,
9967                                                  __mask, __scale);
9968 }
9969
9970 extern __inline __m512i
9971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9972 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
9973 {
9974   __m512i __v1_old = _mm512_undefined_epi32 ();
9975   __mmask8 __mask = 0xFF;
9976
9977   return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9978                                                 __addr,
9979                                                 (__v8si) __index, __mask,
9980                                                 __scale);
9981 }
9982
9983 extern __inline __m512i
9984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9985 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9986                              __m256i __index, void const *__addr,
9987                              int __scale)
9988 {
9989   return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9990                                                 __addr,
9991                                                 (__v8si) __index,
9992                                                 __mask, __scale);
9993 }
9994
9995 extern __inline __m256i
9996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9997 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
9998 {
9999   __m256i __v1_old = _mm256_undefined_si256 ();
10000   __mmask8 __mask = 0xFF;
10001
10002   return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10003                                                  __addr,
10004                                                  (__v8di) __index,
10005                                                  __mask, __scale);
10006 }
10007
10008 extern __inline __m256i
10009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10010 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10011                              __m512i __index, void const *__addr, int __scale)
10012 {
10013   return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10014                                                  __addr,
10015                                                  (__v8di) __index,
10016                                                  __mask, __scale);
10017 }
10018
10019 extern __inline __m512i
10020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10021 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
10022 {
10023   __m512i __v1_old = _mm512_undefined_epi32 ();
10024   __mmask8 __mask = 0xFF;
10025
10026   return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10027                                                 __addr,
10028                                                 (__v8di) __index, __mask,
10029                                                 __scale);
10030 }
10031
10032 extern __inline __m512i
10033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10034 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
10035                              __m512i __index, void const *__addr,
10036                              int __scale)
10037 {
10038   return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10039                                                 __addr,
10040                                                 (__v8di) __index,
10041                                                 __mask, __scale);
10042 }
10043
10044 extern __inline void
10045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10046 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
10047 {
10048   __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
10049                                  (__v16si) __index, (__v16sf) __v1, __scale);
10050 }
10051
10052 extern __inline void
10053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10054 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
10055                            __m512i __index, __m512 __v1, int __scale)
10056 {
10057   __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
10058                                  (__v16sf) __v1, __scale);
10059 }
10060
10061 extern __inline void
10062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10063 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
10064                       int __scale)
10065 {
10066   __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
10067                                 (__v8si) __index, (__v8df) __v1, __scale);
10068 }
10069
10070 extern __inline void
10071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10072 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
10073                            __m256i __index, __m512d __v1, int __scale)
10074 {
10075   __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
10076                                 (__v8df) __v1, __scale);
10077 }
10078
10079 extern __inline void
10080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
10082 {
10083   __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
10084                                  (__v8di) __index, (__v8sf) __v1, __scale);
10085 }
10086
10087 extern __inline void
10088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
10090                            __m512i __index, __m256 __v1, int __scale)
10091 {
10092   __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
10093                                  (__v8sf) __v1, __scale);
10094 }
10095
10096 extern __inline void
10097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10098 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
10099                       int __scale)
10100 {
10101   __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
10102                                 (__v8di) __index, (__v8df) __v1, __scale);
10103 }
10104
10105 extern __inline void
10106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10107 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
10108                            __m512i __index, __m512d __v1, int __scale)
10109 {
10110   __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
10111                                 (__v8df) __v1, __scale);
10112 }
10113
10114 extern __inline void
10115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10116 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
10117                          __m512i __v1, int __scale)
10118 {
10119   __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
10120                                  (__v16si) __index, (__v16si) __v1, __scale);
10121 }
10122
10123 extern __inline void
10124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10125 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
10126                               __m512i __index, __m512i __v1, int __scale)
10127 {
10128   __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
10129                                  (__v16si) __v1, __scale);
10130 }
10131
10132 extern __inline void
10133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10134 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
10135                          __m512i __v1, int __scale)
10136 {
10137   __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
10138                                 (__v8si) __index, (__v8di) __v1, __scale);
10139 }
10140
10141 extern __inline void
10142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
10144                               __m256i __index, __m512i __v1, int __scale)
10145 {
10146   __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
10147                                 (__v8di) __v1, __scale);
10148 }
10149
10150 extern __inline void
10151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10152 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
10153                          __m256i __v1, int __scale)
10154 {
10155   __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
10156                                  (__v8di) __index, (__v8si) __v1, __scale);
10157 }
10158
10159 extern __inline void
10160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10161 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
10162                               __m512i __index, __m256i __v1, int __scale)
10163 {
10164   __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
10165                                  (__v8si) __v1, __scale);
10166 }
10167
10168 extern __inline void
10169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10170 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
10171                          __m512i __v1, int __scale)
10172 {
10173   __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
10174                                 (__v8di) __index, (__v8di) __v1, __scale);
10175 }
10176
10177 extern __inline void
10178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10179 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
10180                               __m512i __index, __m512i __v1, int __scale)
10181 {
10182   __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
10183                                 (__v8di) __v1, __scale);
10184 }
10185 #else
/* Macro form: calls __builtin_ia32_gathersiv16sf with an undefined
   pass-through vector and an all-ones 16-bit mask.  Arguments are
   parenthesized before casting so that expression arguments such as
   BASE + N are converted as a whole; the expansion is parenthesized too.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
                                          (void const *) (ADDR),        \
                                          (__v16si)(__m512i) (INDEX),   \
                                          (__mmask16)0xFFFF,            \
                                          (int) (SCALE)))
10191
/* Macro form: calls __builtin_ia32_gathersiv16sf with V1OLD as the
   pass-through vector and MASK as the 16-bit mask.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole; the expansion is parenthesized too.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v16si)(__m512i) (INDEX),   \
                                          (__mmask16) (MASK),           \
                                          (int) (SCALE)))
10197
/* Macro form: calls __builtin_ia32_gathersiv8df with an undefined
   pass-through vector and an all-ones 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments such as
   BASE + N are converted as a whole; the expansion is parenthesized too.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8)0xFF,               \
                                          (int) (SCALE)))
10203
/* Macro form: calls __builtin_ia32_gathersiv8df with V1OLD as the
   pass-through vector and MASK as the 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole; the expansion is parenthesized too.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
10209
/* Macro form: calls __builtin_ia32_gatherdiv16sf with an undefined
   256-bit pass-through vector and an all-ones 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments such as
   BASE + N are converted as a whole; the expansion is parenthesized too.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),\
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF,               \
                                          (int) (SCALE)))
10215
/* Macro form: calls __builtin_ia32_gatherdiv16sf with V1OLD as the
   pass-through vector and MASK as the 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole; the expansion is parenthesized too.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),     \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
10221
/* Macro form: calls __builtin_ia32_gatherdiv8df with an undefined
   pass-through vector and an all-ones 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments such as
   BASE + N are converted as a whole; the expansion is parenthesized too.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF,               \
                                          (int) (SCALE)))
10227
/* Macro form: calls __builtin_ia32_gatherdiv8df with V1OLD as the
   pass-through vector and MASK as the 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole; the expansion is parenthesized too.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
10233
/* Macro form: calls __builtin_ia32_gathersiv16si with an undefined
   pass-through vector and an all-ones 16-bit mask.  Arguments are
   parenthesized before casting so that expression arguments such as
   BASE + N are converted as a whole; the expansion is parenthesized too.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv16si (                             \
                                (__v16si)_mm512_undefined_epi32 (),     \
                                (void const *) (ADDR),                  \
                                (__v16si)(__m512i) (INDEX),             \
                                (__mmask16)0xFFFF, (int) (SCALE)))
10239
/* Macro form: calls __builtin_ia32_gathersiv16si with V1OLD as the
   pass-through vector and MASK as the 16-bit mask.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole; the expansion is parenthesized too.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),  \
                                           (void const *) (ADDR),       \
                                           (__v16si)(__m512i) (INDEX),  \
                                           (__mmask16) (MASK),          \
                                           (int) (SCALE)))
10245
/* Macro form: calls __builtin_ia32_gathersiv8di with an undefined
   pass-through vector and an all-ones 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments such as
   BASE + N are converted as a whole; the expansion is parenthesized too.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv8di (                              \
                                (__v8di)_mm512_undefined_epi32 (),      \
                                (void const *) (ADDR),                  \
                                (__v8si)(__m256i) (INDEX),              \
                                (__mmask8)0xFF, (int) (SCALE)))
10251
/* Macro form: calls __builtin_ia32_gathersiv8di with V1OLD as the
   pass-through vector and MASK as the 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole; the expansion is parenthesized too.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
10257
/* Macro form: calls __builtin_ia32_gatherdiv16si with an undefined
   256-bit pass-through vector and an all-ones 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments such as
   BASE + N are converted as a whole; the expansion is parenthesized too.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m256i) __builtin_ia32_gatherdiv16si (                             \
                                (__v8si)_mm256_undefined_si256(),       \
                                (void const *) (ADDR),                  \
                                (__v8di)(__m512i) (INDEX),              \
                                (__mmask8)0xFF, (int) (SCALE)))
10263
/* Macro form: calls __builtin_ia32_gatherdiv16si with V1OLD as the
   pass-through vector and MASK as the 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole; the expansion is parenthesized too.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),   \
                                           (void const *) (ADDR),       \
                                           (__v8di)(__m512i) (INDEX),   \
                                           (__mmask8) (MASK),           \
                                           (int) (SCALE)))
10269
/* Macro form: calls __builtin_ia32_gatherdiv8di with an undefined
   pass-through vector and an all-ones 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments such as
   BASE + N are converted as a whole; the expansion is parenthesized too.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gatherdiv8di (                              \
                                (__v8di)_mm512_undefined_epi32 (),      \
                                (void const *) (ADDR),                  \
                                (__v8di)(__m512i) (INDEX),              \
                                (__mmask8)0xFF, (int) (SCALE)))
10275
/* Macro form: calls __builtin_ia32_gatherdiv8di with V1OLD as the
   pass-through vector and MASK as the 8-bit mask.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole; the expansion is parenthesized too.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
10281
/* Macro form: calls __builtin_ia32_scattersiv16sf with an all-ones
   16-bit mask.  Arguments are parenthesized before casting so that
   expression arguments such as BASE + N are converted as a whole.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF,    \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
10286
/* Macro form: calls __builtin_ia32_scattersiv16sf with MASK as the
   16-bit mask.  Arguments are parenthesized before casting so that
   expression arguments are converted as a whole.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK),   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
10291
/* Macro form: calls __builtin_ia32_scattersiv8df with an all-ones
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments such as BASE + N are converted as a whole.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF,        \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10296
/* Macro form: calls __builtin_ia32_scattersiv8df with MASK as the
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments are converted as a whole.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10301
/* Macro form: calls __builtin_ia32_scatterdiv16sf with an all-ones
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments such as BASE + N are converted as a whole.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF,       \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
10306
/* Macro form: calls __builtin_ia32_scatterdiv16sf with MASK as the
   mask.  MASK is cast to __mmask8, the same mask type the unmasked
   _mm512_i64scatter_ps macro passes to this builtin.  Arguments are
   parenthesized before casting so that expression arguments are
   converted as a whole.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK),    \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
10311
/* Macro form: calls __builtin_ia32_scatterdiv8df with an all-ones
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments such as BASE + N are converted as a whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF,        \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10316
/* Macro form: calls __builtin_ia32_scatterdiv8df with MASK as the
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments are converted as a whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10321
/* Macro form: calls __builtin_ia32_scattersiv16si with an all-ones
   16-bit mask.  Arguments are parenthesized before casting so that
   expression arguments such as BASE + N are converted as a whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF,    \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
10326
/* Macro form: calls __builtin_ia32_scattersiv16si with MASK as the
   16-bit mask.  Arguments are parenthesized before casting so that
   expression arguments are converted as a whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK),   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
10331
/* Macro form: calls __builtin_ia32_scattersiv8di with an all-ones
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments such as BASE + N are converted as a whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF,        \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10336
/* Macro form: calls __builtin_ia32_scattersiv8di with MASK as the
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments are converted as a whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10341
/* Macro form: calls __builtin_ia32_scatterdiv16si with an all-ones
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments such as BASE + N are converted as a whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF,       \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
10346
/* Macro form: calls __builtin_ia32_scatterdiv16si with MASK as the
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments are converted as a whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK),    \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
10351
/* Macro form: calls __builtin_ia32_scatterdiv8di with an all-ones
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments such as BASE + N are converted as a whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF,        \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10356
/* Macro form: calls __builtin_ia32_scatterdiv8di with MASK as the
   8-bit mask.  Arguments are parenthesized before casting so that
   expression arguments are converted as a whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10361 #endif
10362
10363 extern __inline __m512d
10364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10365 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10366 {
10367   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10368                                                       (__v8df) __W,
10369                                                       (__mmask8) __U);
10370 }
10371
10372 extern __inline __m512d
10373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10374 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10375 {
10376   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10377                                                       (__v8df)
10378                                                       _mm512_setzero_pd (),
10379                                                       (__mmask8) __U);
10380 }
10381
10382 extern __inline void
10383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10384 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10385 {
10386   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10387                                           (__mmask8) __U);
10388 }
10389
10390 extern __inline __m512
10391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10392 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10393 {
10394   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10395                                                      (__v16sf) __W,
10396                                                      (__mmask16) __U);
10397 }
10398
10399 extern __inline __m512
10400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10401 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10402 {
10403   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10404                                                      (__v16sf)
10405                                                      _mm512_setzero_ps (),
10406                                                      (__mmask16) __U);
10407 }
10408
10409 extern __inline void
10410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10411 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10412 {
10413   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10414                                           (__mmask16) __U);
10415 }
10416
10417 extern __inline __m512i
10418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10419 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10420 {
10421   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10422                                                       (__v8di) __W,
10423                                                       (__mmask8) __U);
10424 }
10425
10426 extern __inline __m512i
10427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10428 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10429 {
10430   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10431                                                       (__v8di)
10432                                                       _mm512_setzero_si512 (),
10433                                                       (__mmask8) __U);
10434 }
10435
10436 extern __inline void
10437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10438 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10439 {
10440   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10441                                           (__mmask8) __U);
10442 }
10443
10444 extern __inline __m512i
10445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10446 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10447 {
10448   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10449                                                       (__v16si) __W,
10450                                                       (__mmask16) __U);
10451 }
10452
10453 extern __inline __m512i
10454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10455 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10456 {
10457   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10458                                                       (__v16si)
10459                                                       _mm512_setzero_si512 (),
10460                                                       (__mmask16) __U);
10461 }
10462
10463 extern __inline void
10464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10465 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10466 {
10467   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10468                                           (__mmask16) __U);
10469 }
10470
10471 extern __inline __m512d
10472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10473 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10474 {
10475   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10476                                                     (__v8df) __W,
10477                                                     (__mmask8) __U);
10478 }
10479
10480 extern __inline __m512d
10481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10482 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10483 {
10484   return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10485                                                      (__v8df)
10486                                                      _mm512_setzero_pd (),
10487                                                      (__mmask8) __U);
10488 }
10489
10490 extern __inline __m512d
10491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10492 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10493 {
10494   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10495                                                         (__v8df) __W,
10496                                                         (__mmask8) __U);
10497 }
10498
10499 extern __inline __m512d
10500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10501 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10502 {
10503   return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10504                                                          (__v8df)
10505                                                          _mm512_setzero_pd (),
10506                                                          (__mmask8) __U);
10507 }
10508
10509 extern __inline __m512
10510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10511 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10512 {
10513   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10514                                                    (__v16sf) __W,
10515                                                    (__mmask16) __U);
10516 }
10517
10518 extern __inline __m512
10519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10520 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10521 {
10522   return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10523                                                     (__v16sf)
10524                                                     _mm512_setzero_ps (),
10525                                                     (__mmask16) __U);
10526 }
10527
10528 extern __inline __m512
10529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10530 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10531 {
10532   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10533                                                        (__v16sf) __W,
10534                                                        (__mmask16) __U);
10535 }
10536
10537 extern __inline __m512
10538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10539 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10540 {
10541   return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10542                                                         (__v16sf)
10543                                                         _mm512_setzero_ps (),
10544                                                         (__mmask16) __U);
10545 }
10546
10547 extern __inline __m512i
10548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10549 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10550 {
10551   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10552                                                     (__v8di) __W,
10553                                                     (__mmask8) __U);
10554 }
10555
10556 extern __inline __m512i
10557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10558 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10559 {
10560   return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10561                                                      (__v8di)
10562                                                      _mm512_setzero_si512 (),
10563                                                      (__mmask8) __U);
10564 }
10565
10566 extern __inline __m512i
10567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10568 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10569 {
10570   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10571                                                         (__v8di) __W,
10572                                                         (__mmask8) __U);
10573 }
10574
10575 extern __inline __m512i
10576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10577 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10578 {
10579   return (__m512i)
10580          __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10581                                                (__v8di)
10582                                                _mm512_setzero_si512 (),
10583                                                (__mmask8) __U);
10584 }
10585
10586 extern __inline __m512i
10587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10588 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10589 {
10590   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10591                                                     (__v16si) __W,
10592                                                     (__mmask16) __U);
10593 }
10594
10595 extern __inline __m512i
10596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10597 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10598 {
10599   return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10600                                                      (__v16si)
10601                                                      _mm512_setzero_si512 (),
10602                                                      (__mmask16) __U);
10603 }
10604
10605 extern __inline __m512i
10606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10607 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10608 {
10609   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10610                                                         (__v16si) __W,
10611                                                         (__mmask16) __U);
10612 }
10613
10614 extern __inline __m512i
10615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10616 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10617 {
10618   return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10619                                                          (__v16si)
10620                                                          _mm512_setzero_si512
10621                                                          (), (__mmask16) __U);
10622 }
10623
/* Mask arithmetic operations.  The _k*_mask16 spellings are object-like
   aliases for the _mm512_k* functions defined further down in this
   header.  */
#define _kand_mask16    _mm512_kand
#define _kandn_mask16   _mm512_kandn
#define _kor_mask16     _mm512_kor
#define _kxor_mask16    _mm512_kxor
#define _kxnor_mask16   _mm512_kxnor
#define _knot_mask16    _mm512_knot
10631
10632 extern __inline unsigned char
10633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10634 _kortest_mask16_u8  (__mmask16 __A,  __mmask16 __B, unsigned char *__CF)
10635 {
10636   *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10637   return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10638 }
10639
10640 extern __inline unsigned char
10641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10642 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10643 {
10644   return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10645                                                     (__mmask16) __B);
10646 }
10647
10648 extern __inline unsigned char
10649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10650 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10651 {
10652   return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10653                                                     (__mmask16) __B);
10654 }
10655
10656 extern __inline unsigned int
10657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658 _cvtmask16_u32 (__mmask16 __A)
10659 {
10660   return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10661 }
10662
10663 extern __inline __mmask16
10664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10665 _cvtu32_mask16 (unsigned int __A)
10666 {
10667   return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10668 }
10669
10670 extern __inline __mmask16
10671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10672 _load_mask16 (__mmask16 *__A)
10673 {
10674   return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10675 }
10676
10677 extern __inline void
10678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10679 _store_mask16 (__mmask16 *__A, __mmask16 __B)
10680 {
10681   *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10682 }
10683
10684 extern __inline __mmask16
10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686 _mm512_kand (__mmask16 __A, __mmask16 __B)
10687 {
10688   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10689 }
10690
10691 extern __inline __mmask16
10692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10693 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10694 {
10695   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10696                                              (__mmask16) __B);
10697 }
10698
10699 extern __inline __mmask16
10700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10701 _mm512_kor (__mmask16 __A, __mmask16 __B)
10702 {
10703   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10704 }
10705
10706 extern __inline int
10707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10708 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10709 {
10710   return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10711                                                 (__mmask16) __B);
10712 }
10713
10714 extern __inline int
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10717 {
10718   return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10719                                                 (__mmask16) __B);
10720 }
10721
10722 extern __inline __mmask16
10723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10724 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10725 {
10726   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10727 }
10728
10729 extern __inline __mmask16
10730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10731 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10732 {
10733   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10734 }
10735
10736 extern __inline __mmask16
10737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10738 _mm512_knot (__mmask16 __A)
10739 {
10740   return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10741 }
10742
10743 extern __inline __mmask16
10744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10745 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10746 {
10747   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10748 }
10749
10750 extern __inline __mmask16
10751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10752 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10753 {
10754   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10755 }
10756
10757 #ifdef __OPTIMIZE__
10758 extern __inline __m512i
10759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10760 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10761                           const int __imm)
10762 {
10763   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10764                                                     (__v4si) __D,
10765                                                     __imm,
10766                                                     (__v16si)
10767                                                     _mm512_setzero_si512 (),
10768                                                     __B);
10769 }
10770
10771 extern __inline __m512
10772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10773 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10774                           const int __imm)
10775 {
10776   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10777                                                    (__v4sf) __D,
10778                                                    __imm,
10779                                                    (__v16sf)
10780                                                    _mm512_setzero_ps (), __B);
10781 }
10782
10783 extern __inline __m512i
10784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10785 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10786                          __m128i __D, const int __imm)
10787 {
10788   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10789                                                     (__v4si) __D,
10790                                                     __imm,
10791                                                     (__v16si) __A,
10792                                                     __B);
10793 }
10794
10795 extern __inline __m512
10796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10797 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10798                          __m128 __D, const int __imm)
10799 {
10800   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10801                                                    (__v4sf) __D,
10802                                                    __imm,
10803                                                    (__v16sf) __A, __B);
10804 }
10805 #else
/* Macro forms of the masked 32x4 insert intrinsics (the #else branch of the
   enclosing conditional).  Each expands to the same builtin call as the
   inline definition of the same name.  The mask argument is cast to
   __mmask16, the type the inline definitions use for it; casting to
   __mmask8 would discard the upper eight mask bits.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
    (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
    (__mmask16)(B)))
10825 #endif
10826
10827 extern __inline __m512i
10828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10829 _mm512_max_epi64 (__m512i __A, __m512i __B)
10830 {
10831   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10832                                                   (__v8di) __B,
10833                                                   (__v8di)
10834                                                   _mm512_undefined_epi32 (),
10835                                                   (__mmask8) -1);
10836 }
10837
10838 extern __inline __m512i
10839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10840 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10841 {
10842   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10843                                                   (__v8di) __B,
10844                                                   (__v8di)
10845                                                   _mm512_setzero_si512 (),
10846                                                   __M);
10847 }
10848
10849 extern __inline __m512i
10850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10851 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10852 {
10853   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10854                                                   (__v8di) __B,
10855                                                   (__v8di) __W, __M);
10856 }
10857
10858 extern __inline __m512i
10859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10860 _mm512_min_epi64 (__m512i __A, __m512i __B)
10861 {
10862   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10863                                                   (__v8di) __B,
10864                                                   (__v8di)
10865                                                   _mm512_undefined_epi32 (),
10866                                                   (__mmask8) -1);
10867 }
10868
10869 extern __inline __m512i
10870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10871 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10872 {
10873   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10874                                                   (__v8di) __B,
10875                                                   (__v8di) __W, __M);
10876 }
10877
10878 extern __inline __m512i
10879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10880 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10881 {
10882   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10883                                                   (__v8di) __B,
10884                                                   (__v8di)
10885                                                   _mm512_setzero_si512 (),
10886                                                   __M);
10887 }
10888
10889 extern __inline __m512i
10890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10891 _mm512_max_epu64 (__m512i __A, __m512i __B)
10892 {
10893   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10894                                                   (__v8di) __B,
10895                                                   (__v8di)
10896                                                   _mm512_undefined_epi32 (),
10897                                                   (__mmask8) -1);
10898 }
10899
10900 extern __inline __m512i
10901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10902 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10903 {
10904   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10905                                                   (__v8di) __B,
10906                                                   (__v8di)
10907                                                   _mm512_setzero_si512 (),
10908                                                   __M);
10909 }
10910
10911 extern __inline __m512i
10912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10913 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10914 {
10915   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10916                                                   (__v8di) __B,
10917                                                   (__v8di) __W, __M);
10918 }
10919
10920 extern __inline __m512i
10921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10922 _mm512_min_epu64 (__m512i __A, __m512i __B)
10923 {
10924   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10925                                                   (__v8di) __B,
10926                                                   (__v8di)
10927                                                   _mm512_undefined_epi32 (),
10928                                                   (__mmask8) -1);
10929 }
10930
10931 extern __inline __m512i
10932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10933 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10934 {
10935   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10936                                                   (__v8di) __B,
10937                                                   (__v8di) __W, __M);
10938 }
10939
10940 extern __inline __m512i
10941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10943 {
10944   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10945                                                   (__v8di) __B,
10946                                                   (__v8di)
10947                                                   _mm512_setzero_si512 (),
10948                                                   __M);
10949 }
10950
10951 extern __inline __m512i
10952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953 _mm512_max_epi32 (__m512i __A, __m512i __B)
10954 {
10955   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10956                                                   (__v16si) __B,
10957                                                   (__v16si)
10958                                                   _mm512_undefined_epi32 (),
10959                                                   (__mmask16) -1);
10960 }
10961
10962 extern __inline __m512i
10963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10964 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10965 {
10966   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10967                                                   (__v16si) __B,
10968                                                   (__v16si)
10969                                                   _mm512_setzero_si512 (),
10970                                                   __M);
10971 }
10972
10973 extern __inline __m512i
10974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10975 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10976 {
10977   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10978                                                   (__v16si) __B,
10979                                                   (__v16si) __W, __M);
10980 }
10981
10982 extern __inline __m512i
10983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984 _mm512_min_epi32 (__m512i __A, __m512i __B)
10985 {
10986   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10987                                                   (__v16si) __B,
10988                                                   (__v16si)
10989                                                   _mm512_undefined_epi32 (),
10990                                                   (__mmask16) -1);
10991 }
10992
10993 extern __inline __m512i
10994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10995 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10996 {
10997   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10998                                                   (__v16si) __B,
10999                                                   (__v16si)
11000                                                   _mm512_setzero_si512 (),
11001                                                   __M);
11002 }
11003
11004 extern __inline __m512i
11005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11006 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11007 {
11008   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11009                                                   (__v16si) __B,
11010                                                   (__v16si) __W, __M);
11011 }
11012
11013 extern __inline __m512i
11014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11015 _mm512_max_epu32 (__m512i __A, __m512i __B)
11016 {
11017   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11018                                                   (__v16si) __B,
11019                                                   (__v16si)
11020                                                   _mm512_undefined_epi32 (),
11021                                                   (__mmask16) -1);
11022 }
11023
11024 extern __inline __m512i
11025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11026 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11027 {
11028   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11029                                                   (__v16si) __B,
11030                                                   (__v16si)
11031                                                   _mm512_setzero_si512 (),
11032                                                   __M);
11033 }
11034
11035 extern __inline __m512i
11036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11037 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11038 {
11039   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11040                                                   (__v16si) __B,
11041                                                   (__v16si) __W, __M);
11042 }
11043
11044 extern __inline __m512i
11045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11046 _mm512_min_epu32 (__m512i __A, __m512i __B)
11047 {
11048   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11049                                                   (__v16si) __B,
11050                                                   (__v16si)
11051                                                   _mm512_undefined_epi32 (),
11052                                                   (__mmask16) -1);
11053 }
11054
11055 extern __inline __m512i
11056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11058 {
11059   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11060                                                   (__v16si) __B,
11061                                                   (__v16si)
11062                                                   _mm512_setzero_si512 (),
11063                                                   __M);
11064 }
11065
11066 extern __inline __m512i
11067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11069 {
11070   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11071                                                   (__v16si) __B,
11072                                                   (__v16si) __W, __M);
11073 }
11074
11075 extern __inline __m512
11076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11077 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
11078 {
11079   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11080                                                    (__v16sf) __B,
11081                                                    (__v16sf)
11082                                                    _mm512_undefined_ps (),
11083                                                    (__mmask16) -1);
11084 }
11085
11086 extern __inline __m512
11087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11088 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11089 {
11090   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11091                                                    (__v16sf) __B,
11092                                                    (__v16sf) __W,
11093                                                    (__mmask16) __U);
11094 }
11095
11096 extern __inline __m512
11097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11098 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
11099 {
11100   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11101                                                    (__v16sf) __B,
11102                                                    (__v16sf)
11103                                                    _mm512_setzero_ps (),
11104                                                    (__mmask16) __U);
11105 }
11106
11107 #ifdef __OPTIMIZE__
11108 extern __inline __m128d
11109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11110 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
11111 {
11112   return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
11113                                                (__v2df) __B,
11114                                                __R);
11115 }
11116
11117 extern __inline __m128d
11118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11119 _mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11120                           __m128d __B, const int __R)
11121 {
11122   return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11123                                                  (__v2df) __B,
11124                                                  (__v2df) __W,
11125                                                  (__mmask8) __U, __R);
11126 }
11127
11128 extern __inline __m128d
11129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11130 _mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11131                            const int __R)
11132 {
11133   return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11134                                                  (__v2df) __B,
11135                                                  (__v2df)
11136                                                  _mm_setzero_pd (),
11137                                                  (__mmask8) __U, __R);
11138 }
11139
11140 extern __inline __m128
11141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11142 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
11143 {
11144   return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
11145                                               (__v4sf) __B,
11146                                               __R);
11147 }
11148
11149 extern __inline __m128
11150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11151 _mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11152                           __m128 __B, const int __R)
11153 {
11154   return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11155                                                  (__v4sf) __B,
11156                                                  (__v4sf) __W,
11157                                                  (__mmask8) __U, __R);
11158 }
11159
11160 extern __inline __m128
11161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11162 _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11163                            const int __R)
11164 {
11165   return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11166                                                  (__v4sf) __B,
11167                                                  (__v4sf)
11168                                                  _mm_setzero_ps (),
11169                                                  (__mmask8) __U, __R);
11170 }
11171
11172 extern __inline __m128d
11173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11174 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
11175 {
11176   return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
11177                                                (__v2df) __B,
11178                                                __R);
11179 }
11180
11181 extern __inline __m128d
11182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11183 _mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11184                           __m128d __B, const int __R)
11185 {
11186   return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11187                                                  (__v2df) __B,
11188                                                  (__v2df) __W,
11189                                                  (__mmask8) __U, __R);
11190 }
11191
11192 extern __inline __m128d
11193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11194 _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11195                            const int __R)
11196 {
11197   return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11198                                                  (__v2df) __B,
11199                                                  (__v2df)
11200                                                  _mm_setzero_pd (),
11201                                                  (__mmask8) __U, __R);
11202 }
11203
11204 extern __inline __m128
11205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11206 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
11207 {
11208   return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
11209                                               (__v4sf) __B,
11210                                               __R);
11211 }
11212
11213 extern __inline __m128
11214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11215 _mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11216                           __m128 __B, const int __R)
11217 {
11218   return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11219                                                  (__v4sf) __B,
11220                                                  (__v4sf) __W,
11221                                                  (__mmask8) __U, __R);
11222 }
11223
11224 extern __inline __m128
11225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11226 _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11227                            const int __R)
11228 {
11229   return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11230                                                  (__v4sf) __B,
11231                                                  (__v4sf)
11232                                                  _mm_setzero_ps (),
11233                                                  (__mmask8) __U, __R);
11234 }
11235
11236 #else
/* Macro forms of the scalar min/max rounding intrinsics (the #else branch of
   the __OPTIMIZE__ conditional).  Each expansion is enclosed in an outer pair
   of parentheses so that the vector cast applies to the complete builtin
   call even when the macro result is followed by a postfix operator; the
   macros then behave in expressions like the inline functions of the same
   name.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_maxsd_round(A, B, C))

#define _mm_mask_max_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C))

#define _mm_maskz_max_round_sd(U, A, B, C)   \
    ((__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C))

#define _mm_max_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_maxss_round(A, B, C))

#define _mm_mask_max_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C))

#define _mm_maskz_max_round_ss(U, A, B, C)   \
    ((__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C))

#define _mm_min_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_minsd_round(A, B, C))

#define _mm_mask_min_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C))

#define _mm_maskz_min_round_sd(U, A, B, C)   \
    ((__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C))

#define _mm_min_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_minss_round(A, B, C))

#define _mm_mask_min_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C))

#define _mm_maskz_min_round_ss(U, A, B, C)   \
    ((__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C))
11272
11273 #endif
11274
11275 extern __inline __m512d
11276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11277 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11278 {
11279   return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11280                                                      (__v8df) __W,
11281                                                      (__mmask8) __U);
11282 }
11283
11284 extern __inline __m512
11285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11286 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11287 {
11288   return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11289                                                     (__v16sf) __W,
11290                                                     (__mmask16) __U);
11291 }
11292
11293 extern __inline __m512i
11294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11295 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11296 {
11297   return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11298                                                     (__v8di) __W,
11299                                                     (__mmask8) __U);
11300 }
11301
11302 extern __inline __m512i
11303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11304 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11305 {
11306   return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11307                                                     (__v16si) __W,
11308                                                     (__mmask16) __U);
11309 }
11310
11311 #ifdef __OPTIMIZE__
11312 extern __inline __m128d
11313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11314 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11315 {
11316   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11317                                                    (__v2df) __A,
11318                                                    (__v2df) __B,
11319                                                    __R);
11320 }
11321
11322 extern __inline __m128
11323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11324 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11325 {
11326   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11327                                                   (__v4sf) __A,
11328                                                   (__v4sf) __B,
11329                                                   __R);
11330 }
11331
11332 extern __inline __m128d
11333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11334 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11335 {
11336   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11337                                                    (__v2df) __A,
11338                                                    -(__v2df) __B,
11339                                                    __R);
11340 }
11341
11342 extern __inline __m128
11343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11344 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11345 {
11346   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11347                                                   (__v4sf) __A,
11348                                                   -(__v4sf) __B,
11349                                                   __R);
11350 }
11351
11352 extern __inline __m128d
11353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11354 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11355 {
11356   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11357                                                    -(__v2df) __A,
11358                                                    (__v2df) __B,
11359                                                    __R);
11360 }
11361
11362 extern __inline __m128
11363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11364 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11365 {
11366   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11367                                                   -(__v4sf) __A,
11368                                                   (__v4sf) __B,
11369                                                   __R);
11370 }
11371
11372 extern __inline __m128d
11373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11374 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11375 {
11376   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11377                                                    -(__v2df) __A,
11378                                                    -(__v2df) __B,
11379                                                    __R);
11380 }
11381
11382 extern __inline __m128
11383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11384 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11385 {
11386   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11387                                                   -(__v4sf) __A,
11388                                                   -(__v4sf) __B,
11389                                                   __R);
11390 }
11391 #else
/* Macro forms of the scalar fused multiply-add rounding intrinsics (the
   #else branch of the __OPTIMIZE__ conditional).  The sub/nmadd/nmsub
   variants negate the relevant operands and reuse the vfmadd builtins.
   Each expansion is enclosed in an outer pair of parentheses so that the
   vector cast applies to the complete builtin call even when the macro
   result is followed by a postfix operator.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R))
11415 #endif
11416
11417 #ifdef __OPTIMIZE__
11418 extern __inline int
11419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11420 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
11421 {
11422   return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
11423 }
11424
11425 extern __inline int
11426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11427 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
11428 {
11429   return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
11430 }
11431 #else
/* Macro forms of the comi rounding intrinsics: each expands to a direct call
   of the matching builtin with the four arguments in the order given.  */
#define _mm_comi_round_ss(A, B, P, R) \
    (__builtin_ia32_vcomiss(A, B, P, R))
#define _mm_comi_round_sd(A, B, P, R) \
    (__builtin_ia32_vcomisd(A, B, P, R))
11436 #endif
11437
11438 extern __inline __m512d
11439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11440 _mm512_sqrt_pd (__m512d __A)
11441 {
11442   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11443                                                   (__v8df)
11444                                                   _mm512_undefined_pd (),
11445                                                   (__mmask8) -1,
11446                                                   _MM_FROUND_CUR_DIRECTION);
11447 }
11448
11449 extern __inline __m512d
11450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11451 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
11452 {
11453   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11454                                                   (__v8df) __W,
11455                                                   (__mmask8) __U,
11456                                                   _MM_FROUND_CUR_DIRECTION);
11457 }
11458
11459 extern __inline __m512d
11460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11461 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
11462 {
11463   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11464                                                   (__v8df)
11465                                                   _mm512_setzero_pd (),
11466                                                   (__mmask8) __U,
11467                                                   _MM_FROUND_CUR_DIRECTION);
11468 }
11469
11470 extern __inline __m512
11471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11472 _mm512_sqrt_ps (__m512 __A)
11473 {
11474   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11475                                                  (__v16sf)
11476                                                  _mm512_undefined_ps (),
11477                                                  (__mmask16) -1,
11478                                                  _MM_FROUND_CUR_DIRECTION);
11479 }
11480
11481 extern __inline __m512
11482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11483 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
11484 {
11485   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11486                                                  (__v16sf) __W,
11487                                                  (__mmask16) __U,
11488                                                  _MM_FROUND_CUR_DIRECTION);
11489 }
11490
11491 extern __inline __m512
11492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11493 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
11494 {
11495   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11496                                                  (__v16sf)
11497                                                  _mm512_setzero_ps (),
11498                                                  (__mmask16) __U,
11499                                                  _MM_FROUND_CUR_DIRECTION);
11500 }
11501
11502 extern __inline __m512d
11503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504 _mm512_add_pd (__m512d __A, __m512d __B)
11505 {
11506   return (__m512d) ((__v8df)__A + (__v8df)__B);
11507 }
11508
11509 extern __inline __m512d
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11512 {
11513   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11514                                                  (__v8df) __B,
11515                                                  (__v8df) __W,
11516                                                  (__mmask8) __U,
11517                                                  _MM_FROUND_CUR_DIRECTION);
11518 }
11519
11520 extern __inline __m512d
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
11523 {
11524   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11525                                                  (__v8df) __B,
11526                                                  (__v8df)
11527                                                  _mm512_setzero_pd (),
11528                                                  (__mmask8) __U,
11529                                                  _MM_FROUND_CUR_DIRECTION);
11530 }
11531
11532 extern __inline __m512
11533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11534 _mm512_add_ps (__m512 __A, __m512 __B)
11535 {
11536   return (__m512) ((__v16sf)__A + (__v16sf)__B);
11537 }
11538
11539 extern __inline __m512
11540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11541 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11542 {
11543   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11544                                                 (__v16sf) __B,
11545                                                 (__v16sf) __W,
11546                                                 (__mmask16) __U,
11547                                                 _MM_FROUND_CUR_DIRECTION);
11548 }
11549
11550 extern __inline __m512
11551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11552 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
11553 {
11554   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11555                                                 (__v16sf) __B,
11556                                                 (__v16sf)
11557                                                 _mm512_setzero_ps (),
11558                                                 (__mmask16) __U,
11559                                                 _MM_FROUND_CUR_DIRECTION);
11560 }
11561
11562 extern __inline __m128d
11563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11564 _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11565 {
11566   return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11567                                                 (__v2df) __B,
11568                                                 (__v2df) __W,
11569                                                 (__mmask8) __U,
11570                                                 _MM_FROUND_CUR_DIRECTION);
11571 }
11572
11573 extern __inline __m128d
11574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11575 _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
11576 {
11577   return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11578                                                 (__v2df) __B,
11579                                                 (__v2df)
11580                                                 _mm_setzero_pd (),
11581                                                 (__mmask8) __U,
11582                                                 _MM_FROUND_CUR_DIRECTION);
11583 }
11584
11585 extern __inline __m128
11586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11587 _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11588 {
11589   return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11590                                                 (__v4sf) __B,
11591                                                 (__v4sf) __W,
11592                                                 (__mmask8) __U,
11593                                                 _MM_FROUND_CUR_DIRECTION);
11594 }
11595
11596 extern __inline __m128
11597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11598 _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
11599 {
11600   return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11601                                                 (__v4sf) __B,
11602                                                 (__v4sf)
11603                                                 _mm_setzero_ps (),
11604                                                 (__mmask8) __U,
11605                                                 _MM_FROUND_CUR_DIRECTION);
11606 }
11607
11608 extern __inline __m512d
11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11610 _mm512_sub_pd (__m512d __A, __m512d __B)
11611 {
11612   return (__m512d) ((__v8df)__A - (__v8df)__B);
11613 }
11614
11615 extern __inline __m512d
11616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11617 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11618 {
11619   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11620                                                  (__v8df) __B,
11621                                                  (__v8df) __W,
11622                                                  (__mmask8) __U,
11623                                                  _MM_FROUND_CUR_DIRECTION);
11624 }
11625
11626 extern __inline __m512d
11627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11628 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
11629 {
11630   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11631                                                  (__v8df) __B,
11632                                                  (__v8df)
11633                                                  _mm512_setzero_pd (),
11634                                                  (__mmask8) __U,
11635                                                  _MM_FROUND_CUR_DIRECTION);
11636 }
11637
11638 extern __inline __m512
11639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11640 _mm512_sub_ps (__m512 __A, __m512 __B)
11641 {
11642   return (__m512) ((__v16sf)__A - (__v16sf)__B);
11643 }
11644
11645 extern __inline __m512
11646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11647 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11648 {
11649   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11650                                                 (__v16sf) __B,
11651                                                 (__v16sf) __W,
11652                                                 (__mmask16) __U,
11653                                                 _MM_FROUND_CUR_DIRECTION);
11654 }
11655
11656 extern __inline __m512
11657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11658 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
11659 {
11660   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11661                                                 (__v16sf) __B,
11662                                                 (__v16sf)
11663                                                 _mm512_setzero_ps (),
11664                                                 (__mmask16) __U,
11665                                                 _MM_FROUND_CUR_DIRECTION);
11666 }
11667
11668 extern __inline __m128d
11669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11670 _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11671 {
11672   return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11673                                                 (__v2df) __B,
11674                                                 (__v2df) __W,
11675                                                 (__mmask8) __U,
11676                                                 _MM_FROUND_CUR_DIRECTION);
11677 }
11678
11679 extern __inline __m128d
11680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681 _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
11682 {
11683   return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11684                                                 (__v2df) __B,
11685                                                 (__v2df)
11686                                                 _mm_setzero_pd (),
11687                                                 (__mmask8) __U,
11688                                                 _MM_FROUND_CUR_DIRECTION);
11689 }
11690
11691 extern __inline __m128
11692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11693 _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11694 {
11695   return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11696                                                 (__v4sf) __B,
11697                                                 (__v4sf) __W,
11698                                                 (__mmask8) __U,
11699                                                 _MM_FROUND_CUR_DIRECTION);
11700 }
11701
11702 extern __inline __m128
11703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11704 _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
11705 {
11706   return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11707                                                 (__v4sf) __B,
11708                                                 (__v4sf)
11709                                                 _mm_setzero_ps (),
11710                                                 (__mmask8) __U,
11711                                                 _MM_FROUND_CUR_DIRECTION);
11712 }
11713
11714 extern __inline __m512d
11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716 _mm512_mul_pd (__m512d __A, __m512d __B)
11717 {
11718   return (__m512d) ((__v8df)__A * (__v8df)__B);
11719 }
11720
11721 extern __inline __m512d
11722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11723 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11724 {
11725   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11726                                                  (__v8df) __B,
11727                                                  (__v8df) __W,
11728                                                  (__mmask8) __U,
11729                                                  _MM_FROUND_CUR_DIRECTION);
11730 }
11731
11732 extern __inline __m512d
11733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11734 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
11735 {
11736   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11737                                                  (__v8df) __B,
11738                                                  (__v8df)
11739                                                  _mm512_setzero_pd (),
11740                                                  (__mmask8) __U,
11741                                                  _MM_FROUND_CUR_DIRECTION);
11742 }
11743
11744 extern __inline __m512
11745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11746 _mm512_mul_ps (__m512 __A, __m512 __B)
11747 {
11748   return (__m512) ((__v16sf)__A * (__v16sf)__B);
11749 }
11750
11751 extern __inline __m512
11752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11753 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11754 {
11755   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11756                                                 (__v16sf) __B,
11757                                                 (__v16sf) __W,
11758                                                 (__mmask16) __U,
11759                                                 _MM_FROUND_CUR_DIRECTION);
11760 }
11761
11762 extern __inline __m512
11763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11764 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
11765 {
11766   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11767                                                 (__v16sf) __B,
11768                                                 (__v16sf)
11769                                                 _mm512_setzero_ps (),
11770                                                 (__mmask16) __U,
11771                                                 _MM_FROUND_CUR_DIRECTION);
11772 }
11773
11774 extern __inline __m128d
11775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11776 _mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
11777                           __m128d __B)
11778 {
11779   return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11780                                                  (__v2df) __B,
11781                                                  (__v2df) __W,
11782                                                  (__mmask8) __U,
11783                                                   _MM_FROUND_CUR_DIRECTION);
11784 }
11785
11786 extern __inline __m128d
11787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11788 _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
11789 {
11790   return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11791                                                  (__v2df) __B,
11792                                                  (__v2df)
11793                                                  _mm_setzero_pd (),
11794                                                  (__mmask8) __U,
11795                                                   _MM_FROUND_CUR_DIRECTION);
11796 }
11797
11798 extern __inline __m128
11799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11800 _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
11801                           __m128 __B)
11802 {
11803   return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11804                                                  (__v4sf) __B,
11805                                                  (__v4sf) __W,
11806                                                  (__mmask8) __U,
11807                                                   _MM_FROUND_CUR_DIRECTION);
11808 }
11809
11810 extern __inline __m128
11811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11812 _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
11813 {
11814   return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11815                                                  (__v4sf) __B,
11816                                                  (__v4sf)
11817                                                  _mm_setzero_ps (),
11818                                                  (__mmask8) __U,
11819                                                   _MM_FROUND_CUR_DIRECTION);
11820 }
11821
11822 extern __inline __m512d
11823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824 _mm512_div_pd (__m512d __M, __m512d __V)
11825 {
11826   return (__m512d) ((__v8df)__M / (__v8df)__V);
11827 }
11828
11829 extern __inline __m512d
11830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11831 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11832 {
11833   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11834                                                  (__v8df) __V,
11835                                                  (__v8df) __W,
11836                                                  (__mmask8) __U,
11837                                                  _MM_FROUND_CUR_DIRECTION);
11838 }
11839
11840 extern __inline __m512d
11841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11842 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11843 {
11844   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11845                                                  (__v8df) __V,
11846                                                  (__v8df)
11847                                                  _mm512_setzero_pd (),
11848                                                  (__mmask8) __U,
11849                                                  _MM_FROUND_CUR_DIRECTION);
11850 }
11851
11852 extern __inline __m512
11853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11854 _mm512_div_ps (__m512 __A, __m512 __B)
11855 {
11856   return (__m512) ((__v16sf)__A / (__v16sf)__B);
11857 }
11858
11859 extern __inline __m512
11860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11861 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11862 {
11863   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11864                                                 (__v16sf) __B,
11865                                                 (__v16sf) __W,
11866                                                 (__mmask16) __U,
11867                                                 _MM_FROUND_CUR_DIRECTION);
11868 }
11869
11870 extern __inline __m512
11871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11872 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11873 {
11874   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11875                                                 (__v16sf) __B,
11876                                                 (__v16sf)
11877                                                 _mm512_setzero_ps (),
11878                                                 (__mmask16) __U,
11879                                                 _MM_FROUND_CUR_DIRECTION);
11880 }
11881
11882 extern __inline __m128d
11883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11884 _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
11885                           __m128d __B)
11886 {
11887   return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11888                                                  (__v2df) __B,
11889                                                  (__v2df) __W,
11890                                                  (__mmask8) __U,
11891                                                   _MM_FROUND_CUR_DIRECTION);
11892 }
11893
11894 extern __inline __m128d
11895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11896 _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
11897 {
11898   return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11899                                                  (__v2df) __B,
11900                                                  (__v2df)
11901                                                  _mm_setzero_pd (),
11902                                                  (__mmask8) __U,
11903                                                   _MM_FROUND_CUR_DIRECTION);
11904 }
11905
11906 extern __inline __m128
11907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11908 _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
11909                           __m128 __B)
11910 {
11911   return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11912                                                  (__v4sf) __B,
11913                                                  (__v4sf) __W,
11914                                                  (__mmask8) __U,
11915                                                   _MM_FROUND_CUR_DIRECTION);
11916 }
11917
11918 extern __inline __m128
11919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11920 _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
11921 {
11922   return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11923                                                  (__v4sf) __B,
11924                                                  (__v4sf)
11925                                                  _mm_setzero_ps (),
11926                                                  (__mmask8) __U,
11927                                                   _MM_FROUND_CUR_DIRECTION);
11928 }
11929
11930 extern __inline __m512d
11931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932 _mm512_max_pd (__m512d __A, __m512d __B)
11933 {
11934   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11935                                                  (__v8df) __B,
11936                                                  (__v8df)
11937                                                  _mm512_undefined_pd (),
11938                                                  (__mmask8) -1,
11939                                                  _MM_FROUND_CUR_DIRECTION);
11940 }
11941
11942 extern __inline __m512d
11943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11944 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11945 {
11946   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11947                                                  (__v8df) __B,
11948                                                  (__v8df) __W,
11949                                                  (__mmask8) __U,
11950                                                  _MM_FROUND_CUR_DIRECTION);
11951 }
11952
11953 extern __inline __m512d
11954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11955 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11956 {
11957   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11958                                                  (__v8df) __B,
11959                                                  (__v8df)
11960                                                  _mm512_setzero_pd (),
11961                                                  (__mmask8) __U,
11962                                                  _MM_FROUND_CUR_DIRECTION);
11963 }
11964
11965 extern __inline __m512
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm512_max_ps (__m512 __A, __m512 __B)
11968 {
11969   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11970                                                 (__v16sf) __B,
11971                                                 (__v16sf)
11972                                                 _mm512_undefined_ps (),
11973                                                 (__mmask16) -1,
11974                                                 _MM_FROUND_CUR_DIRECTION);
11975 }
11976
11977 extern __inline __m512
11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11980 {
11981   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11982                                                 (__v16sf) __B,
11983                                                 (__v16sf) __W,
11984                                                 (__mmask16) __U,
11985                                                 _MM_FROUND_CUR_DIRECTION);
11986 }
11987
11988 extern __inline __m512
11989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11990 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11991 {
11992   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11993                                                 (__v16sf) __B,
11994                                                 (__v16sf)
11995                                                 _mm512_setzero_ps (),
11996                                                 (__mmask16) __U,
11997                                                 _MM_FROUND_CUR_DIRECTION);
11998 }
11999
12000 extern __inline __m128d
12001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12002 _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12003 {
12004   return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12005                                                  (__v2df) __B,
12006                                                  (__v2df) __W,
12007                                                  (__mmask8) __U,
12008                                                  _MM_FROUND_CUR_DIRECTION);
12009 }
12010
12011 extern __inline __m128d
12012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12013 _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
12014 {
12015   return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12016                                                  (__v2df) __B,
12017                                                  (__v2df)
12018                                                  _mm_setzero_pd (),
12019                                                  (__mmask8) __U,
12020                                                  _MM_FROUND_CUR_DIRECTION);
12021 }
12022
12023 extern __inline __m128
12024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12025 _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12026 {
12027   return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12028                                                 (__v4sf) __B,
12029                                                 (__v4sf) __W,
12030                                                 (__mmask8) __U,
12031                                                 _MM_FROUND_CUR_DIRECTION);
12032 }
12033
12034 extern __inline __m128
12035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12036 _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
12037 {
12038   return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12039                                                 (__v4sf) __B,
12040                                                 (__v4sf)
12041                                                 _mm_setzero_ps (),
12042                                                 (__mmask8) __U,
12043                                                 _MM_FROUND_CUR_DIRECTION);
12044 }
12045
12046 extern __inline __m512d
12047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048 _mm512_min_pd (__m512d __A, __m512d __B)
12049 {
12050   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12051                                                  (__v8df) __B,
12052                                                  (__v8df)
12053                                                  _mm512_undefined_pd (),
12054                                                  (__mmask8) -1,
12055                                                  _MM_FROUND_CUR_DIRECTION);
12056 }
12057
12058 extern __inline __m512d
12059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12060 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12061 {
12062   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12063                                                  (__v8df) __B,
12064                                                  (__v8df) __W,
12065                                                  (__mmask8) __U,
12066                                                  _MM_FROUND_CUR_DIRECTION);
12067 }
12068
12069 extern __inline __m512d
12070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12071 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
12072 {
12073   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12074                                                  (__v8df) __B,
12075                                                  (__v8df)
12076                                                  _mm512_setzero_pd (),
12077                                                  (__mmask8) __U,
12078                                                  _MM_FROUND_CUR_DIRECTION);
12079 }
12080
12081 extern __inline __m512
12082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12083 _mm512_min_ps (__m512 __A, __m512 __B)
12084 {
12085   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12086                                                 (__v16sf) __B,
12087                                                 (__v16sf)
12088                                                 _mm512_undefined_ps (),
12089                                                 (__mmask16) -1,
12090                                                 _MM_FROUND_CUR_DIRECTION);
12091 }
12092
12093 extern __inline __m512
12094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12095 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12096 {
12097   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12098                                                 (__v16sf) __B,
12099                                                 (__v16sf) __W,
12100                                                 (__mmask16) __U,
12101                                                 _MM_FROUND_CUR_DIRECTION);
12102 }
12103
12104 extern __inline __m512
12105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12106 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
12107 {
12108   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12109                                                 (__v16sf) __B,
12110                                                 (__v16sf)
12111                                                 _mm512_setzero_ps (),
12112                                                 (__mmask16) __U,
12113                                                 _MM_FROUND_CUR_DIRECTION);
12114 }
12115
12116 extern __inline __m128d
12117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12118 _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12119 {
12120   return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12121                                                  (__v2df) __B,
12122                                                  (__v2df) __W,
12123                                                  (__mmask8) __U,
12124                                                  _MM_FROUND_CUR_DIRECTION);
12125 }
12126
12127 extern __inline __m128d
12128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12129 _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
12130 {
12131   return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12132                                                  (__v2df) __B,
12133                                                  (__v2df)
12134                                                  _mm_setzero_pd (),
12135                                                  (__mmask8) __U,
12136                                                  _MM_FROUND_CUR_DIRECTION);
12137 }
12138
12139 extern __inline __m128
12140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12141 _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12142 {
12143   return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12144                                                 (__v4sf) __B,
12145                                                 (__v4sf) __W,
12146                                                 (__mmask8) __U,
12147                                                 _MM_FROUND_CUR_DIRECTION);
12148 }
12149
12150 extern __inline __m128
12151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12152 _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
12153 {
12154   return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12155                                                 (__v4sf) __B,
12156                                                 (__v4sf)
12157                                                 _mm_setzero_ps (),
12158                                                 (__mmask8) __U,
12159                                                 _MM_FROUND_CUR_DIRECTION);
12160 }
12161
12162 extern __inline __m512d
12163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12164 _mm512_scalef_pd (__m512d __A, __m512d __B)
12165 {
12166   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12167                                                     (__v8df) __B,
12168                                                     (__v8df)
12169                                                     _mm512_undefined_pd (),
12170                                                     (__mmask8) -1,
12171                                                     _MM_FROUND_CUR_DIRECTION);
12172 }
12173
12174 extern __inline __m512d
12175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12176 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12177 {
12178   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12179                                                     (__v8df) __B,
12180                                                     (__v8df) __W,
12181                                                     (__mmask8) __U,
12182                                                     _MM_FROUND_CUR_DIRECTION);
12183 }
12184
12185 extern __inline __m512d
12186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12187 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
12188 {
12189   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12190                                                     (__v8df) __B,
12191                                                     (__v8df)
12192                                                     _mm512_setzero_pd (),
12193                                                     (__mmask8) __U,
12194                                                     _MM_FROUND_CUR_DIRECTION);
12195 }
12196
12197 extern __inline __m512
12198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12199 _mm512_scalef_ps (__m512 __A, __m512 __B)
12200 {
12201   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12202                                                    (__v16sf) __B,
12203                                                    (__v16sf)
12204                                                    _mm512_undefined_ps (),
12205                                                    (__mmask16) -1,
12206                                                    _MM_FROUND_CUR_DIRECTION);
12207 }
12208
12209 extern __inline __m512
12210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12211 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12212 {
12213   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12214                                                    (__v16sf) __B,
12215                                                    (__v16sf) __W,
12216                                                    (__mmask16) __U,
12217                                                    _MM_FROUND_CUR_DIRECTION);
12218 }
12219
12220 extern __inline __m512
12221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12222 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
12223 {
12224   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12225                                                    (__v16sf) __B,
12226                                                    (__v16sf)
12227                                                    _mm512_setzero_ps (),
12228                                                    (__mmask16) __U,
12229                                                    _MM_FROUND_CUR_DIRECTION);
12230 }
12231
12232 extern __inline __m128d
12233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12234 _mm_scalef_sd (__m128d __A, __m128d __B)
12235 {
12236   return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
12237                                                     (__v2df) __B,
12238                                                     (__v2df)
12239                                                     _mm_setzero_pd (),
12240                                                     (__mmask8) -1,
12241                                                     _MM_FROUND_CUR_DIRECTION);
12242 }
12243
12244 extern __inline __m128
12245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12246 _mm_scalef_ss (__m128 __A, __m128 __B)
12247 {
12248   return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
12249                                                    (__v4sf) __B,
12250                                                    (__v4sf)
12251                                                    _mm_setzero_ps (),
12252                                                    (__mmask8) -1,
12253                                                    _MM_FROUND_CUR_DIRECTION);
12254 }
12255
12256 extern __inline __m512d
12257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12259 {
12260   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12261                                                     (__v8df) __B,
12262                                                     (__v8df) __C,
12263                                                     (__mmask8) -1,
12264                                                     _MM_FROUND_CUR_DIRECTION);
12265 }
12266
12267 extern __inline __m512d
12268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12269 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12270 {
12271   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12272                                                     (__v8df) __B,
12273                                                     (__v8df) __C,
12274                                                     (__mmask8) __U,
12275                                                     _MM_FROUND_CUR_DIRECTION);
12276 }
12277
12278 extern __inline __m512d
12279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12280 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12281 {
12282   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
12283                                                      (__v8df) __B,
12284                                                      (__v8df) __C,
12285                                                      (__mmask8) __U,
12286                                                      _MM_FROUND_CUR_DIRECTION);
12287 }
12288
12289 extern __inline __m512d
12290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12291 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12292 {
12293   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12294                                                      (__v8df) __B,
12295                                                      (__v8df) __C,
12296                                                      (__mmask8) __U,
12297                                                      _MM_FROUND_CUR_DIRECTION);
12298 }
12299
12300 extern __inline __m512
12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12303 {
12304   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12305                                                    (__v16sf) __B,
12306                                                    (__v16sf) __C,
12307                                                    (__mmask16) -1,
12308                                                    _MM_FROUND_CUR_DIRECTION);
12309 }
12310
12311 extern __inline __m512
12312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12313 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12314 {
12315   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12316                                                    (__v16sf) __B,
12317                                                    (__v16sf) __C,
12318                                                    (__mmask16) __U,
12319                                                    _MM_FROUND_CUR_DIRECTION);
12320 }
12321
12322 extern __inline __m512
12323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12324 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12325 {
12326   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
12327                                                     (__v16sf) __B,
12328                                                     (__v16sf) __C,
12329                                                     (__mmask16) __U,
12330                                                     _MM_FROUND_CUR_DIRECTION);
12331 }
12332
12333 extern __inline __m512
12334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12335 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12336 {
12337   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12338                                                     (__v16sf) __B,
12339                                                     (__v16sf) __C,
12340                                                     (__mmask16) __U,
12341                                                     _MM_FROUND_CUR_DIRECTION);
12342 }
12343
12344 extern __inline __m512d
12345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12346 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12347 {
12348   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12349                                                     (__v8df) __B,
12350                                                     -(__v8df) __C,
12351                                                     (__mmask8) -1,
12352                                                     _MM_FROUND_CUR_DIRECTION);
12353 }
12354
12355 extern __inline __m512d
12356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12357 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12358 {
12359   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12360                                                     (__v8df) __B,
12361                                                     -(__v8df) __C,
12362                                                     (__mmask8) __U,
12363                                                     _MM_FROUND_CUR_DIRECTION);
12364 }
12365
12366 extern __inline __m512d
12367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12368 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12369 {
12370   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
12371                                                      (__v8df) __B,
12372                                                      (__v8df) __C,
12373                                                      (__mmask8) __U,
12374                                                      _MM_FROUND_CUR_DIRECTION);
12375 }
12376
12377 extern __inline __m512d
12378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12379 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12380 {
12381   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12382                                                      (__v8df) __B,
12383                                                      -(__v8df) __C,
12384                                                      (__mmask8) __U,
12385                                                      _MM_FROUND_CUR_DIRECTION);
12386 }
12387
12388 extern __inline __m512
12389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12390 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12391 {
12392   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12393                                                    (__v16sf) __B,
12394                                                    -(__v16sf) __C,
12395                                                    (__mmask16) -1,
12396                                                    _MM_FROUND_CUR_DIRECTION);
12397 }
12398
12399 extern __inline __m512
12400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12401 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12402 {
12403   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12404                                                    (__v16sf) __B,
12405                                                    -(__v16sf) __C,
12406                                                    (__mmask16) __U,
12407                                                    _MM_FROUND_CUR_DIRECTION);
12408 }
12409
12410 extern __inline __m512
12411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12412 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12413 {
12414   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
12415                                                     (__v16sf) __B,
12416                                                     (__v16sf) __C,
12417                                                     (__mmask16) __U,
12418                                                     _MM_FROUND_CUR_DIRECTION);
12419 }
12420
12421 extern __inline __m512
12422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12423 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12424 {
12425   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12426                                                     (__v16sf) __B,
12427                                                     -(__v16sf) __C,
12428                                                     (__mmask16) __U,
12429                                                     _MM_FROUND_CUR_DIRECTION);
12430 }
12431
12432 extern __inline __m512d
12433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12434 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
12435 {
12436   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12437                                                        (__v8df) __B,
12438                                                        (__v8df) __C,
12439                                                        (__mmask8) -1,
12440                                                        _MM_FROUND_CUR_DIRECTION);
12441 }
12442
12443 extern __inline __m512d
12444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12445 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12446 {
12447   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12448                                                        (__v8df) __B,
12449                                                        (__v8df) __C,
12450                                                        (__mmask8) __U,
12451                                                        _MM_FROUND_CUR_DIRECTION);
12452 }
12453
12454 extern __inline __m512d
12455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12456 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12457 {
12458   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
12459                                                         (__v8df) __B,
12460                                                         (__v8df) __C,
12461                                                         (__mmask8) __U,
12462                                                         _MM_FROUND_CUR_DIRECTION);
12463 }
12464
12465 extern __inline __m512d
12466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12467 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12468 {
12469   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12470                                                         (__v8df) __B,
12471                                                         (__v8df) __C,
12472                                                         (__mmask8) __U,
12473                                                         _MM_FROUND_CUR_DIRECTION);
12474 }
12475
12476 extern __inline __m512
12477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12478 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
12479 {
12480   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12481                                                       (__v16sf) __B,
12482                                                       (__v16sf) __C,
12483                                                       (__mmask16) -1,
12484                                                       _MM_FROUND_CUR_DIRECTION);
12485 }
12486
12487 extern __inline __m512
12488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12489 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12490 {
12491   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12492                                                       (__v16sf) __B,
12493                                                       (__v16sf) __C,
12494                                                       (__mmask16) __U,
12495                                                       _MM_FROUND_CUR_DIRECTION);
12496 }
12497
12498 extern __inline __m512
12499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12500 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12501 {
12502   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
12503                                                        (__v16sf) __B,
12504                                                        (__v16sf) __C,
12505                                                        (__mmask16) __U,
12506                                                        _MM_FROUND_CUR_DIRECTION);
12507 }
12508
12509 extern __inline __m512
12510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12511 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12512 {
12513   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12514                                                        (__v16sf) __B,
12515                                                        (__v16sf) __C,
12516                                                        (__mmask16) __U,
12517                                                        _MM_FROUND_CUR_DIRECTION);
12518 }
12519
12520 extern __inline __m512d
12521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12522 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
12523 {
12524   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12525                                                        (__v8df) __B,
12526                                                        -(__v8df) __C,
12527                                                        (__mmask8) -1,
12528                                                        _MM_FROUND_CUR_DIRECTION);
12529 }
12530
12531 extern __inline __m512d
12532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12533 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12534 {
12535   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12536                                                        (__v8df) __B,
12537                                                        -(__v8df) __C,
12538                                                        (__mmask8) __U,
12539                                                        _MM_FROUND_CUR_DIRECTION);
12540 }
12541
12542 extern __inline __m512d
12543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12544 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12545 {
12546   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
12547                                                         (__v8df) __B,
12548                                                         (__v8df) __C,
12549                                                         (__mmask8) __U,
12550                                                         _MM_FROUND_CUR_DIRECTION);
12551 }
12552
12553 extern __inline __m512d
12554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12555 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12556 {
12557   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12558                                                         (__v8df) __B,
12559                                                         -(__v8df) __C,
12560                                                         (__mmask8) __U,
12561                                                         _MM_FROUND_CUR_DIRECTION);
12562 }
12563
12564 extern __inline __m512
12565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12566 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
12567 {
12568   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12569                                                       (__v16sf) __B,
12570                                                       -(__v16sf) __C,
12571                                                       (__mmask16) -1,
12572                                                       _MM_FROUND_CUR_DIRECTION);
12573 }
12574
12575 extern __inline __m512
12576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12577 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12578 {
12579   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12580                                                       (__v16sf) __B,
12581                                                       -(__v16sf) __C,
12582                                                       (__mmask16) __U,
12583                                                       _MM_FROUND_CUR_DIRECTION);
12584 }
12585
12586 extern __inline __m512
12587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12588 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12589 {
12590   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
12591                                                        (__v16sf) __B,
12592                                                        (__v16sf) __C,
12593                                                        (__mmask16) __U,
12594                                                        _MM_FROUND_CUR_DIRECTION);
12595 }
12596
12597 extern __inline __m512
12598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12599 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12600 {
12601   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12602                                                        (__v16sf) __B,
12603                                                        -(__v16sf) __C,
12604                                                        (__mmask16) __U,
12605                                                        _MM_FROUND_CUR_DIRECTION);
12606 }
12607
12608 extern __inline __m512d
12609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12610 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12611 {
12612   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12613                                                     (__v8df) __B,
12614                                                     (__v8df) __C,
12615                                                     (__mmask8) -1,
12616                                                     _MM_FROUND_CUR_DIRECTION);
12617 }
12618
12619 extern __inline __m512d
12620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12621 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12622 {
12623   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
12624                                                      (__v8df) __B,
12625                                                      (__v8df) __C,
12626                                                      (__mmask8) __U,
12627                                                      _MM_FROUND_CUR_DIRECTION);
12628 }
12629
12630 extern __inline __m512d
12631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12632 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12633 {
12634   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
12635                                                      (__v8df) __B,
12636                                                      (__v8df) __C,
12637                                                      (__mmask8) __U,
12638                                                      _MM_FROUND_CUR_DIRECTION);
12639 }
12640
12641 extern __inline __m512d
12642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12643 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12644 {
12645   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12646                                                      (__v8df) __B,
12647                                                      (__v8df) __C,
12648                                                      (__mmask8) __U,
12649                                                      _MM_FROUND_CUR_DIRECTION);
12650 }
12651
12652 extern __inline __m512
12653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12654 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12655 {
12656   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12657                                                    (__v16sf) __B,
12658                                                    (__v16sf) __C,
12659                                                    (__mmask16) -1,
12660                                                    _MM_FROUND_CUR_DIRECTION);
12661 }
12662
12663 extern __inline __m512
12664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12665 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12666 {
12667   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
12668                                                     (__v16sf) __B,
12669                                                     (__v16sf) __C,
12670                                                     (__mmask16) __U,
12671                                                     _MM_FROUND_CUR_DIRECTION);
12672 }
12673
12674 extern __inline __m512
12675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12676 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12677 {
12678   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
12679                                                     (__v16sf) __B,
12680                                                     (__v16sf) __C,
12681                                                     (__mmask16) __U,
12682                                                     _MM_FROUND_CUR_DIRECTION);
12683 }
12684
12685 extern __inline __m512
12686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12687 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12688 {
12689   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12690                                                     (__v16sf) __B,
12691                                                     (__v16sf) __C,
12692                                                     (__mmask16) __U,
12693                                                     _MM_FROUND_CUR_DIRECTION);
12694 }
12695
12696 extern __inline __m512d
12697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12698 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12699 {
12700   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12701                                                     (__v8df) __B,
12702                                                     -(__v8df) __C,
12703                                                     (__mmask8) -1,
12704                                                     _MM_FROUND_CUR_DIRECTION);
12705 }
12706
12707 extern __inline __m512d
12708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12709 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12710 {
12711   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
12712                                                      (__v8df) __B,
12713                                                      (__v8df) __C,
12714                                                      (__mmask8) __U,
12715                                                      _MM_FROUND_CUR_DIRECTION);
12716 }
12717
12718 extern __inline __m512d
12719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12720 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12721 {
12722   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
12723                                                       (__v8df) __B,
12724                                                       (__v8df) __C,
12725                                                       (__mmask8) __U,
12726                                                       _MM_FROUND_CUR_DIRECTION);
12727 }
12728
12729 extern __inline __m512d
12730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12731 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12732 {
12733   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12734                                                      (__v8df) __B,
12735                                                      -(__v8df) __C,
12736                                                      (__mmask8) __U,
12737                                                      _MM_FROUND_CUR_DIRECTION);
12738 }
12739
12740 extern __inline __m512
12741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12742 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12743 {
12744   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12745                                                    (__v16sf) __B,
12746                                                    -(__v16sf) __C,
12747                                                    (__mmask16) -1,
12748                                                    _MM_FROUND_CUR_DIRECTION);
12749 }
12750
12751 extern __inline __m512
12752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12753 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12754 {
12755   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
12756                                                     (__v16sf) __B,
12757                                                     (__v16sf) __C,
12758                                                     (__mmask16) __U,
12759                                                     _MM_FROUND_CUR_DIRECTION);
12760 }
12761
12762 extern __inline __m512
12763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12764 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12765 {
12766   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
12767                                                      (__v16sf) __B,
12768                                                      (__v16sf) __C,
12769                                                      (__mmask16) __U,
12770                                                      _MM_FROUND_CUR_DIRECTION);
12771 }
12772
12773 extern __inline __m512
12774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12775 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12776 {
12777   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12778                                                     (__v16sf) __B,
12779                                                     -(__v16sf) __C,
12780                                                     (__mmask16) __U,
12781                                                     _MM_FROUND_CUR_DIRECTION);
12782 }
12783
12784 extern __inline __m256i
12785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12786 _mm512_cvttpd_epi32 (__m512d __A)
12787 {
12788   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12789                                                      (__v8si)
12790                                                      _mm256_undefined_si256 (),
12791                                                      (__mmask8) -1,
12792                                                      _MM_FROUND_CUR_DIRECTION);
12793 }
12794
12795 extern __inline __m256i
12796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12797 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12798 {
12799   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12800                                                      (__v8si) __W,
12801                                                      (__mmask8) __U,
12802                                                      _MM_FROUND_CUR_DIRECTION);
12803 }
12804
12805 extern __inline __m256i
12806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12807 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
12808 {
12809   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12810                                                      (__v8si)
12811                                                      _mm256_setzero_si256 (),
12812                                                      (__mmask8) __U,
12813                                                      _MM_FROUND_CUR_DIRECTION);
12814 }
12815
12816 extern __inline __m256i
12817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12818 _mm512_cvttpd_epu32 (__m512d __A)
12819 {
12820   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12821                                                       (__v8si)
12822                                                       _mm256_undefined_si256 (),
12823                                                       (__mmask8) -1,
12824                                                       _MM_FROUND_CUR_DIRECTION);
12825 }
12826
12827 extern __inline __m256i
12828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12829 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12830 {
12831   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12832                                                       (__v8si) __W,
12833                                                       (__mmask8) __U,
12834                                                       _MM_FROUND_CUR_DIRECTION);
12835 }
12836
12837 extern __inline __m256i
12838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12839 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
12840 {
12841   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12842                                                       (__v8si)
12843                                                       _mm256_setzero_si256 (),
12844                                                       (__mmask8) __U,
12845                                                       _MM_FROUND_CUR_DIRECTION);
12846 }
12847
12848 extern __inline __m256i
12849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12850 _mm512_cvtpd_epi32 (__m512d __A)
12851 {
12852   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12853                                                     (__v8si)
12854                                                     _mm256_undefined_si256 (),
12855                                                     (__mmask8) -1,
12856                                                     _MM_FROUND_CUR_DIRECTION);
12857 }
12858
12859 extern __inline __m256i
12860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12861 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12862 {
12863   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12864                                                     (__v8si) __W,
12865                                                     (__mmask8) __U,
12866                                                     _MM_FROUND_CUR_DIRECTION);
12867 }
12868
12869 extern __inline __m256i
12870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12871 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
12872 {
12873   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12874                                                     (__v8si)
12875                                                     _mm256_setzero_si256 (),
12876                                                     (__mmask8) __U,
12877                                                     _MM_FROUND_CUR_DIRECTION);
12878 }
12879
12880 extern __inline __m256i
12881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12882 _mm512_cvtpd_epu32 (__m512d __A)
12883 {
12884   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12885                                                      (__v8si)
12886                                                      _mm256_undefined_si256 (),
12887                                                      (__mmask8) -1,
12888                                                      _MM_FROUND_CUR_DIRECTION);
12889 }
12890
12891 extern __inline __m256i
12892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12893 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12894 {
12895   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12896                                                      (__v8si) __W,
12897                                                      (__mmask8) __U,
12898                                                      _MM_FROUND_CUR_DIRECTION);
12899 }
12900
12901 extern __inline __m256i
12902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12903 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
12904 {
12905   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12906                                                      (__v8si)
12907                                                      _mm256_setzero_si256 (),
12908                                                      (__mmask8) __U,
12909                                                      _MM_FROUND_CUR_DIRECTION);
12910 }
12911
12912 extern __inline __m512i
12913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12914 _mm512_cvttps_epi32 (__m512 __A)
12915 {
12916   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12917                                                      (__v16si)
12918                                                      _mm512_undefined_epi32 (),
12919                                                      (__mmask16) -1,
12920                                                      _MM_FROUND_CUR_DIRECTION);
12921 }
12922
12923 extern __inline __m512i
12924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12925 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12926 {
12927   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12928                                                      (__v16si) __W,
12929                                                      (__mmask16) __U,
12930                                                      _MM_FROUND_CUR_DIRECTION);
12931 }
12932
12933 extern __inline __m512i
12934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12935 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
12936 {
12937   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12938                                                      (__v16si)
12939                                                      _mm512_setzero_si512 (),
12940                                                      (__mmask16) __U,
12941                                                      _MM_FROUND_CUR_DIRECTION);
12942 }
12943
12944 extern __inline __m512i
12945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12946 _mm512_cvttps_epu32 (__m512 __A)
12947 {
12948   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12949                                                       (__v16si)
12950                                                       _mm512_undefined_epi32 (),
12951                                                       (__mmask16) -1,
12952                                                       _MM_FROUND_CUR_DIRECTION);
12953 }
12954
12955 extern __inline __m512i
12956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12957 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12958 {
12959   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12960                                                       (__v16si) __W,
12961                                                       (__mmask16) __U,
12962                                                       _MM_FROUND_CUR_DIRECTION);
12963 }
12964
12965 extern __inline __m512i
12966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12967 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
12968 {
12969   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12970                                                       (__v16si)
12971                                                       _mm512_setzero_si512 (),
12972                                                       (__mmask16) __U,
12973                                                       _MM_FROUND_CUR_DIRECTION);
12974 }
12975
12976 extern __inline __m512i
12977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12978 _mm512_cvtps_epi32 (__m512 __A)
12979 {
12980   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12981                                                     (__v16si)
12982                                                     _mm512_undefined_epi32 (),
12983                                                     (__mmask16) -1,
12984                                                     _MM_FROUND_CUR_DIRECTION);
12985 }
12986
12987 extern __inline __m512i
12988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12989 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12990 {
12991   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12992                                                     (__v16si) __W,
12993                                                     (__mmask16) __U,
12994                                                     _MM_FROUND_CUR_DIRECTION);
12995 }
12996
12997 extern __inline __m512i
12998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12999 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
13000 {
13001   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13002                                                     (__v16si)
13003                                                     _mm512_setzero_si512 (),
13004                                                     (__mmask16) __U,
13005                                                     _MM_FROUND_CUR_DIRECTION);
13006 }
13007
13008 extern __inline __m512i
13009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13010 _mm512_cvtps_epu32 (__m512 __A)
13011 {
13012   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13013                                                      (__v16si)
13014                                                      _mm512_undefined_epi32 (),
13015                                                      (__mmask16) -1,
13016                                                      _MM_FROUND_CUR_DIRECTION);
13017 }
13018
13019 extern __inline __m512i
13020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13021 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13022 {
13023   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13024                                                      (__v16si) __W,
13025                                                      (__mmask16) __U,
13026                                                      _MM_FROUND_CUR_DIRECTION);
13027 }
13028
13029 extern __inline __m512i
13030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13031 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
13032 {
13033   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13034                                                      (__v16si)
13035                                                      _mm512_setzero_si512 (),
13036                                                      (__mmask16) __U,
13037                                                      _MM_FROUND_CUR_DIRECTION);
13038 }
13039
13040 extern __inline double
13041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13042 _mm512_cvtsd_f64 (__m512d __A)
13043 {
13044   return __A[0];
13045 }
13046
13047 extern __inline float
13048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13049 _mm512_cvtss_f32 (__m512 __A)
13050 {
13051   return __A[0];
13052 }
13053
13054 #ifdef __x86_64__
13055 extern __inline __m128
13056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13057 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
13058 {
13059   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
13060                                               _MM_FROUND_CUR_DIRECTION);
13061 }
13062
13063 extern __inline __m128d
13064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13065 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
13066 {
13067   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
13068                                                _MM_FROUND_CUR_DIRECTION);
13069 }
13070 #endif
13071
13072 extern __inline __m128
13073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13074 _mm_cvtu32_ss (__m128 __A, unsigned __B)
13075 {
13076   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
13077                                               _MM_FROUND_CUR_DIRECTION);
13078 }
13079
13080 extern __inline __m512
13081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13082 _mm512_cvtepi32_ps (__m512i __A)
13083 {
13084   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13085                                                    (__v16sf)
13086                                                    _mm512_undefined_ps (),
13087                                                    (__mmask16) -1,
13088                                                    _MM_FROUND_CUR_DIRECTION);
13089 }
13090
13091 extern __inline __m512
13092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13093 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13094 {
13095   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13096                                                    (__v16sf) __W,
13097                                                    (__mmask16) __U,
13098                                                    _MM_FROUND_CUR_DIRECTION);
13099 }
13100
13101 extern __inline __m512
13102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13103 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
13104 {
13105   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13106                                                    (__v16sf)
13107                                                    _mm512_setzero_ps (),
13108                                                    (__mmask16) __U,
13109                                                    _MM_FROUND_CUR_DIRECTION);
13110 }
13111
13112 extern __inline __m512
13113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13114 _mm512_cvtepu32_ps (__m512i __A)
13115 {
13116   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13117                                                     (__v16sf)
13118                                                     _mm512_undefined_ps (),
13119                                                     (__mmask16) -1,
13120                                                     _MM_FROUND_CUR_DIRECTION);
13121 }
13122
13123 extern __inline __m512
13124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13125 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13126 {
13127   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13128                                                     (__v16sf) __W,
13129                                                     (__mmask16) __U,
13130                                                     _MM_FROUND_CUR_DIRECTION);
13131 }
13132
13133 extern __inline __m512
13134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13135 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
13136 {
13137   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13138                                                     (__v16sf)
13139                                                     _mm512_setzero_ps (),
13140                                                     (__mmask16) __U,
13141                                                     _MM_FROUND_CUR_DIRECTION);
13142 }
13143
13144 #ifdef __OPTIMIZE__
13145 extern __inline __m512d
13146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13147 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
13148 {
13149   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13150                                                       (__v8df) __B,
13151                                                       (__v8di) __C,
13152                                                       __imm,
13153                                                       (__mmask8) -1,
13154                                                       _MM_FROUND_CUR_DIRECTION);
13155 }
13156
13157 extern __inline __m512d
13158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13159 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
13160                          __m512i __C, const int __imm)
13161 {
13162   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13163                                                       (__v8df) __B,
13164                                                       (__v8di) __C,
13165                                                       __imm,
13166                                                       (__mmask8) __U,
13167                                                       _MM_FROUND_CUR_DIRECTION);
13168 }
13169
13170 extern __inline __m512d
13171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13172 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
13173                           __m512i __C, const int __imm)
13174 {
13175   return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
13176                                                        (__v8df) __B,
13177                                                        (__v8di) __C,
13178                                                        __imm,
13179                                                        (__mmask8) __U,
13180                                                        _MM_FROUND_CUR_DIRECTION);
13181 }
13182
13183 extern __inline __m512
13184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13185 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
13186 {
13187   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13188                                                      (__v16sf) __B,
13189                                                      (__v16si) __C,
13190                                                      __imm,
13191                                                      (__mmask16) -1,
13192                                                      _MM_FROUND_CUR_DIRECTION);
13193 }
13194
13195 extern __inline __m512
13196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13197 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
13198                          __m512i __C, const int __imm)
13199 {
13200   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13201                                                      (__v16sf) __B,
13202                                                      (__v16si) __C,
13203                                                      __imm,
13204                                                      (__mmask16) __U,
13205                                                      _MM_FROUND_CUR_DIRECTION);
13206 }
13207
13208 extern __inline __m512
13209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13210 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
13211                           __m512i __C, const int __imm)
13212 {
13213   return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
13214                                                       (__v16sf) __B,
13215                                                       (__v16si) __C,
13216                                                       __imm,
13217                                                       (__mmask16) __U,
13218                                                       _MM_FROUND_CUR_DIRECTION);
13219 }
13220
13221 extern __inline __m128d
13222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13223 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
13224 {
13225   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13226                                                    (__v2df) __B,
13227                                                    (__v2di) __C, __imm,
13228                                                    (__mmask8) -1,
13229                                                    _MM_FROUND_CUR_DIRECTION);
13230 }
13231
13232 extern __inline __m128d
13233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13234 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
13235                       __m128i __C, const int __imm)
13236 {
13237   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13238                                                    (__v2df) __B,
13239                                                    (__v2di) __C, __imm,
13240                                                    (__mmask8) __U,
13241                                                    _MM_FROUND_CUR_DIRECTION);
13242 }
13243
13244 extern __inline __m128d
13245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13246 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
13247                        __m128i __C, const int __imm)
13248 {
13249   return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
13250                                                     (__v2df) __B,
13251                                                     (__v2di) __C,
13252                                                     __imm,
13253                                                     (__mmask8) __U,
13254                                                     _MM_FROUND_CUR_DIRECTION);
13255 }
13256
13257 extern __inline __m128
13258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13259 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
13260 {
13261   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13262                                                   (__v4sf) __B,
13263                                                   (__v4si) __C, __imm,
13264                                                   (__mmask8) -1,
13265                                                   _MM_FROUND_CUR_DIRECTION);
13266 }
13267
13268 extern __inline __m128
13269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13270 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
13271                       __m128i __C, const int __imm)
13272 {
13273   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13274                                                   (__v4sf) __B,
13275                                                   (__v4si) __C, __imm,
13276                                                   (__mmask8) __U,
13277                                                   _MM_FROUND_CUR_DIRECTION);
13278 }
13279
13280 extern __inline __m128
13281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13282 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
13283                        __m128i __C, const int __imm)
13284 {
13285   return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
13286                                                    (__v4sf) __B,
13287                                                    (__v4si) __C, __imm,
13288                                                    (__mmask8) __U,
13289                                                    _MM_FROUND_CUR_DIRECTION);
13290 }
13291 #else
/* Macro forms of the 512-bit double-precision fixupimm intrinsics,
   used when __OPTIMIZE__ is not defined.  IMM is cast to int and
   passed straight to the builtin.  */
#define _mm512_fixupimm_pd(A, B, C, IMM)                                \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask (                        \
     (__v8df) (__m512d) (A),                                            \
     (__v8df) (__m512d) (B),                                            \
     (__v8di) (__m512i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (-1),                                                   \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, IMM)                        \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask (                        \
     (__v8df) (__m512d) (A),                                            \
     (__v8df) (__m512d) (B),                                            \
     (__v8di) (__m512i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (U),                                                    \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, IMM)                       \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz (                       \
     (__v8df) (__m512d) (A),                                            \
     (__v8df) (__m512d) (B),                                            \
     (__v8di) (__m512i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (U),                                                    \
     _MM_FROUND_CUR_DIRECTION))
13306
/* Macro forms of the 512-bit single-precision fixupimm intrinsics,
   used when __OPTIMIZE__ is not defined.  IMM is cast to int and
   passed straight to the builtin.  */
#define _mm512_fixupimm_ps(A, B, C, IMM)                                \
  ((__m512) __builtin_ia32_fixupimmps512_mask (                         \
     (__v16sf) (__m512) (A),                                            \
     (__v16sf) (__m512) (B),                                            \
     (__v16si) (__m512i) (C),                                           \
     (int) (IMM),                                                       \
     (__mmask16) (-1),                                                  \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, IMM)                        \
  ((__m512) __builtin_ia32_fixupimmps512_mask (                         \
     (__v16sf) (__m512) (A),                                            \
     (__v16sf) (__m512) (B),                                            \
     (__v16si) (__m512i) (C),                                           \
     (int) (IMM),                                                       \
     (__mmask16) (U),                                                   \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, IMM)                       \
  ((__m512) __builtin_ia32_fixupimmps512_maskz (                        \
     (__v16sf) (__m512) (A),                                            \
     (__v16sf) (__m512) (B),                                            \
     (__v16si) (__m512i) (C),                                           \
     (int) (IMM),                                                       \
     (__mmask16) (U),                                                   \
     _MM_FROUND_CUR_DIRECTION))
13321
/* Macro forms of the scalar double-precision fixupimm intrinsics,
   used when __OPTIMIZE__ is not defined.  IMM is cast to int and
   passed straight to the builtin.  */
#define _mm_fixupimm_sd(A, B, C, IMM)                                   \
  ((__m128d) __builtin_ia32_fixupimmsd_mask (                           \
     (__v2df) (__m128d) (A),                                            \
     (__v2df) (__m128d) (B),                                            \
     (__v2di) (__m128i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (-1),                                                   \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, IMM)                           \
  ((__m128d) __builtin_ia32_fixupimmsd_mask (                           \
     (__v2df) (__m128d) (A),                                            \
     (__v2df) (__m128d) (B),                                            \
     (__v2di) (__m128i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (U),                                                    \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_sd(U, A, B, C, IMM)                          \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz (                          \
     (__v2df) (__m128d) (A),                                            \
     (__v2df) (__m128d) (B),                                            \
     (__v2di) (__m128i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (U),                                                    \
     _MM_FROUND_CUR_DIRECTION))
13336
/* Macro forms of the scalar single-precision fixupimm intrinsics,
   used when __OPTIMIZE__ is not defined.  IMM is cast to int and
   passed straight to the builtin.  */
#define _mm_fixupimm_ss(A, B, C, IMM)                                   \
  ((__m128) __builtin_ia32_fixupimmss_mask (                            \
     (__v4sf) (__m128) (A),                                             \
     (__v4sf) (__m128) (B),                                             \
     (__v4si) (__m128i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (-1),                                                   \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, IMM)                           \
  ((__m128) __builtin_ia32_fixupimmss_mask (                            \
     (__v4sf) (__m128) (A),                                             \
     (__v4sf) (__m128) (B),                                             \
     (__v4si) (__m128i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (U),                                                    \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_ss(U, A, B, C, IMM)                          \
  ((__m128) __builtin_ia32_fixupimmss_maskz (                           \
     (__v4sf) (__m128) (A),                                             \
     (__v4sf) (__m128) (B),                                             \
     (__v4si) (__m128i) (C),                                            \
     (int) (IMM),                                                       \
     (__mmask8) (U),                                                    \
     _MM_FROUND_CUR_DIRECTION))
13351 #endif
13352
13353 #ifdef __x86_64__
13354 extern __inline unsigned long long
13355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13356 _mm_cvtss_u64 (__m128 __A)
13357 {
13358   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
13359                                                            __A,
13360                                                            _MM_FROUND_CUR_DIRECTION);
13361 }
13362
13363 extern __inline unsigned long long
13364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13365 _mm_cvttss_u64 (__m128 __A)
13366 {
13367   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
13368                                                             __A,
13369                                                             _MM_FROUND_CUR_DIRECTION);
13370 }
13371
13372 extern __inline long long
13373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13374 _mm_cvttss_i64 (__m128 __A)
13375 {
13376   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
13377                                                   _MM_FROUND_CUR_DIRECTION);
13378 }
13379 #endif /* __x86_64__ */
13380
13381 extern __inline unsigned
13382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13383 _mm_cvtss_u32 (__m128 __A)
13384 {
13385   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
13386                                                  _MM_FROUND_CUR_DIRECTION);
13387 }
13388
13389 extern __inline unsigned
13390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13391 _mm_cvttss_u32 (__m128 __A)
13392 {
13393   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
13394                                                   _MM_FROUND_CUR_DIRECTION);
13395 }
13396
13397 extern __inline int
13398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13399 _mm_cvttss_i32 (__m128 __A)
13400 {
13401   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
13402                                             _MM_FROUND_CUR_DIRECTION);
13403 }
13404
13405 #ifdef __x86_64__
13406 extern __inline unsigned long long
13407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13408 _mm_cvtsd_u64 (__m128d __A)
13409 {
13410   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
13411                                                            __A,
13412                                                            _MM_FROUND_CUR_DIRECTION);
13413 }
13414
13415 extern __inline unsigned long long
13416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13417 _mm_cvttsd_u64 (__m128d __A)
13418 {
13419   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
13420                                                             __A,
13421                                                             _MM_FROUND_CUR_DIRECTION);
13422 }
13423
13424 extern __inline long long
13425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13426 _mm_cvttsd_i64 (__m128d __A)
13427 {
13428   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
13429                                                   _MM_FROUND_CUR_DIRECTION);
13430 }
13431 #endif /* __x86_64__ */
13432
13433 extern __inline unsigned
13434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13435 _mm_cvtsd_u32 (__m128d __A)
13436 {
13437   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
13438                                                  _MM_FROUND_CUR_DIRECTION);
13439 }
13440
13441 extern __inline unsigned
13442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13443 _mm_cvttsd_u32 (__m128d __A)
13444 {
13445   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
13446                                                   _MM_FROUND_CUR_DIRECTION);
13447 }
13448
13449 extern __inline int
13450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13451 _mm_cvttsd_i32 (__m128d __A)
13452 {
13453   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
13454                                             _MM_FROUND_CUR_DIRECTION);
13455 }
13456
13457 extern __inline __m512d
13458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13459 _mm512_cvtps_pd (__m256 __A)
13460 {
13461   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13462                                                     (__v8df)
13463                                                     _mm512_undefined_pd (),
13464                                                     (__mmask8) -1,
13465                                                     _MM_FROUND_CUR_DIRECTION);
13466 }
13467
13468 extern __inline __m512d
13469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13470 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
13471 {
13472   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13473                                                     (__v8df) __W,
13474                                                     (__mmask8) __U,
13475                                                     _MM_FROUND_CUR_DIRECTION);
13476 }
13477
13478 extern __inline __m512d
13479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13480 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
13481 {
13482   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13483                                                     (__v8df)
13484                                                     _mm512_setzero_pd (),
13485                                                     (__mmask8) __U,
13486                                                     _MM_FROUND_CUR_DIRECTION);
13487 }
13488
13489 extern __inline __m512
13490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13491 _mm512_cvtph_ps (__m256i __A)
13492 {
13493   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13494                                                     (__v16sf)
13495                                                     _mm512_undefined_ps (),
13496                                                     (__mmask16) -1,
13497                                                     _MM_FROUND_CUR_DIRECTION);
13498 }
13499
13500 extern __inline __m512
13501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13502 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
13503 {
13504   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13505                                                     (__v16sf) __W,
13506                                                     (__mmask16) __U,
13507                                                     _MM_FROUND_CUR_DIRECTION);
13508 }
13509
13510 extern __inline __m512
13511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13512 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
13513 {
13514   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13515                                                     (__v16sf)
13516                                                     _mm512_setzero_ps (),
13517                                                     (__mmask16) __U,
13518                                                     _MM_FROUND_CUR_DIRECTION);
13519 }
13520
13521 extern __inline __m256
13522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13523 _mm512_cvtpd_ps (__m512d __A)
13524 {
13525   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13526                                                    (__v8sf)
13527                                                    _mm256_undefined_ps (),
13528                                                    (__mmask8) -1,
13529                                                    _MM_FROUND_CUR_DIRECTION);
13530 }
13531
13532 extern __inline __m256
13533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13534 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
13535 {
13536   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13537                                                    (__v8sf) __W,
13538                                                    (__mmask8) __U,
13539                                                    _MM_FROUND_CUR_DIRECTION);
13540 }
13541
13542 extern __inline __m256
13543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13544 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
13545 {
13546   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13547                                                    (__v8sf)
13548                                                    _mm256_setzero_ps (),
13549                                                    (__mmask8) __U,
13550                                                    _MM_FROUND_CUR_DIRECTION);
13551 }
13552
13553 #ifdef __OPTIMIZE__
13554 extern __inline __m512
13555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13556 _mm512_getexp_ps (__m512 __A)
13557 {
13558   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13559                                                    (__v16sf)
13560                                                    _mm512_undefined_ps (),
13561                                                    (__mmask16) -1,
13562                                                    _MM_FROUND_CUR_DIRECTION);
13563 }
13564
13565 extern __inline __m512
13566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13567 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
13568 {
13569   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13570                                                    (__v16sf) __W,
13571                                                    (__mmask16) __U,
13572                                                    _MM_FROUND_CUR_DIRECTION);
13573 }
13574
13575 extern __inline __m512
13576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13577 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
13578 {
13579   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13580                                                    (__v16sf)
13581                                                    _mm512_setzero_ps (),
13582                                                    (__mmask16) __U,
13583                                                    _MM_FROUND_CUR_DIRECTION);
13584 }
13585
13586 extern __inline __m512d
13587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13588 _mm512_getexp_pd (__m512d __A)
13589 {
13590   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13591                                                     (__v8df)
13592                                                     _mm512_undefined_pd (),
13593                                                     (__mmask8) -1,
13594                                                     _MM_FROUND_CUR_DIRECTION);
13595 }
13596
13597 extern __inline __m512d
13598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13599 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
13600 {
13601   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13602                                                     (__v8df) __W,
13603                                                     (__mmask8) __U,
13604                                                     _MM_FROUND_CUR_DIRECTION);
13605 }
13606
13607 extern __inline __m512d
13608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13609 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
13610 {
13611   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13612                                                     (__v8df)
13613                                                     _mm512_setzero_pd (),
13614                                                     (__mmask8) __U,
13615                                                     _MM_FROUND_CUR_DIRECTION);
13616 }
13617
13618 extern __inline __m128
13619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13620 _mm_getexp_ss (__m128 __A, __m128 __B)
13621 {
13622   return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
13623                                                     (__v4sf) __B,
13624                                                     _MM_FROUND_CUR_DIRECTION);
13625 }
13626
13627 extern __inline __m128
13628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13629 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13630 {
13631   return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13632                                                 (__v4sf) __B,
13633                                                 (__v4sf) __W,
13634                                                 (__mmask8) __U,
13635                                                 _MM_FROUND_CUR_DIRECTION);
13636 }
13637
13638 extern __inline __m128
13639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13640 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
13641 {
13642   return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13643                                                 (__v4sf) __B,
13644                                                 (__v4sf)
13645                                                 _mm_setzero_ps (),
13646                                                 (__mmask8) __U,
13647                                                 _MM_FROUND_CUR_DIRECTION);
13648 }
13649
13650 extern __inline __m128d
13651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13652 _mm_getexp_sd (__m128d __A, __m128d __B)
13653 {
13654   return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
13655                                                      (__v2df) __B,
13656                                                      _MM_FROUND_CUR_DIRECTION);
13657 }
13658
13659 extern __inline __m128d
13660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13661 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13662 {
13663   return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13664                                                 (__v2df) __B,
13665                                                 (__v2df) __W,
13666                                                 (__mmask8) __U,
13667                                                 _MM_FROUND_CUR_DIRECTION);
13668 }
13669
13670 extern __inline __m128d
13671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13672 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
13673 {
13674   return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13675                                                 (__v2df) __B,
13676                                                 (__v2df)
13677                                                 _mm_setzero_pd (),
13678                                                 (__mmask8) __U,
13679                                                 _MM_FROUND_CUR_DIRECTION);
13680 }
13681
13682 extern __inline __m512d
13683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13684 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
13685                    _MM_MANTISSA_SIGN_ENUM __C)
13686 {
13687   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13688                                                      (__C << 2) | __B,
13689                                                      _mm512_undefined_pd (),
13690                                                      (__mmask8) -1,
13691                                                      _MM_FROUND_CUR_DIRECTION);
13692 }
13693
13694 extern __inline __m512d
13695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13696 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
13697                         _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13698 {
13699   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13700                                                      (__C << 2) | __B,
13701                                                      (__v8df) __W, __U,
13702                                                      _MM_FROUND_CUR_DIRECTION);
13703 }
13704
13705 extern __inline __m512d
13706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13707 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
13708                          _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13709 {
13710   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13711                                                      (__C << 2) | __B,
13712                                                      (__v8df)
13713                                                      _mm512_setzero_pd (),
13714                                                      __U,
13715                                                      _MM_FROUND_CUR_DIRECTION);
13716 }
13717
13718 extern __inline __m512
13719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13720 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
13721                    _MM_MANTISSA_SIGN_ENUM __C)
13722 {
13723   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13724                                                     (__C << 2) | __B,
13725                                                     _mm512_undefined_ps (),
13726                                                     (__mmask16) -1,
13727                                                     _MM_FROUND_CUR_DIRECTION);
13728 }
13729
13730 extern __inline __m512
13731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13732 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
13733                         _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13734 {
13735   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13736                                                     (__C << 2) | __B,
13737                                                     (__v16sf) __W, __U,
13738                                                     _MM_FROUND_CUR_DIRECTION);
13739 }
13740
13741 extern __inline __m512
13742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13743 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
13744                          _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13745 {
13746   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13747                                                     (__C << 2) | __B,
13748                                                     (__v16sf)
13749                                                     _mm512_setzero_ps (),
13750                                                     __U,
13751                                                     _MM_FROUND_CUR_DIRECTION);
13752 }
13753
13754 extern __inline __m128d
13755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13756 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
13757                 _MM_MANTISSA_SIGN_ENUM __D)
13758 {
13759   return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
13760                                                    (__v2df) __B,
13761                                                    (__D << 2) | __C,
13762                                                    _MM_FROUND_CUR_DIRECTION);
13763 }
13764
13765 extern __inline __m128d
13766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13767 _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
13768                         _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13769 {
13770   return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13771                                                         (__v2df) __B,
13772                                                         (__D << 2) | __C,
13773                                                         (__v2df) __W,
13774                                                        __U,
13775                                                      _MM_FROUND_CUR_DIRECTION);
13776 }
13777
13778 extern __inline __m128d
13779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13780 _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
13781                          _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13782 {
13783   return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13784                                                         (__v2df) __B,
13785                                                         (__D << 2) | __C,
13786                                                         (__v2df)
13787                                                         _mm_setzero_pd(),
13788                                                         __U,
13789                                                      _MM_FROUND_CUR_DIRECTION);
13790 }
13791
13792 extern __inline __m128
13793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13794 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
13795                 _MM_MANTISSA_SIGN_ENUM __D)
13796 {
13797   return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
13798                                                   (__v4sf) __B,
13799                                                   (__D << 2) | __C,
13800                                                   _MM_FROUND_CUR_DIRECTION);
13801 }
13802
13803 extern __inline __m128
13804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13805 _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
13806                         _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13807 {
13808   return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13809                                                         (__v4sf) __B,
13810                                                         (__D << 2) | __C,
13811                                                         (__v4sf) __W,
13812                                                        __U,
13813                                                      _MM_FROUND_CUR_DIRECTION);
13814 }
13815
13816 extern __inline __m128
13817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13818 _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
13819                          _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13820 {
13821   return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13822                                                         (__v4sf) __B,
13823                                                         (__D << 2) | __C,
13824                                                         (__v4sf)
13825                                                         _mm_setzero_ps(),
13826                                                         __U,
13827                                                      _MM_FROUND_CUR_DIRECTION);
13828 }
13829
13830 #else
/* Macro form used when __OPTIMIZE__ is not defined: folds C and B into
   one int as ((C) << 2) | (B) and calls __builtin_ia32_getmantpd512_mask
   with an undefined vector, an all-ones __mmask8 and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
13837
/* Masked macro form: same selector encoding, ((C) << 2) | (B), with W as
   third operand and U cast to __mmask8.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13844
/* maskz macro form: same selector encoding, ((C) << 2) | (B), with an
   all-zero vector as third operand and U cast to __mmask8.  */
#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: folds C and B into
   one int as ((C) << 2) | (B) and calls __builtin_ia32_getmantps512_mask
   with an undefined vector, an all-ones __mmask16 and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))
13857
/* Masked macro form: same selector encoding, ((C) << 2) | (B), with W as
   third operand and U cast to __mmask16.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
13864
/* maskz macro form: same selector encoding, ((C) << 2) | (B), with an
   all-zero vector as third operand and U cast to __mmask16.  */
#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getmantsd_round on X and Y with the selector
   ((D) << 2) | (C) and _MM_FROUND_CUR_DIRECTION.  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), \
     _MM_FROUND_CUR_DIRECTION))
13876
/* Masked macro form: calls __builtin_ia32_getmantsd_mask_round on X and
   Y with the selector ((D) << 2) | (C), W as fourth operand and U cast
   to __mmask8.  */
#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), \
     (__v2df) (__m128d) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13884
/* maskz macro form: calls __builtin_ia32_getmantsd_mask_round on X and Y
   with the selector ((D) << 2) | (C), an all-zero vector as fourth
   operand and U cast to __mmask8.  */
#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), \
     (__v2df) _mm_setzero_pd (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13892
/* Macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getmantss_round on X and Y with the selector
   ((D) << 2) | (C) and _MM_FROUND_CUR_DIRECTION.  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), \
     _MM_FROUND_CUR_DIRECTION))
13898
/* Masked macro form: calls __builtin_ia32_getmantss_mask_round on X and
   Y with the selector ((D) << 2) | (C), W as fourth operand and U cast
   to __mmask8.  */
#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), \
     (__v4sf) (__m128) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13906
/* maskz macro form: calls __builtin_ia32_getmantss_mask_round on X and Y
   with the selector ((D) << 2) | (C), an all-zero vector as fourth
   operand and U cast to __mmask8.  */
#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), \
     (__v4sf) _mm_setzero_ps (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13914
/* Macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getexpss128_round on A and B with
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ( \
     (__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), \
     _MM_FROUND_CUR_DIRECTION))
13918
/* Masked macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getexpss_mask_round on A and B with W as third operand,
   U as the mask and _MM_FROUND_CUR_DIRECTION.

   The whole expansion is parenthesized so the leading (__m128) cast
   cannot be split from the call by a postfix operator at the use site.
   Each argument is parenthesized and cast, so the macro takes the same
   operand types as the inline-function definition of this intrinsic.  */
#define _mm_mask_getexp_ss(W, U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ( \
     (__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), \
     (__v4sf) (__m128) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13922
/* maskz macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getexpss_mask_round on A and B with an all-zero vector
   as third operand, U as the mask and _MM_FROUND_CUR_DIRECTION.

   The whole expansion is parenthesized so the leading (__m128) cast
   cannot be split from the call by a postfix operator at the use site.
   Each argument is parenthesized and cast, so the macro takes the same
   operand types as the inline-function definition of this intrinsic.  */
#define _mm_maskz_getexp_ss(U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ( \
     (__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), \
     (__v4sf) _mm_setzero_ps (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13926
/* Macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getexpsd128_round on A and B with
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ( \
     (__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), \
     _MM_FROUND_CUR_DIRECTION))
13930
/* Masked macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getexpsd_mask_round on A and B with W as third operand,
   U as the mask and _MM_FROUND_CUR_DIRECTION.

   The whole expansion is parenthesized so the leading (__m128d) cast
   cannot be split from the call by a postfix operator at the use site.
   Each argument is parenthesized and cast, so the macro takes the same
   operand types as the inline-function definition of this intrinsic.  */
#define _mm_mask_getexp_sd(W, U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ( \
     (__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), \
     (__v2df) (__m128d) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13934
/* maskz macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getexpsd_mask_round on A and B with an all-zero vector
   as third operand, U as the mask and _MM_FROUND_CUR_DIRECTION.

   The whole expansion is parenthesized so the leading (__m128d) cast
   cannot be split from the call by a postfix operator at the use site.
   Each argument is parenthesized and cast, so the macro takes the same
   operand types as the inline-function definition of this intrinsic.  */
#define _mm_maskz_getexp_sd(U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ( \
     (__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), \
     (__v2df) _mm_setzero_pd (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
13938
/* Macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getexpps512_mask on A with an undefined vector, an
   all-ones __mmask16 and _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))
13942
/* Masked macro form: calls __builtin_ia32_getexpps512_mask on A with W
   as second operand and U cast to __mmask16.  */
#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
13946
/* maskz macro form: calls __builtin_ia32_getexpps512_mask on A with an
   all-zero vector as second operand and U cast to __mmask16.  */
#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
13950
/* Macro form used when __OPTIMIZE__ is not defined: calls
   __builtin_ia32_getexppd512_mask on A with an undefined vector, an
   all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df) (__m512d) (A), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
13954
/* Expands to __builtin_ia32_getexppd512_mask on A (cast to __v8df) with
   W as the second operand, U (cast to __mmask8) as the mask argument and
   _MM_FROUND_CUR_DIRECTION; the result is cast to __m512d.  */
13955 #define _mm512_mask_getexp_pd(W, U, A)                                  \
13956   ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),               \
13957   (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13958
/* Expands to __builtin_ia32_getexppd512_mask on A (cast to __v8df) with
   _mm512_setzero_pd () as the second operand, U (cast to __mmask8) as the
   mask argument and _MM_FROUND_CUR_DIRECTION; the result is cast to
   __m512d.  */
13959 #define _mm512_maskz_getexp_pd(U, A)                                    \
13960   ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),               \
13961   (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13962 #endif
13963
13964 #ifdef __OPTIMIZE__
/* Calls __builtin_ia32_rndscaleps_mask on __A with immediate __imm,
   passing _mm512_undefined_ps () as the third operand, -1 as the mask
   argument and _MM_FROUND_CUR_DIRECTION.  Returns the result as __m512.  */
13965 extern __inline __m512
13966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13967 _mm512_roundscale_ps (__m512 __A, const int __imm)
13968 {
13969   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
13970                                                   (__v16sf)
13971                                                   _mm512_undefined_ps (),
13972                                                   -1,
13973                                                   _MM_FROUND_CUR_DIRECTION);
13974 }
13975
/* Calls __builtin_ia32_rndscaleps_mask on __C with immediate __imm,
   passing __A as the third operand, __B as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __m512.  */
13976 extern __inline __m512
13977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13978 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
13979                            const int __imm)
13980 {
13981   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
13982                                                   (__v16sf) __A,
13983                                                   (__mmask16) __B,
13984                                                   _MM_FROUND_CUR_DIRECTION);
13985 }
13986
/* Calls __builtin_ia32_rndscaleps_mask on __B with immediate __imm,
   passing _mm512_setzero_ps () as the third operand, __A as the mask
   argument and _MM_FROUND_CUR_DIRECTION.  Returns the result as __m512.  */
13987 extern __inline __m512
13988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13989 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
13990 {
13991   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
13992                                                   __imm,
13993                                                   (__v16sf)
13994                                                   _mm512_setzero_ps (),
13995                                                   (__mmask16) __A,
13996                                                   _MM_FROUND_CUR_DIRECTION);
13997 }
13998
/* Calls __builtin_ia32_rndscalepd_mask on __A with immediate __imm,
   passing _mm512_undefined_pd () as the third operand, -1 as the mask
   argument and _MM_FROUND_CUR_DIRECTION.  Returns the result as __m512d.  */
13999 extern __inline __m512d
14000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14001 _mm512_roundscale_pd (__m512d __A, const int __imm)
14002 {
14003   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
14004                                                    (__v8df)
14005                                                    _mm512_undefined_pd (),
14006                                                    -1,
14007                                                    _MM_FROUND_CUR_DIRECTION);
14008 }
14009
/* Calls __builtin_ia32_rndscalepd_mask on __C with immediate __imm,
   passing __A as the third operand, __B as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __m512d.  */
14010 extern __inline __m512d
14011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14012 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
14013                            const int __imm)
14014 {
14015   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
14016                                                    (__v8df) __A,
14017                                                    (__mmask8) __B,
14018                                                    _MM_FROUND_CUR_DIRECTION);
14019 }
14020
/* Calls __builtin_ia32_rndscalepd_mask on __B with immediate __imm,
   passing _mm512_setzero_pd () as the third operand, __A as the mask
   argument and _MM_FROUND_CUR_DIRECTION.  Returns the result as __m512d.  */
14021 extern __inline __m512d
14022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14023 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
14024 {
14025   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
14026                                                    __imm,
14027                                                    (__v8df)
14028                                                    _mm512_setzero_pd (),
14029                                                    (__mmask8) __A,
14030                                                    _MM_FROUND_CUR_DIRECTION);
14031 }
14032
/* Calls __builtin_ia32_rndscaless_round on __A and __B with immediate
   __imm and _MM_FROUND_CUR_DIRECTION.  Returns the result as __m128.  */
14033 extern __inline __m128
14034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14035 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
14036 {
14037   return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
14038                                                    (__v4sf) __B, __imm,
14039                                                    _MM_FROUND_CUR_DIRECTION);
14040 }
14041
/* Calls __builtin_ia32_rndscalesd_round on __A and __B with immediate
   __imm and _MM_FROUND_CUR_DIRECTION.  Returns the result as __m128d.  */
14042 extern __inline __m128d
14043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14044 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
14045 {
14046   return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
14047                                                     (__v2df) __B, __imm,
14048                                                    _MM_FROUND_CUR_DIRECTION);
14049 }
14050
14051 #else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscaleps_mask on A with immediate B,
   _mm512_undefined_ps () as the third operand, (__mmask16)(-1) as the mask
   argument and _MM_FROUND_CUR_DIRECTION; the result is cast to __m512.  */
14052 #define _mm512_roundscale_ps(A, B) \
14053   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
14054     (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscaleps_mask on C with immediate D, A as the third
   operand, B as the mask argument and _MM_FROUND_CUR_DIRECTION; the
   result is cast to __m512.  */
14055 #define _mm512_mask_roundscale_ps(A, B, C, D)                           \
14056   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),       \
14057                                             (int)(D),                   \
14058                                             (__v16sf)(__m512)(A),       \
14059                                             (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscaleps_mask on B with immediate C,
   _mm512_setzero_ps () as the third operand, A as the mask argument and
   _MM_FROUND_CUR_DIRECTION; the result is cast to __m512.  */
14060 #define _mm512_maskz_roundscale_ps(A, B, C)                             \
14061   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),       \
14062                                             (int)(C),                   \
14063                                             (__v16sf)_mm512_setzero_ps(),\
14064                                             (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscalepd_mask on A with immediate B,
   _mm512_undefined_pd () as the third operand, (__mmask8)(-1) as the mask
   argument and _MM_FROUND_CUR_DIRECTION; the result is cast to __m512d.  */
14065 #define _mm512_roundscale_pd(A, B) \
14066   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
14067     (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscalepd_mask on C with immediate D, A as the third
   operand, B as the mask argument and _MM_FROUND_CUR_DIRECTION; the
   result is cast to __m512d.  */
14068 #define _mm512_mask_roundscale_pd(A, B, C, D)                           \
14069   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),      \
14070                                              (int)(D),                  \
14071                                              (__v8df)(__m512d)(A),      \
14072                                              (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscalepd_mask on B with immediate C,
   _mm512_setzero_pd () as the third operand, A as the mask argument and
   _MM_FROUND_CUR_DIRECTION; the result is cast to __m512d.  */
14073 #define _mm512_maskz_roundscale_pd(A, B, C)                             \
14074   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),      \
14075                                              (int)(C),                  \
14076                                              (__v8df)_mm512_setzero_pd(),\
14077                                              (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscaless_round on A and B with immediate C and
   _MM_FROUND_CUR_DIRECTION; the result is cast to __m128.  */
14078 #define _mm_roundscale_ss(A, B, C)                                      \
14079   ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),       \
14080   (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscalesd_round on A and B with immediate C and
   _MM_FROUND_CUR_DIRECTION; the result is cast to __m128d.  */
14081 #define _mm_roundscale_sd(A, B, C)                                      \
14082   ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),     \
14083     (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
14084 #endif
14085
14086 #ifdef __OPTIMIZE__
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the
   caller-supplied predicate __P, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14087 extern __inline __mmask8
14088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14089 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
14090 {
14091   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14092                                                   (__v8df) __Y, __P,
14093                                                   (__mmask8) -1,
14094                                                   _MM_FROUND_CUR_DIRECTION);
14095 }
14096
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the
   caller-supplied predicate __P, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14097 extern __inline __mmask16
14098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14099 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
14100 {
14101   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14102                                                    (__v16sf) __Y, __P,
14103                                                    (__mmask16) -1,
14104                                                    _MM_FROUND_CUR_DIRECTION);
14105 }
14106
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the
   caller-supplied predicate __P, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14107 extern __inline __mmask16
14108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14109 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
14110 {
14111   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14112                                                    (__v16sf) __Y, __P,
14113                                                    (__mmask16) __U,
14114                                                    _MM_FROUND_CUR_DIRECTION);
14115 }
14116
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the
   caller-supplied predicate __P, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14117 extern __inline __mmask8
14118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14119 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
14120 {
14121   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14122                                                   (__v8df) __Y, __P,
14123                                                   (__mmask8) __U,
14124                                                   _MM_FROUND_CUR_DIRECTION);
14125 }
14126
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_EQ_OQ, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14127 extern __inline __mmask8
14128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14129 _mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
14130 {
14131   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14132                                                   (__v8df) __Y, _CMP_EQ_OQ,
14133                                                   (__mmask8) -1,
14134                                                   _MM_FROUND_CUR_DIRECTION);
14135 }
14136
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_EQ_OQ, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14137 extern __inline __mmask8
14138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14139 _mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14140 {
14141   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14142                                                   (__v8df) __Y, _CMP_EQ_OQ,
14143                                                   (__mmask8) __U,
14144                                                   _MM_FROUND_CUR_DIRECTION);
14145 }
14146
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_LT_OS, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14147 extern __inline __mmask8
14148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14149 _mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
14150 {
14151   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14152                                                   (__v8df) __Y, _CMP_LT_OS,
14153                                                   (__mmask8) -1,
14154                                                   _MM_FROUND_CUR_DIRECTION);
14155 }
14156
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_LT_OS, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14157 extern __inline __mmask8
14158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14159 _mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14160 {
14161   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14162                                                   (__v8df) __Y, _CMP_LT_OS,
14163                                                   (__mmask8) __U,
14164                                                   _MM_FROUND_CUR_DIRECTION);
14165 }
14166
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_LE_OS, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14167 extern __inline __mmask8
14168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14169 _mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
14170 {
14171   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14172                                                   (__v8df) __Y, _CMP_LE_OS,
14173                                                   (__mmask8) -1,
14174                                                   _MM_FROUND_CUR_DIRECTION);
14175 }
14176
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_LE_OS, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14177 extern __inline __mmask8
14178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14179 _mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14180 {
14181   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14182                                                   (__v8df) __Y, _CMP_LE_OS,
14183                                                   (__mmask8) __U,
14184                                                   _MM_FROUND_CUR_DIRECTION);
14185 }
14186
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_UNORD_Q, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14187 extern __inline __mmask8
14188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14189 _mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
14190 {
14191   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14192                                                   (__v8df) __Y, _CMP_UNORD_Q,
14193                                                   (__mmask8) -1,
14194                                                   _MM_FROUND_CUR_DIRECTION);
14195 }
14196
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_UNORD_Q, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14197 extern __inline __mmask8
14198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14199 _mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14200 {
14201   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14202                                                   (__v8df) __Y, _CMP_UNORD_Q,
14203                                                   (__mmask8) __U,
14204                                                   _MM_FROUND_CUR_DIRECTION);
14205 }
14206
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_NEQ_UQ, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14207 extern __inline __mmask8
14208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14209 _mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
14210 {
14211   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14212                                                   (__v8df) __Y, _CMP_NEQ_UQ,
14213                                                   (__mmask8) -1,
14214                                                   _MM_FROUND_CUR_DIRECTION);
14215 }
14216
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_NEQ_UQ, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14217 extern __inline __mmask8
14218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14219 _mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14220 {
14221   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14222                                                   (__v8df) __Y, _CMP_NEQ_UQ,
14223                                                   (__mmask8) __U,
14224                                                   _MM_FROUND_CUR_DIRECTION);
14225 }
14226
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_NLT_US, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14227 extern __inline __mmask8
14228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14229 _mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
14230 {
14231   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14232                                                   (__v8df) __Y, _CMP_NLT_US,
14233                                                   (__mmask8) -1,
14234                                                   _MM_FROUND_CUR_DIRECTION);
14235 }
14236
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_NLT_US, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14237 extern __inline __mmask8
14238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14239 _mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14240 {
14241   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14242                                                   (__v8df) __Y, _CMP_NLT_US,
14243                                                   (__mmask8) __U,
14244                                                   _MM_FROUND_CUR_DIRECTION);
14245 }
14246
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_NLE_US, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14247 extern __inline __mmask8
14248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14249 _mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
14250 {
14251   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14252                                                   (__v8df) __Y, _CMP_NLE_US,
14253                                                   (__mmask8) -1,
14254                                                   _MM_FROUND_CUR_DIRECTION);
14255 }
14256
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_NLE_US, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14257 extern __inline __mmask8
14258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14259 _mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14260 {
14261   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14262                                                   (__v8df) __Y, _CMP_NLE_US,
14263                                                   (__mmask8) __U,
14264                                                   _MM_FROUND_CUR_DIRECTION);
14265 }
14266
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_ORD_Q, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14267 extern __inline __mmask8
14268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14269 _mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
14270 {
14271   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14272                                                   (__v8df) __Y, _CMP_ORD_Q,
14273                                                   (__mmask8) -1,
14274                                                   _MM_FROUND_CUR_DIRECTION);
14275 }
14276
/* Calls __builtin_ia32_cmppd512_mask on __X and __Y with the fixed
   predicate _CMP_ORD_Q, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14277 extern __inline __mmask8
14278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14279 _mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14280 {
14281   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14282                                                   (__v8df) __Y, _CMP_ORD_Q,
14283                                                   (__mmask8) __U,
14284                                                   _MM_FROUND_CUR_DIRECTION);
14285 }
14286
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_EQ_OQ, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14287 extern __inline __mmask16
14288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14289 _mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
14290 {
14291   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14292                                                    (__v16sf) __Y, _CMP_EQ_OQ,
14293                                                    (__mmask16) -1,
14294                                                    _MM_FROUND_CUR_DIRECTION);
14295 }
14296
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_EQ_OQ, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14297 extern __inline __mmask16
14298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14299 _mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14300 {
14301    return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14302                                                    (__v16sf) __Y, _CMP_EQ_OQ,
14303                                                    (__mmask16) __U,
14304                                                    _MM_FROUND_CUR_DIRECTION);
14305 }
14306
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_LT_OS, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14307 extern __inline __mmask16
14308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14309 _mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
14310 {
14311   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14312                                                    (__v16sf) __Y, _CMP_LT_OS,
14313                                                    (__mmask16) -1,
14314                                                    _MM_FROUND_CUR_DIRECTION);
14315 }
14316
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_LT_OS, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14317 extern __inline __mmask16
14318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14319 _mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14320 {
14321    return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14322                                                    (__v16sf) __Y, _CMP_LT_OS,
14323                                                    (__mmask16) __U,
14324                                                    _MM_FROUND_CUR_DIRECTION);
14325 }
14326
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_LE_OS, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14327 extern __inline __mmask16
14328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14329 _mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
14330 {
14331   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14332                                                    (__v16sf) __Y, _CMP_LE_OS,
14333                                                    (__mmask16) -1,
14334                                                    _MM_FROUND_CUR_DIRECTION);
14335 }
14336
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_LE_OS, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14337 extern __inline __mmask16
14338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14339 _mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14340 {
14341    return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14342                                                    (__v16sf) __Y, _CMP_LE_OS,
14343                                                    (__mmask16) __U,
14344                                                    _MM_FROUND_CUR_DIRECTION);
14345 }
14346
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_UNORD_Q, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14347 extern __inline __mmask16
14348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14349 _mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
14350 {
14351   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14352                                                    (__v16sf) __Y, _CMP_UNORD_Q,
14353                                                    (__mmask16) -1,
14354                                                    _MM_FROUND_CUR_DIRECTION);
14355 }
14356
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_UNORD_Q, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14357 extern __inline __mmask16
14358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14359 _mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14360 {
14361    return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14362                                                    (__v16sf) __Y, _CMP_UNORD_Q,
14363                                                    (__mmask16) __U,
14364                                                    _MM_FROUND_CUR_DIRECTION);
14365 }
14366
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_NEQ_UQ, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14367 extern __inline __mmask16
14368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14369 _mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
14370 {
14371   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14372                                                    (__v16sf) __Y, _CMP_NEQ_UQ,
14373                                                    (__mmask16) -1,
14374                                                    _MM_FROUND_CUR_DIRECTION);
14375 }
14376
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_NEQ_UQ, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14377 extern __inline __mmask16
14378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14379 _mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14380 {
14381    return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14382                                                    (__v16sf) __Y, _CMP_NEQ_UQ,
14383                                                    (__mmask16) __U,
14384                                                    _MM_FROUND_CUR_DIRECTION);
14385 }
14386
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_NLT_US, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14387 extern __inline __mmask16
14388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14389 _mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
14390 {
14391   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14392                                                    (__v16sf) __Y, _CMP_NLT_US,
14393                                                    (__mmask16) -1,
14394                                                    _MM_FROUND_CUR_DIRECTION);
14395 }
14396
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_NLT_US, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14397 extern __inline __mmask16
14398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14399 _mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14400 {
14401    return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14402                                                    (__v16sf) __Y, _CMP_NLT_US,
14403                                                    (__mmask16) __U,
14404                                                    _MM_FROUND_CUR_DIRECTION);
14405 }
14406
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_NLE_US, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14407 extern __inline __mmask16
14408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14409 _mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
14410 {
14411   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14412                                                    (__v16sf) __Y, _CMP_NLE_US,
14413                                                    (__mmask16) -1,
14414                                                    _MM_FROUND_CUR_DIRECTION);
14415 }
14416
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_NLE_US, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14417 extern __inline __mmask16
14418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14419 _mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14420 {
14421    return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14422                                                    (__v16sf) __Y, _CMP_NLE_US,
14423                                                    (__mmask16) __U,
14424                                                    _MM_FROUND_CUR_DIRECTION);
14425 }
14426
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_ORD_Q, (__mmask16) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14427 extern __inline __mmask16
14428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14429 _mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
14430 {
14431   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14432                                                    (__v16sf) __Y, _CMP_ORD_Q,
14433                                                    (__mmask16) -1,
14434                                                    _MM_FROUND_CUR_DIRECTION);
14435 }
14436
/* Calls __builtin_ia32_cmpps512_mask on __X and __Y with the fixed
   predicate _CMP_ORD_Q, __U as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask16.  */
14437 extern __inline __mmask16
14438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14439 _mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14440 {
14441    return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14442                                                    (__v16sf) __Y, _CMP_ORD_Q,
14443                                                    (__mmask16) __U,
14444                                                    _MM_FROUND_CUR_DIRECTION);
14445 }
14446
/* Calls __builtin_ia32_cmpsd_mask on __X and __Y with the caller-supplied
   predicate __P, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14447 extern __inline __mmask8
14448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14449 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
14450 {
14451   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
14452                                                (__v2df) __Y, __P,
14453                                                (__mmask8) -1,
14454                                                _MM_FROUND_CUR_DIRECTION);
14455 }
14456
/* Calls __builtin_ia32_cmpsd_mask on __X and __Y with the caller-supplied
   predicate __P, __M as the mask argument and _MM_FROUND_CUR_DIRECTION.
   Returns the result as __mmask8.  */
14457 extern __inline __mmask8
14458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14459 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
14460 {
14461   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
14462                                                (__v2df) __Y, __P,
14463                                                (__mmask8) __M,
14464                                                _MM_FROUND_CUR_DIRECTION);
14465 }
14466
/* Calls __builtin_ia32_cmpss_mask on __X and __Y with the caller-supplied
   predicate __P, (__mmask8) -1 as the mask argument and
   _MM_FROUND_CUR_DIRECTION.  Returns the result as __mmask8.  */
14467 extern __inline __mmask8
14468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14469 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
14470 {
14471   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
14472                                                (__v4sf) __Y, __P,
14473                                                (__mmask8) -1,
14474                                                _MM_FROUND_CUR_DIRECTION);
14475 }
14476
/* Calls __builtin_ia32_cmpss_mask on __X and __Y with the caller-supplied
   predicate __P, __M as the mask argument and _MM_FROUND_CUR_DIRECTION.
   Returns the result as __mmask8.  */
14477 extern __inline __mmask8
14478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14479 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
14480 {
14481   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
14482                                                (__v4sf) __Y, __P,
14483                                                (__mmask8) __M,
14484                                                _MM_FROUND_CUR_DIRECTION);
14485 }
14486
14487 #else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmppd512_mask on X and Y with predicate P,
   (__mmask8)-1 as the mask argument and _MM_FROUND_CUR_DIRECTION; the
   result is cast to __mmask8.  */
14488 #define _mm512_cmp_pd_mask(X, Y, P)                                     \
14489   ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
14490                                             (__v8df)(__m512d)(Y), (int)(P),\
14491                                             (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
14492
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpps512_mask on X and Y with predicate P,
   (__mmask16)-1 as the mask argument and _MM_FROUND_CUR_DIRECTION; the
   result is cast to __mmask16.  */
14493 #define _mm512_cmp_ps_mask(X, Y, P)                                     \
14494   ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
14495                                              (__v16sf)(__m512)(Y), (int)(P),\
14496                                              (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
14497
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmppd512_mask on X and Y with predicate P, M as the mask
   argument and _MM_FROUND_CUR_DIRECTION; the result is cast to __mmask8.
   M is parenthesized before the cast so that a compound mask expression
   (for example a conditional expression) is converted as a whole instead
   of having only its first operand cast.  */
14498 #define _mm512_mask_cmp_pd_mask(M, X, Y, P)                                     \
14499   ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
14500                                             (__v8df)(__m512d)(Y), (int)(P),\
14501                                             (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
14502
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpps512_mask on X and Y with predicate P, M as the mask
   argument and _MM_FROUND_CUR_DIRECTION; the result is cast to __mmask16.
   M is parenthesized before the cast so that a compound mask expression
   (for example a conditional expression) is converted as a whole instead
   of having only its first operand cast.  */
14503 #define _mm512_mask_cmp_ps_mask(M, X, Y, P)                                     \
14504   ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
14505                                              (__v16sf)(__m512)(Y), (int)(P),\
14506                                              (__mmask16)(M),_MM_FROUND_CUR_DIRECTION))
14507
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpsd_mask on X and Y with predicate P, (__mmask8)-1 as
   the mask argument and _MM_FROUND_CUR_DIRECTION; the result is cast to
   __mmask8.  */
14508 #define _mm_cmp_sd_mask(X, Y, P)                                        \
14509   ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),          \
14510                                          (__v2df)(__m128d)(Y), (int)(P),\
14511                                          (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
14512
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpsd_mask on X and Y with predicate P, M as the mask
   argument and _MM_FROUND_CUR_DIRECTION; the result is cast to __mmask8.
   M is parenthesized and cast to __mmask8, as the inline definition of
   this intrinsic does with its __M parameter, so both forms convert the
   mask the same way.  */
14513 #define _mm_mask_cmp_sd_mask(M, X, Y, P)                                        \
14514   ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),          \
14515                                          (__v2df)(__m128d)(Y), (int)(P),\
14516                                          (__mmask8)(M),_MM_FROUND_CUR_DIRECTION))
14517
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpss_mask on X and Y with predicate P, (__mmask8)-1 as
   the mask argument and _MM_FROUND_CUR_DIRECTION; the result is cast to
   __mmask8.  */
14518 #define _mm_cmp_ss_mask(X, Y, P)                                        \
14519   ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),           \
14520                                          (__v4sf)(__m128)(Y), (int)(P), \
14521                                          (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
14522
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpss_mask on X and Y with predicate P, M as the mask
   argument and _MM_FROUND_CUR_DIRECTION; the result is cast to __mmask8.
   M is parenthesized and cast to __mmask8, as the inline definition of
   this intrinsic does with its __M parameter, so both forms convert the
   mask the same way.  */
14523 #define _mm_mask_cmp_ss_mask(M, X, Y, P)                                        \
14524   ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),           \
14525                                          (__v4sf)(__m128)(Y), (int)(P), \
14526                                          (__mmask8)(M),_MM_FROUND_CUR_DIRECTION))
14527 #endif
14528
/* Returns the result of __builtin_ia32_kmovw applied to the mask __A.  */
14529 extern __inline __mmask16
14530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14531 _mm512_kmov (__mmask16 __A)
14532 {
14533   return __builtin_ia32_kmovw (__A);
14534 }
14535
/* Returns __A converted to __m512 with a plain cast; no builtin is
   called.  */
14536 extern __inline __m512
14537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14538 _mm512_castpd_ps (__m512d __A)
14539 {
14540   return (__m512) (__A);
14541 }
14542
/* Returns __A converted to __m512i with a plain cast; no builtin is
   called.  */
14543 extern __inline __m512i
14544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14545 _mm512_castpd_si512 (__m512d __A)
14546 {
14547   return (__m512i) (__A);
14548 }
14549
/* Returns __A converted to __m512d with a plain cast; no builtin is
   called.  */
14550 extern __inline __m512d
14551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14552 _mm512_castps_pd (__m512 __A)
14553 {
14554   return (__m512d) (__A);
14555 }
14556
/* Returns __A converted to __m512i with a plain cast; no builtin is
   called.  */
14557 extern __inline __m512i
14558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14559 _mm512_castps_si512 (__m512 __A)
14560 {
14561   return (__m512i) (__A);
14562 }
14563
/* Returns __A converted to __m512 with a plain cast; no builtin is
   called.  */
14564 extern __inline __m512
14565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14566 _mm512_castsi512_ps (__m512i __A)
14567 {
14568   return (__m512) (__A);
14569 }
14570
/* Returns __A converted to __m512d with a plain cast; no builtin is
   called.  */
14571 extern __inline __m512d
14572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14573 _mm512_castsi512_pd (__m512i __A)
14574 {
14575   return (__m512d) (__A);
14576 }
14577
/* Casts __A to __m512, passes it to _mm512_extractf32x4_ps with index 0
   and returns that result cast to __m128d.  */
14578 extern __inline __m128d
14579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14580 _mm512_castpd512_pd128 (__m512d __A)
14581 {
14582   return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
14583 }
14584
14585 extern __inline __m128
14586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14587 _mm512_castps512_ps128 (__m512 __A)
14588 {
14589   return _mm512_extractf32x4_ps(__A, 0);
14590 }
14591
14592 extern __inline __m128i
14593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14594 _mm512_castsi512_si128 (__m512i __A)
14595 {
14596   return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
14597 }
14598
14599 extern __inline __m256d
14600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14601 _mm512_castpd512_pd256 (__m512d __A)
14602 {
14603   return _mm512_extractf64x4_pd(__A, 0);
14604 }
14605
14606 extern __inline __m256
14607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14608 _mm512_castps512_ps256 (__m512 __A)
14609 {
14610   return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
14611 }
14612
14613 extern __inline __m256i
14614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14615 _mm512_castsi512_si256 (__m512i __A)
14616 {
14617   return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
14618 }
14619
14620 extern __inline __m512d
14621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14622 _mm512_castpd128_pd512 (__m128d __A)
14623 {
14624   return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
14625 }
14626
14627 extern __inline __m512
14628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14629 _mm512_castps128_ps512 (__m128 __A)
14630 {
14631   return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
14632 }
14633
14634 extern __inline __m512i
14635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14636 _mm512_castsi128_si512 (__m128i __A)
14637 {
14638   return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
14639 }
14640
14641 extern __inline __m512d
14642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14643 _mm512_castpd256_pd512 (__m256d __A)
14644 {
14645   return __builtin_ia32_pd512_256pd (__A);
14646 }
14647
14648 extern __inline __m512
14649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14650 _mm512_castps256_ps512 (__m256 __A)
14651 {
14652   return __builtin_ia32_ps512_256ps (__A);
14653 }
14654
14655 extern __inline __m512i
14656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14657 _mm512_castsi256_si512 (__m256i __A)
14658 {
14659   return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
14660 }
14661
14662 extern __inline __mmask16
14663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14664 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
14665 {
14666   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14667                                                      (__v16si) __B, 0,
14668                                                      (__mmask16) -1);
14669 }
14670
14671 extern __inline __mmask16
14672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14673 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
14674 {
14675   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14676                                                      (__v16si) __B, 0, __U);
14677 }
14678
14679 extern __inline __mmask8
14680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14681 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
14682 {
14683   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14684                                                     (__v8di) __B, 0, __U);
14685 }
14686
14687 extern __inline __mmask8
14688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14689 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
14690 {
14691   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14692                                                     (__v8di) __B, 0,
14693                                                     (__mmask8) -1);
14694 }
14695
14696 extern __inline __mmask16
14697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14698 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
14699 {
14700   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14701                                                      (__v16si) __B, 6,
14702                                                      (__mmask16) -1);
14703 }
14704
14705 extern __inline __mmask16
14706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14707 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
14708 {
14709   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14710                                                      (__v16si) __B, 6,  __U);
14711 }
14712
14713 extern __inline __mmask8
14714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14715 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
14716 {
14717   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14718                                                     (__v8di) __B, 6, __U);
14719 }
14720
14721 extern __inline __mmask8
14722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14723 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
14724 {
14725   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14726                                                     (__v8di) __B, 6,
14727                                                     (__mmask8) -1);
14728 }
14729
14730 #undef __MM512_REDUCE_OP
14731 #define __MM512_REDUCE_OP(op) \
14732   __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1);            \
14733   __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0);            \
14734   __m256i __T3 = (__m256i) (__T1 op __T2);                              \
14735   __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1);            \
14736   __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0);            \
14737   __v4si __T6 = __T4 op __T5;                                           \
14738   __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 });      \
14739   __v4si __T8 = __T6 op __T7;                                           \
14740   return __T8[0] op __T8[1]
14741
14742 extern __inline int
14743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14744 _mm512_reduce_add_epi32 (__m512i __A)
14745 {
14746   __MM512_REDUCE_OP (+);
14747 }
14748
14749 extern __inline int
14750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14751 _mm512_reduce_mul_epi32 (__m512i __A)
14752 {
14753   __MM512_REDUCE_OP (*);
14754 }
14755
14756 extern __inline int
14757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14758 _mm512_reduce_and_epi32 (__m512i __A)
14759 {
14760   __MM512_REDUCE_OP (&);
14761 }
14762
14763 extern __inline int
14764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14765 _mm512_reduce_or_epi32 (__m512i __A)
14766 {
14767   __MM512_REDUCE_OP (|);
14768 }
14769
14770 extern __inline int
14771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14772 _mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
14773 {
14774   __A = _mm512_maskz_mov_epi32 (__U, __A);
14775   __MM512_REDUCE_OP (+);
14776 }
14777
14778 extern __inline int
14779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14780 _mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
14781 {
14782   __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
14783   __MM512_REDUCE_OP (*);
14784 }
14785
14786 extern __inline int
14787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14788 _mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
14789 {
14790   __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14791   __MM512_REDUCE_OP (&);
14792 }
14793
14794 extern __inline int
14795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14796 _mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
14797 {
14798   __A = _mm512_maskz_mov_epi32 (__U, __A);
14799   __MM512_REDUCE_OP (|);
14800 }
14801
14802 #undef __MM512_REDUCE_OP
14803 #define __MM512_REDUCE_OP(op) \
14804   __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1);          \
14805   __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0);          \
14806   __m256i __T3 = _mm256_##op (__T1, __T2);                              \
14807   __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1);          \
14808   __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0);          \
14809   __m128i __T6 = _mm_##op (__T4, __T5);                                 \
14810   __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6,            \
14811                                               (__v4si) { 2, 3, 0, 1 }); \
14812   __m128i __T8 = _mm_##op (__T6, __T7);                                 \
14813   __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8,            \
14814                                               (__v4si) { 1, 0, 1, 0 }); \
14815   __v4si __T10 = (__v4si) _mm_##op (__T8, __T9);                        \
14816   return __T10[0]
14817
14818 extern __inline int
14819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14820 _mm512_reduce_min_epi32 (__m512i __A)
14821 {
14822   __MM512_REDUCE_OP (min_epi32);
14823 }
14824
14825 extern __inline int
14826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14827 _mm512_reduce_max_epi32 (__m512i __A)
14828 {
14829   __MM512_REDUCE_OP (max_epi32);
14830 }
14831
14832 extern __inline unsigned int
14833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14834 _mm512_reduce_min_epu32 (__m512i __A)
14835 {
14836   __MM512_REDUCE_OP (min_epu32);
14837 }
14838
14839 extern __inline unsigned int
14840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14841 _mm512_reduce_max_epu32 (__m512i __A)
14842 {
14843   __MM512_REDUCE_OP (max_epu32);
14844 }
14845
14846 extern __inline int
14847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14848 _mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
14849 {
14850   __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
14851   __MM512_REDUCE_OP (min_epi32);
14852 }
14853
14854 extern __inline int
14855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14856 _mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
14857 {
14858   __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
14859   __MM512_REDUCE_OP (max_epi32);
14860 }
14861
14862 extern __inline unsigned int
14863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14864 _mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
14865 {
14866   __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14867   __MM512_REDUCE_OP (min_epu32);
14868 }
14869
14870 extern __inline unsigned int
14871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14872 _mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
14873 {
14874   __A = _mm512_maskz_mov_epi32 (__U, __A);
14875   __MM512_REDUCE_OP (max_epu32);
14876 }
14877
14878 #undef __MM512_REDUCE_OP
14879 #define __MM512_REDUCE_OP(op) \
14880   __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1);     \
14881   __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0);     \
14882   __m256 __T3 = __T1 op __T2;                                           \
14883   __m128 __T4 = _mm256_extractf128_ps (__T3, 1);                        \
14884   __m128 __T5 = _mm256_extractf128_ps (__T3, 0);                        \
14885   __m128 __T6 = __T4 op __T5;                                           \
14886   __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 });      \
14887   __m128 __T8 = __T6 op __T7;                                           \
14888   return __T8[0] op __T8[1]
14889
14890 extern __inline float
14891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14892 _mm512_reduce_add_ps (__m512 __A)
14893 {
14894   __MM512_REDUCE_OP (+);
14895 }
14896
14897 extern __inline float
14898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14899 _mm512_reduce_mul_ps (__m512 __A)
14900 {
14901   __MM512_REDUCE_OP (*);
14902 }
14903
14904 extern __inline float
14905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14906 _mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
14907 {
14908   __A = _mm512_maskz_mov_ps (__U, __A);
14909   __MM512_REDUCE_OP (+);
14910 }
14911
14912 extern __inline float
14913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14914 _mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
14915 {
14916   __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
14917   __MM512_REDUCE_OP (*);
14918 }
14919
14920 #undef __MM512_REDUCE_OP
14921 #define __MM512_REDUCE_OP(op) \
14922   __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1);     \
14923   __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0);     \
14924   __m256 __T3 = _mm256_##op (__T1, __T2);                               \
14925   __m128 __T4 = _mm256_extractf128_ps (__T3, 1);                        \
14926   __m128 __T5 = _mm256_extractf128_ps (__T3, 0);                        \
14927   __m128 __T6 = _mm_##op (__T4, __T5);                                  \
14928   __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 });      \
14929   __m128 __T8 = _mm_##op (__T6, __T7);                                  \
14930   __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 });      \
14931   __m128 __T10 = _mm_##op (__T8, __T9);                                 \
14932   return __T10[0]
14933
14934 extern __inline float
14935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14936 _mm512_reduce_min_ps (__m512 __A)
14937 {
14938   __MM512_REDUCE_OP (min_ps);
14939 }
14940
14941 extern __inline float
14942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14943 _mm512_reduce_max_ps (__m512 __A)
14944 {
14945   __MM512_REDUCE_OP (max_ps);
14946 }
14947
14948 extern __inline float
14949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14950 _mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
14951 {
14952   __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
14953   __MM512_REDUCE_OP (min_ps);
14954 }
14955
14956 extern __inline float
14957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14958 _mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
14959 {
14960   __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
14961   __MM512_REDUCE_OP (max_ps);
14962 }
14963
14964 #undef __MM512_REDUCE_OP
14965 #define __MM512_REDUCE_OP(op) \
14966   __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1);            \
14967   __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0);            \
14968   __m256i __T3 = (__m256i) (__T1 op __T2);                              \
14969   __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1);            \
14970   __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0);            \
14971   __v2di __T6 = __T4 op __T5;                                           \
14972   return __T6[0] op __T6[1]
14973
14974 extern __inline long long
14975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14976 _mm512_reduce_add_epi64 (__m512i __A)
14977 {
14978   __MM512_REDUCE_OP (+);
14979 }
14980
14981 extern __inline long long
14982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14983 _mm512_reduce_mul_epi64 (__m512i __A)
14984 {
14985   __MM512_REDUCE_OP (*);
14986 }
14987
14988 extern __inline long long
14989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14990 _mm512_reduce_and_epi64 (__m512i __A)
14991 {
14992   __MM512_REDUCE_OP (&);
14993 }
14994
14995 extern __inline long long
14996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14997 _mm512_reduce_or_epi64 (__m512i __A)
14998 {
14999   __MM512_REDUCE_OP (|);
15000 }
15001
15002 extern __inline long long
15003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15004 _mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
15005 {
15006   __A = _mm512_maskz_mov_epi64 (__U, __A);
15007   __MM512_REDUCE_OP (+);
15008 }
15009
15010 extern __inline long long
15011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15012 _mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
15013 {
15014   __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
15015   __MM512_REDUCE_OP (*);
15016 }
15017
15018 extern __inline long long
15019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15020 _mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
15021 {
15022   __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
15023   __MM512_REDUCE_OP (&);
15024 }
15025
15026 extern __inline long long
15027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15028 _mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
15029 {
15030   __A = _mm512_maskz_mov_epi64 (__U, __A);
15031   __MM512_REDUCE_OP (|);
15032 }
15033
15034 #undef __MM512_REDUCE_OP
15035 #define __MM512_REDUCE_OP(op) \
15036   __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e);                 \
15037   __m512i __T2 = _mm512_##op (__A, __T1);                               \
15038   __m512i __T3                                                          \
15039     = (__m512i) __builtin_shuffle ((__v8di) __T2,                       \
15040                                    (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
15041   __m512i __T4 = _mm512_##op (__T2, __T3);                              \
15042   __m512i __T5                                                          \
15043     = (__m512i) __builtin_shuffle ((__v8di) __T4,                       \
15044                                    (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
15045   __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5);                      \
15046   return __T6[0]
15047
15048 extern __inline long long
15049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15050 _mm512_reduce_min_epi64 (__m512i __A)
15051 {
15052   __MM512_REDUCE_OP (min_epi64);
15053 }
15054
15055 extern __inline long long
15056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15057 _mm512_reduce_max_epi64 (__m512i __A)
15058 {
15059   __MM512_REDUCE_OP (max_epi64);
15060 }
15061
15062 extern __inline long long
15063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15064 _mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
15065 {
15066   __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
15067                                __U, __A);
15068   __MM512_REDUCE_OP (min_epi64);
15069 }
15070
15071 extern __inline long long
15072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15073 _mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
15074 {
15075   __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
15076                                __U, __A);
15077   __MM512_REDUCE_OP (max_epi64);
15078 }
15079
15080 extern __inline unsigned long long
15081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15082 _mm512_reduce_min_epu64 (__m512i __A)
15083 {
15084   __MM512_REDUCE_OP (min_epu64);
15085 }
15086
15087 extern __inline unsigned long long
15088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15089 _mm512_reduce_max_epu64 (__m512i __A)
15090 {
15091   __MM512_REDUCE_OP (max_epu64);
15092 }
15093
15094 extern __inline unsigned long long
15095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15096 _mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
15097 {
15098   __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
15099   __MM512_REDUCE_OP (min_epu64);
15100 }
15101
15102 extern __inline unsigned long long
15103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15104 _mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
15105 {
15106   __A = _mm512_maskz_mov_epi64 (__U, __A);
15107   __MM512_REDUCE_OP (max_epu64);
15108 }
15109
15110 #undef __MM512_REDUCE_OP
15111 #define __MM512_REDUCE_OP(op) \
15112   __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1);             \
15113   __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0);             \
15114   __m256d __T3 = __T1 op __T2;                                          \
15115   __m128d __T4 = _mm256_extractf128_pd (__T3, 1);                       \
15116   __m128d __T5 = _mm256_extractf128_pd (__T3, 0);                       \
15117   __m128d __T6 = __T4 op __T5;                                          \
15118   return __T6[0] op __T6[1]
15119
15120 extern __inline double
15121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15122 _mm512_reduce_add_pd (__m512d __A)
15123 {
15124   __MM512_REDUCE_OP (+);
15125 }
15126
15127 extern __inline double
15128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15129 _mm512_reduce_mul_pd (__m512d __A)
15130 {
15131   __MM512_REDUCE_OP (*);
15132 }
15133
15134 extern __inline double
15135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15136 _mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
15137 {
15138   __A = _mm512_maskz_mov_pd (__U, __A);
15139   __MM512_REDUCE_OP (+);
15140 }
15141
15142 extern __inline double
15143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15144 _mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
15145 {
15146   __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
15147   __MM512_REDUCE_OP (*);
15148 }
15149
15150 #undef __MM512_REDUCE_OP
15151 #define __MM512_REDUCE_OP(op) \
15152   __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1);             \
15153   __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0);             \
15154   __m256d __T3 = _mm256_##op (__T1, __T2);                              \
15155   __m128d __T4 = _mm256_extractf128_pd (__T3, 1);                       \
15156   __m128d __T5 = _mm256_extractf128_pd (__T3, 0);                       \
15157   __m128d __T6 = _mm_##op (__T4, __T5);                                 \
15158   __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
15159   __m128d __T8 = _mm_##op (__T6, __T7);                                 \
15160   return __T8[0]
15161
15162 extern __inline double
15163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15164 _mm512_reduce_min_pd (__m512d __A)
15165 {
15166   __MM512_REDUCE_OP (min_pd);
15167 }
15168
15169 extern __inline double
15170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15171 _mm512_reduce_max_pd (__m512d __A)
15172 {
15173   __MM512_REDUCE_OP (max_pd);
15174 }
15175
15176 extern __inline double
15177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15178 _mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
15179 {
15180   __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
15181   __MM512_REDUCE_OP (min_pd);
15182 }
15183
15184 extern __inline double
15185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15186 _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
15187 {
15188   __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
15189   __MM512_REDUCE_OP (max_pd);
15190 }
15191
15192 #undef __MM512_REDUCE_OP
15193
15194 #ifdef __DISABLE_AVX512F__
15195 #undef __DISABLE_AVX512F__
15196 #pragma GCC pop_options
15197 #endif /* __DISABLE_AVX512F__ */
15198
15199 #endif /* _AVX512FINTRIN_H_INCLUDED */