Import GCC-8 to a new vendor branch
[dragonfly.git] / contrib / gcc-8.0 / gcc / config / i386 / avx512dqintrin.h
1 /* Copyright (C) 2014-2018 Free Software Foundation, Inc.
2
3    This file is part of GCC.
4
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512DQINTRIN_H_INCLUDED
29 #define _AVX512DQINTRIN_H_INCLUDED
30
31 #ifndef __AVX512DQ__
32 #pragma GCC push_options
33 #pragma GCC target("avx512dq")
34 #define __DISABLE_AVX512DQ__
35 #endif /* __AVX512DQ__ */
36
37 extern __inline unsigned char
38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 _ktest_mask8_u8  (__mmask8 __A,  __mmask8 __B, unsigned char *__CF)
40 {
41   *__CF = (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
42   return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
43 }
44
45 extern __inline unsigned char
46 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
47 _ktestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
48 {
49   return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
50 }
51
52 extern __inline unsigned char
53 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
54 _ktestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
55 {
56   return (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
57 }
58
59 extern __inline unsigned char
60 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61 _ktest_mask16_u8  (__mmask16 __A,  __mmask16 __B, unsigned char *__CF)
62 {
63   *__CF = (unsigned char) __builtin_ia32_ktestchi (__A, __B);
64   return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
65 }
66
67 extern __inline unsigned char
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 _ktestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
70 {
71   return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
72 }
73
74 extern __inline unsigned char
75 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
76 _ktestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
77 {
78   return (unsigned char) __builtin_ia32_ktestchi (__A, __B);
79 }
80
81 extern __inline unsigned char
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 _kortest_mask8_u8  (__mmask8 __A,  __mmask8 __B, unsigned char *__CF)
84 {
85   *__CF = (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
86   return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
87 }
88
89 extern __inline unsigned char
90 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
91 _kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
92 {
93   return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
94 }
95
96 extern __inline unsigned char
97 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98 _kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
99 {
100   return (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
101 }
102
103 extern __inline __mmask8
104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105 _kadd_mask8 (__mmask8 __A, __mmask8 __B)
106 {
107   return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B);
108 }
109
110 extern __inline __mmask16
111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
112 _kadd_mask16 (__mmask16 __A, __mmask16 __B)
113 {
114   return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
115 }
116
117 extern __inline unsigned int
118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119 _cvtmask8_u32 (__mmask8 __A)
120 {
121   return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A);
122 }
123         
124 extern __inline __mmask8
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _cvtu32_mask8 (unsigned int __A)
127 {
128   return (__mmask8) __builtin_ia32_kmovb ((__mmask8) __A);
129 }
130
131 extern __inline __mmask8
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 _load_mask8 (__mmask8 *__A)
134 {
135   return (__mmask8) __builtin_ia32_kmovb (*(__mmask8 *) __A);
136 }
137
138 extern __inline void
139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
140 _store_mask8 (__mmask8 *__A, __mmask8 __B)
141 {
142   *(__mmask8 *) __A = __builtin_ia32_kmovb (__B);
143 }
144
145 extern __inline __mmask8
146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
147 _knot_mask8 (__mmask8 __A)
148 {
149   return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A);
150 }
151
152 extern __inline __mmask8
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _kor_mask8 (__mmask8 __A, __mmask8 __B)
155 {
156   return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B);
157 }
158
159 extern __inline __mmask8
160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161 _kxnor_mask8 (__mmask8 __A, __mmask8 __B)
162 {
163   return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B);
164 }
165
166 extern __inline __mmask8
167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
168 _kxor_mask8 (__mmask8 __A, __mmask8 __B)
169 {
170   return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B);
171 }
172
173 extern __inline __mmask8
174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
175 _kand_mask8 (__mmask8 __A, __mmask8 __B)
176 {
177   return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B);
178 }
179
180 extern __inline __mmask8
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _kandn_mask8 (__mmask8 __A, __mmask8 __B)
183 {
184   return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
185 }
186
187 extern __inline __m512d
188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
189 _mm512_broadcast_f64x2 (__m128d __A)
190 {
191   return (__m512d)
192          __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
193                                                  _mm512_undefined_pd (),
194                                                  (__mmask8) -1);
195 }
196
197 extern __inline __m512d
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
200 {
201   return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
202                                                            __A,
203                                                            (__v8df)
204                                                            __O, __M);
205 }
206
207 extern __inline __m512d
208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
209 _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
210 {
211   return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
212                                                            __A,
213                                                            (__v8df)
214                                                            _mm512_setzero_ps (),
215                                                            __M);
216 }
217
218 extern __inline __m512i
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm512_broadcast_i64x2 (__m128i __A)
221 {
222   return (__m512i)
223          __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
224                                                  _mm512_undefined_epi32 (),
225                                                  (__mmask8) -1);
226 }
227
228 extern __inline __m512i
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230 _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
231 {
232   return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
233                                                            __A,
234                                                            (__v8di)
235                                                            __O, __M);
236 }
237
238 extern __inline __m512i
239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240 _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
241 {
242   return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
243                                                            __A,
244                                                            (__v8di)
245                                                            _mm512_setzero_si512 (),
246                                                            __M);
247 }
248
249 extern __inline __m512
250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
251 _mm512_broadcast_f32x2 (__m128 __A)
252 {
253   return (__m512)
254          __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
255                                                  (__v16sf)_mm512_undefined_ps (),
256                                                  (__mmask16) -1);
257 }
258
259 extern __inline __m512
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
262 {
263   return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
264                                                           (__v16sf)
265                                                           __O, __M);
266 }
267
268 extern __inline __m512
269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
270 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
271 {
272   return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
273                                                           (__v16sf)
274                                                           _mm512_setzero_ps (),
275                                                           __M);
276 }
277
278 extern __inline __m512i
279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280 _mm512_broadcast_i32x2 (__m128i __A)
281 {
282   return (__m512i)
283          __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
284                                                  (__v16si)
285                                                  _mm512_undefined_epi32 (),
286                                                  (__mmask16) -1);
287 }
288
289 extern __inline __m512i
290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
291 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
292 {
293   return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
294                                                            __A,
295                                                            (__v16si)
296                                                            __O, __M);
297 }
298
299 extern __inline __m512i
300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
302 {
303   return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
304                                                            __A,
305                                                            (__v16si)
306                                                            _mm512_setzero_si512 (),
307                                                            __M);
308 }
309
310 extern __inline __m512
311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
312 _mm512_broadcast_f32x8 (__m256 __A)
313 {
314   return (__m512)
315          __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
316                                                  _mm512_undefined_ps (),
317                                                  (__mmask16) -1);
318 }
319
320 extern __inline __m512
321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322 _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
323 {
324   return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
325                                                           (__v16sf)__O,
326                                                           __M);
327 }
328
329 extern __inline __m512
330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
331 _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
332 {
333   return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
334                                                           (__v16sf)
335                                                           _mm512_setzero_ps (),
336                                                           __M);
337 }
338
339 extern __inline __m512i
340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341 _mm512_broadcast_i32x8 (__m256i __A)
342 {
343   return (__m512i)
344          __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
345                                                  (__v16si)
346                                                  _mm512_undefined_epi32 (),
347                                                  (__mmask16) -1);
348 }
349
350 extern __inline __m512i
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
353 {
354   return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
355                                                            __A,
356                                                            (__v16si)__O,
357                                                            __M);
358 }
359
360 extern __inline __m512i
361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
362 _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
363 {
364   return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
365                                                            __A,
366                                                            (__v16si)
367                                                            _mm512_setzero_si512 (),
368                                                            __M);
369 }
370
371 extern __inline __m512i
372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373 _mm512_mullo_epi64 (__m512i __A, __m512i __B)
374 {
375   return (__m512i) ((__v8du) __A * (__v8du) __B);
376 }
377
378 extern __inline __m512i
379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380 _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
381                          __m512i __B)
382 {
383   return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
384                                                   (__v8di) __B,
385                                                   (__v8di) __W,
386                                                   (__mmask8) __U);
387 }
388
389 extern __inline __m512i
390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391 _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
392 {
393   return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
394                                                   (__v8di) __B,
395                                                   (__v8di)
396                                                   _mm512_setzero_si512 (),
397                                                   (__mmask8) __U);
398 }
399
400 extern __inline __m512d
401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402 _mm512_xor_pd (__m512d __A, __m512d __B)
403 {
404   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
405                                                  (__v8df) __B,
406                                                  (__v8df)
407                                                  _mm512_setzero_pd (),
408                                                  (__mmask8) -1);
409 }
410
411 extern __inline __m512d
412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
413 _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
414                     __m512d __B)
415 {
416   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
417                                                  (__v8df) __B,
418                                                  (__v8df) __W,
419                                                  (__mmask8) __U);
420 }
421
422 extern __inline __m512d
423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
424 _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
425 {
426   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
427                                                  (__v8df) __B,
428                                                  (__v8df)
429                                                  _mm512_setzero_pd (),
430                                                  (__mmask8) __U);
431 }
432
433 extern __inline __m512
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm512_xor_ps (__m512 __A, __m512 __B)
436 {
437   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
438                                                 (__v16sf) __B,
439                                                 (__v16sf)
440                                                 _mm512_setzero_ps (),
441                                                 (__mmask16) -1);
442 }
443
444 extern __inline __m512
445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446 _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
447 {
448   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
449                                                 (__v16sf) __B,
450                                                 (__v16sf) __W,
451                                                 (__mmask16) __U);
452 }
453
454 extern __inline __m512
455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
456 _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
457 {
458   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
459                                                 (__v16sf) __B,
460                                                 (__v16sf)
461                                                 _mm512_setzero_ps (),
462                                                 (__mmask16) __U);
463 }
464
465 extern __inline __m512d
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 _mm512_or_pd (__m512d __A, __m512d __B)
468 {
469   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
470                                                 (__v8df) __B,
471                                                 (__v8df)
472                                                 _mm512_setzero_pd (),
473                                                 (__mmask8) -1);
474 }
475
476 extern __inline __m512d
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
479 {
480   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
481                                                 (__v8df) __B,
482                                                 (__v8df) __W,
483                                                 (__mmask8) __U);
484 }
485
486 extern __inline __m512d
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
489 {
490   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
491                                                 (__v8df) __B,
492                                                 (__v8df)
493                                                 _mm512_setzero_pd (),
494                                                 (__mmask8) __U);
495 }
496
497 extern __inline __m512
498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
499 _mm512_or_ps (__m512 __A, __m512 __B)
500 {
501   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
502                                                (__v16sf) __B,
503                                                (__v16sf)
504                                                _mm512_setzero_ps (),
505                                                (__mmask16) -1);
506 }
507
508 extern __inline __m512
509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
510 _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
511 {
512   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
513                                                (__v16sf) __B,
514                                                (__v16sf) __W,
515                                                (__mmask16) __U);
516 }
517
518 extern __inline __m512
519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520 _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
521 {
522   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
523                                                (__v16sf) __B,
524                                                (__v16sf)
525                                                _mm512_setzero_ps (),
526                                                (__mmask16) __U);
527 }
528
529 extern __inline __m512d
530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
531 _mm512_and_pd (__m512d __A, __m512d __B)
532 {
533   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
534                                                  (__v8df) __B,
535                                                  (__v8df)
536                                                  _mm512_setzero_pd (),
537                                                  (__mmask8) -1);
538 }
539
540 extern __inline __m512d
541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
542 _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
543                     __m512d __B)
544 {
545   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
546                                                  (__v8df) __B,
547                                                  (__v8df) __W,
548                                                  (__mmask8) __U);
549 }
550
551 extern __inline __m512d
552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
553 _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
554 {
555   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
556                                                  (__v8df) __B,
557                                                  (__v8df)
558                                                  _mm512_setzero_pd (),
559                                                  (__mmask8) __U);
560 }
561
562 extern __inline __m512
563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
564 _mm512_and_ps (__m512 __A, __m512 __B)
565 {
566   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
567                                                 (__v16sf) __B,
568                                                 (__v16sf)
569                                                 _mm512_setzero_ps (),
570                                                 (__mmask16) -1);
571 }
572
573 extern __inline __m512
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
576 {
577   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
578                                                 (__v16sf) __B,
579                                                 (__v16sf) __W,
580                                                 (__mmask16) __U);
581 }
582
583 extern __inline __m512
584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585 _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
586 {
587   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
588                                                 (__v16sf) __B,
589                                                 (__v16sf)
590                                                 _mm512_setzero_ps (),
591                                                 (__mmask16) __U);
592 }
593
594 extern __inline __m512d
595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596 _mm512_andnot_pd (__m512d __A, __m512d __B)
597 {
598   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
599                                                   (__v8df) __B,
600                                                   (__v8df)
601                                                   _mm512_setzero_pd (),
602                                                   (__mmask8) -1);
603 }
604
605 extern __inline __m512d
606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
608                        __m512d __B)
609 {
610   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
611                                                   (__v8df) __B,
612                                                   (__v8df) __W,
613                                                   (__mmask8) __U);
614 }
615
616 extern __inline __m512d
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
619 {
620   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
621                                                   (__v8df) __B,
622                                                   (__v8df)
623                                                   _mm512_setzero_pd (),
624                                                   (__mmask8) __U);
625 }
626
627 extern __inline __m512
628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629 _mm512_andnot_ps (__m512 __A, __m512 __B)
630 {
631   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
632                                                  (__v16sf) __B,
633                                                  (__v16sf)
634                                                  _mm512_setzero_ps (),
635                                                  (__mmask16) -1);
636 }
637
638 extern __inline __m512
639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
640 _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
641                        __m512 __B)
642 {
643   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
644                                                  (__v16sf) __B,
645                                                  (__v16sf) __W,
646                                                  (__mmask16) __U);
647 }
648
649 extern __inline __m512
650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
651 _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
652 {
653   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
654                                                  (__v16sf) __B,
655                                                  (__v16sf)
656                                                  _mm512_setzero_ps (),
657                                                  (__mmask16) __U);
658 }
659
660 extern __inline __mmask16
661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662 _mm512_movepi32_mask (__m512i __A)
663 {
664   return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
665 }
666
667 extern __inline __mmask8
668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
669 _mm512_movepi64_mask (__m512i __A)
670 {
671   return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
672 }
673
674 extern __inline __m512i
675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
676 _mm512_movm_epi32 (__mmask16 __A)
677 {
678   return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
679 }
680
681 extern __inline __m512i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm512_movm_epi64 (__mmask8 __A)
684 {
685   return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
686 }
687
688 extern __inline __m512i
689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
690 _mm512_cvttpd_epi64 (__m512d __A)
691 {
692   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
693                                                      (__v8di)
694                                                      _mm512_setzero_si512 (),
695                                                      (__mmask8) -1,
696                                                      _MM_FROUND_CUR_DIRECTION);
697 }
698
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
702 {
703   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
704                                                      (__v8di) __W,
705                                                      (__mmask8) __U,
706                                                      _MM_FROUND_CUR_DIRECTION);
707 }
708
709 extern __inline __m512i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
712 {
713   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
714                                                      (__v8di)
715                                                      _mm512_setzero_si512 (),
716                                                      (__mmask8) __U,
717                                                      _MM_FROUND_CUR_DIRECTION);
718 }
719
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_cvttpd_epu64 (__m512d __A)
723 {
724   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
725                                                       (__v8di)
726                                                       _mm512_setzero_si512 (),
727                                                       (__mmask8) -1,
728                                                       _MM_FROUND_CUR_DIRECTION);
729 }
730
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
734 {
735   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
736                                                       (__v8di) __W,
737                                                       (__mmask8) __U,
738                                                       _MM_FROUND_CUR_DIRECTION);
739 }
740
741 extern __inline __m512i
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
744 {
745   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
746                                                       (__v8di)
747                                                       _mm512_setzero_si512 (),
748                                                       (__mmask8) __U,
749                                                       _MM_FROUND_CUR_DIRECTION);
750 }
751
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_cvttps_epi64 (__m256 __A)
755 {
756   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
757                                                      (__v8di)
758                                                      _mm512_setzero_si512 (),
759                                                      (__mmask8) -1,
760                                                      _MM_FROUND_CUR_DIRECTION);
761 }
762
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
766 {
767   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
768                                                      (__v8di) __W,
769                                                      (__mmask8) __U,
770                                                      _MM_FROUND_CUR_DIRECTION);
771 }
772
773 extern __inline __m512i
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
776 {
777   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
778                                                      (__v8di)
779                                                      _mm512_setzero_si512 (),
780                                                      (__mmask8) __U,
781                                                      _MM_FROUND_CUR_DIRECTION);
782 }
783
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_cvttps_epu64 (__m256 __A)
787 {
788   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
789                                                       (__v8di)
790                                                       _mm512_setzero_si512 (),
791                                                       (__mmask8) -1,
792                                                       _MM_FROUND_CUR_DIRECTION);
793 }
794
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
798 {
799   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
800                                                       (__v8di) __W,
801                                                       (__mmask8) __U,
802                                                       _MM_FROUND_CUR_DIRECTION);
803 }
804
805 extern __inline __m512i
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
808 {
809   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
810                                                       (__v8di)
811                                                       _mm512_setzero_si512 (),
812                                                       (__mmask8) __U,
813                                                       _MM_FROUND_CUR_DIRECTION);
814 }
815
816 extern __inline __m512i
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 _mm512_cvtpd_epi64 (__m512d __A)
819 {
820   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
821                                                     (__v8di)
822                                                     _mm512_setzero_si512 (),
823                                                     (__mmask8) -1,
824                                                     _MM_FROUND_CUR_DIRECTION);
825 }
826
827 extern __inline __m512i
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
830 {
831   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
832                                                     (__v8di) __W,
833                                                     (__mmask8) __U,
834                                                     _MM_FROUND_CUR_DIRECTION);
835 }
836
837 extern __inline __m512i
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
840 {
841   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
842                                                     (__v8di)
843                                                     _mm512_setzero_si512 (),
844                                                     (__mmask8) __U,
845                                                     _MM_FROUND_CUR_DIRECTION);
846 }
847
848 extern __inline __m512i
849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850 _mm512_cvtpd_epu64 (__m512d __A)
851 {
852   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
853                                                      (__v8di)
854                                                      _mm512_setzero_si512 (),
855                                                      (__mmask8) -1,
856                                                      _MM_FROUND_CUR_DIRECTION);
857 }
858
859 extern __inline __m512i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
862 {
863   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
864                                                      (__v8di) __W,
865                                                      (__mmask8) __U,
866                                                      _MM_FROUND_CUR_DIRECTION);
867 }
868
869 extern __inline __m512i
870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
872 {
873   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
874                                                      (__v8di)
875                                                      _mm512_setzero_si512 (),
876                                                      (__mmask8) __U,
877                                                      _MM_FROUND_CUR_DIRECTION);
878 }
879
880 extern __inline __m512i
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm512_cvtps_epi64 (__m256 __A)
883 {
884   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
885                                                     (__v8di)
886                                                     _mm512_setzero_si512 (),
887                                                     (__mmask8) -1,
888                                                     _MM_FROUND_CUR_DIRECTION);
889 }
890
891 extern __inline __m512i
892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
894 {
895   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
896                                                     (__v8di) __W,
897                                                     (__mmask8) __U,
898                                                     _MM_FROUND_CUR_DIRECTION);
899 }
900
901 extern __inline __m512i
902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
903 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
904 {
905   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
906                                                     (__v8di)
907                                                     _mm512_setzero_si512 (),
908                                                     (__mmask8) __U,
909                                                     _MM_FROUND_CUR_DIRECTION);
910 }
911
912 extern __inline __m512i
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 _mm512_cvtps_epu64 (__m256 __A)
915 {
916   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
917                                                      (__v8di)
918                                                      _mm512_setzero_si512 (),
919                                                      (__mmask8) -1,
920                                                      _MM_FROUND_CUR_DIRECTION);
921 }
922
923 extern __inline __m512i
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
926 {
927   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
928                                                      (__v8di) __W,
929                                                      (__mmask8) __U,
930                                                      _MM_FROUND_CUR_DIRECTION);
931 }
932
933 extern __inline __m512i
934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
935 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
936 {
937   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
938                                                      (__v8di)
939                                                      _mm512_setzero_si512 (),
940                                                      (__mmask8) __U,
941                                                      _MM_FROUND_CUR_DIRECTION);
942 }
943
944 extern __inline __m256
945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
946 _mm512_cvtepi64_ps (__m512i __A)
947 {
948   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
949                                                    (__v8sf)
950                                                    _mm256_setzero_ps (),
951                                                    (__mmask8) -1,
952                                                    _MM_FROUND_CUR_DIRECTION);
953 }
954
955 extern __inline __m256
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
958 {
959   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
960                                                    (__v8sf) __W,
961                                                    (__mmask8) __U,
962                                                    _MM_FROUND_CUR_DIRECTION);
963 }
964
965 extern __inline __m256
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
968 {
969   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
970                                                    (__v8sf)
971                                                    _mm256_setzero_ps (),
972                                                    (__mmask8) __U,
973                                                    _MM_FROUND_CUR_DIRECTION);
974 }
975
976 extern __inline __m256
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 _mm512_cvtepu64_ps (__m512i __A)
979 {
980   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
981                                                     (__v8sf)
982                                                     _mm256_setzero_ps (),
983                                                     (__mmask8) -1,
984                                                     _MM_FROUND_CUR_DIRECTION);
985 }
986
987 extern __inline __m256
988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
990 {
991   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
992                                                     (__v8sf) __W,
993                                                     (__mmask8) __U,
994                                                     _MM_FROUND_CUR_DIRECTION);
995 }
996
997 extern __inline __m256
998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
999 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
1000 {
1001   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1002                                                     (__v8sf)
1003                                                     _mm256_setzero_ps (),
1004                                                     (__mmask8) __U,
1005                                                     _MM_FROUND_CUR_DIRECTION);
1006 }
1007
1008 extern __inline __m512d
1009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1010 _mm512_cvtepi64_pd (__m512i __A)
1011 {
1012   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1013                                                     (__v8df)
1014                                                     _mm512_setzero_pd (),
1015                                                     (__mmask8) -1,
1016                                                     _MM_FROUND_CUR_DIRECTION);
1017 }
1018
1019 extern __inline __m512d
1020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1021 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
1022 {
1023   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1024                                                     (__v8df) __W,
1025                                                     (__mmask8) __U,
1026                                                     _MM_FROUND_CUR_DIRECTION);
1027 }
1028
1029 extern __inline __m512d
1030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1031 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
1032 {
1033   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1034                                                     (__v8df)
1035                                                     _mm512_setzero_pd (),
1036                                                     (__mmask8) __U,
1037                                                     _MM_FROUND_CUR_DIRECTION);
1038 }
1039
1040 extern __inline __m512d
1041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1042 _mm512_cvtepu64_pd (__m512i __A)
1043 {
1044   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1045                                                      (__v8df)
1046                                                      _mm512_setzero_pd (),
1047                                                      (__mmask8) -1,
1048                                                      _MM_FROUND_CUR_DIRECTION);
1049 }
1050
1051 extern __inline __m512d
1052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
1054 {
1055   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1056                                                      (__v8df) __W,
1057                                                      (__mmask8) __U,
1058                                                      _MM_FROUND_CUR_DIRECTION);
1059 }
1060
1061 extern __inline __m512d
1062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1063 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
1064 {
1065   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1066                                                      (__v8df)
1067                                                      _mm512_setzero_pd (),
1068                                                      (__mmask8) __U,
1069                                                      _MM_FROUND_CUR_DIRECTION);
1070 }
1071
1072 #ifdef __OPTIMIZE__
1073 extern __inline __mmask8
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075 _kshiftli_mask8 (__mmask8 __A, unsigned int __B)
1076 {
1077   return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
1078 }
1079
1080 extern __inline __mmask8
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _kshiftri_mask8 (__mmask8 __A, unsigned int __B)
1083 {
1084   return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
1085 }
1086
1087 extern __inline __m512d
1088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1089 _mm512_range_pd (__m512d __A, __m512d __B, int __C)
1090 {
1091   return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1092                                                    (__v8df) __B, __C,
1093                                                    (__v8df)
1094                                                    _mm512_setzero_pd (),
1095                                                    (__mmask8) -1,
1096                                                    _MM_FROUND_CUR_DIRECTION);
1097 }
1098
1099 extern __inline __m512d
1100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101 _mm512_mask_range_pd (__m512d __W, __mmask8 __U,
1102                       __m512d __A, __m512d __B, int __C)
1103 {
1104   return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1105                                                    (__v8df) __B, __C,
1106                                                    (__v8df) __W,
1107                                                    (__mmask8) __U,
1108                                                    _MM_FROUND_CUR_DIRECTION);
1109 }
1110
1111 extern __inline __m512d
1112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1113 _mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
1114 {
1115   return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1116                                                    (__v8df) __B, __C,
1117                                                    (__v8df)
1118                                                    _mm512_setzero_pd (),
1119                                                    (__mmask8) __U,
1120                                                    _MM_FROUND_CUR_DIRECTION);
1121 }
1122
1123 extern __inline __m512
1124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125 _mm512_range_ps (__m512 __A, __m512 __B, int __C)
1126 {
1127   return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1128                                                   (__v16sf) __B, __C,
1129                                                   (__v16sf)
1130                                                   _mm512_setzero_ps (),
1131                                                   (__mmask16) -1,
1132                                                   _MM_FROUND_CUR_DIRECTION);
1133 }
1134
1135 extern __inline __m512
1136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1137 _mm512_mask_range_ps (__m512 __W, __mmask16 __U,
1138                       __m512 __A, __m512 __B, int __C)
1139 {
1140   return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1141                                                   (__v16sf) __B, __C,
1142                                                   (__v16sf) __W,
1143                                                   (__mmask16) __U,
1144                                                   _MM_FROUND_CUR_DIRECTION);
1145 }
1146
1147 extern __inline __m512
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149 _mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
1150 {
1151   return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1152                                                   (__v16sf) __B, __C,
1153                                                   (__v16sf)
1154                                                   _mm512_setzero_ps (),
1155                                                   (__mmask16) __U,
1156                                                   _MM_FROUND_CUR_DIRECTION);
1157 }
1158
1159 extern __inline __m128d
1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 _mm_reduce_sd (__m128d __A, __m128d __B, int __C)
1162 {
1163   return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1164                                                  (__v2df) __B, __C,
1165                                                  (__v2df) _mm_setzero_pd (),
1166                                                  (__mmask8) -1);
1167 }
1168
1169 extern __inline __m128d
1170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171 _mm_mask_reduce_sd (__m128d __W,  __mmask8 __U, __m128d __A,
1172                     __m128d __B, int __C)
1173 {
1174   return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1175                                                  (__v2df) __B, __C,
1176                                                  (__v2df) __W,
1177                                                  (__mmask8) __U);
1178 }
1179
1180 extern __inline __m128d
1181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1182 _mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1183 {
1184   return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1185                                                  (__v2df) __B, __C,
1186                                                  (__v2df) _mm_setzero_pd (),
1187                                                  (__mmask8) __U);
1188 }
1189
1190 extern __inline __m128
1191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1192 _mm_reduce_ss (__m128 __A, __m128 __B, int __C)
1193 {
1194   return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1195                                                 (__v4sf) __B, __C,
1196                                                 (__v4sf) _mm_setzero_ps (),
1197                                                 (__mmask8) -1);
1198 }
1199
1200
1201 extern __inline __m128
1202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1203 _mm_mask_reduce_ss (__m128 __W,  __mmask8 __U, __m128 __A,
1204                     __m128 __B, int __C)
1205 {
1206   return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1207                                                 (__v4sf) __B, __C,
1208                                                 (__v4sf) __W,
1209                                                 (__mmask8) __U);
1210 }
1211
1212 extern __inline __m128
1213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214 _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1215 {
1216   return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1217                                                 (__v4sf) __B, __C,
1218                                                 (__v4sf) _mm_setzero_ps (),
1219                                                 (__mmask8) __U);
1220 }
1221
1222 extern __inline __m128d
1223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1224 _mm_range_sd (__m128d __A, __m128d __B, int __C)
1225 {
1226   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1227                                                    (__v2df) __B, __C,
1228                                                    (__v2df)
1229                                                    _mm_setzero_pd (),
1230                                                    (__mmask8) -1,
1231                                                    _MM_FROUND_CUR_DIRECTION);
1232 }
1233
1234 extern __inline __m128d
1235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1236 _mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C)
1237 {
1238   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1239                                                    (__v2df) __B, __C,
1240                                                    (__v2df) __W,
1241                                                    (__mmask8) __U,
1242                                                    _MM_FROUND_CUR_DIRECTION);
1243 }
1244
1245 extern __inline __m128d
1246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1247 _mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1248 {
1249   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1250                                                    (__v2df) __B, __C,
1251                                                    (__v2df)
1252                                                    _mm_setzero_pd (),
1253                                                    (__mmask8) __U,
1254                                                    _MM_FROUND_CUR_DIRECTION);
1255 }
1256
1257 extern __inline __m128
1258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1259 _mm_range_ss (__m128 __A, __m128 __B, int __C)
1260 {
1261   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1262                                                   (__v4sf) __B, __C,
1263                                                   (__v4sf)
1264                                                   _mm_setzero_ps (),
1265                                                   (__mmask8) -1,
1266                                                   _MM_FROUND_CUR_DIRECTION);
1267 }
1268
1269 extern __inline __m128
1270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1271 _mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C)
1272 {
1273   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1274                                                   (__v4sf) __B, __C,
1275                                                   (__v4sf) __W,
1276                                                   (__mmask8) __U,
1277                                                   _MM_FROUND_CUR_DIRECTION);
1278 }
1279
1280
1281 extern __inline __m128
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283 _mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1284 {
1285   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1286                                                   (__v4sf) __B, __C,
1287                                                   (__v4sf)
1288                                                   _mm_setzero_ps (),
1289                                                   (__mmask8) __U,
1290                                                   _MM_FROUND_CUR_DIRECTION);
1291 }
1292
1293 extern __inline __m128d
1294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1295 _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
1296 {
1297   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1298                                                    (__v2df) __B, __C,
1299                                                    (__v2df)
1300                                                    _mm_setzero_pd (),
1301                                                    (__mmask8) -1, __R);
1302 }
1303
1304 extern __inline __m128d
1305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1306 _mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
1307                          int __C, const int __R)
1308 {
1309   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1310                                                    (__v2df) __B, __C,
1311                                                    (__v2df) __W,
1312                                                    (__mmask8) __U, __R);
1313 }
1314
1315 extern __inline __m128d
1316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1317 _mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C,
1318                           const int __R)
1319 {
1320   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1321                                                    (__v2df) __B, __C,
1322                                                    (__v2df)
1323                                                    _mm_setzero_pd (),
1324                                                    (__mmask8) __U, __R);
1325 }
1326
1327 extern __inline __m128
1328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1329 _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
1330 {
1331   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1332                                                   (__v4sf) __B, __C,
1333                                                   (__v4sf)
1334                                                   _mm_setzero_ps (),
1335                                                   (__mmask8) -1, __R);
1336 }
1337
1338 extern __inline __m128
1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340 _mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
1341                          int __C, const int __R)
1342 {
1343   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1344                                                   (__v4sf) __B, __C,
1345                                                   (__v4sf) __W,
1346                                                   (__mmask8) __U, __R);
1347 }
1348
1349 extern __inline __m128
1350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1351 _mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C,
1352                           const int __R)
1353 {
1354   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1355                                                   (__v4sf) __B, __C,
1356                                                   (__v4sf)
1357                                                   _mm_setzero_ps (),
1358                                                   (__mmask8) __U, __R);
1359 }
1360
1361 extern __inline __mmask8
1362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1363 _mm_fpclass_ss_mask (__m128 __A, const int __imm)
1364 {
1365   return (__mmask8) __builtin_ia32_fpclassss ((__v4sf) __A, __imm);
1366 }
1367
1368 extern __inline __mmask8
1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1370 _mm_fpclass_sd_mask (__m128d __A, const int __imm)
1371 {
1372   return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
1373 }
1374
1375 extern __inline __m512i
1376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1377 _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
1378 {
1379   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1380                                                      (__v8di)
1381                                                      _mm512_setzero_si512 (),
1382                                                      (__mmask8) -1,
1383                                                      __R);
1384 }
1385
1386 extern __inline __m512i
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1389                                 const int __R)
1390 {
1391   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1392                                                      (__v8di) __W,
1393                                                      (__mmask8) __U,
1394                                                      __R);
1395 }
1396
1397 extern __inline __m512i
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1400                                  const int __R)
1401 {
1402   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1403                                                      (__v8di)
1404                                                      _mm512_setzero_si512 (),
1405                                                      (__mmask8) __U,
1406                                                      __R);
1407 }
1408
1409 extern __inline __m512i
1410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1411 _mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
1412 {
1413   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1414                                                       (__v8di)
1415                                                       _mm512_setzero_si512 (),
1416                                                       (__mmask8) -1,
1417                                                       __R);
1418 }
1419
1420 extern __inline __m512i
1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422 _mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1423                                 const int __R)
1424 {
1425   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1426                                                       (__v8di) __W,
1427                                                       (__mmask8) __U,
1428                                                       __R);
1429 }
1430
1431 extern __inline __m512i
1432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1433 _mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1434                                  const int __R)
1435 {
1436   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1437                                                       (__v8di)
1438                                                       _mm512_setzero_si512 (),
1439                                                       (__mmask8) __U,
1440                                                       __R);
1441 }
1442
1443 extern __inline __m512i
1444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1445 _mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
1446 {
1447   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1448                                                      (__v8di)
1449                                                      _mm512_setzero_si512 (),
1450                                                      (__mmask8) -1,
1451                                                      __R);
1452 }
1453
1454 extern __inline __m512i
1455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1456 _mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1457                                 const int __R)
1458 {
1459   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1460                                                      (__v8di) __W,
1461                                                      (__mmask8) __U,
1462                                                      __R);
1463 }
1464
1465 extern __inline __m512i
1466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1467 _mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
1468                                  const int __R)
1469 {
1470   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1471                                                      (__v8di)
1472                                                      _mm512_setzero_si512 (),
1473                                                      (__mmask8) __U,
1474                                                      __R);
1475 }
1476
1477 extern __inline __m512i
1478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1479 _mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
1480 {
1481   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1482                                                       (__v8di)
1483                                                       _mm512_setzero_si512 (),
1484                                                       (__mmask8) -1,
1485                                                       __R);
1486 }
1487
1488 extern __inline __m512i
1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490 _mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1491                                 const int __R)
1492 {
1493   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1494                                                       (__v8di) __W,
1495                                                       (__mmask8) __U,
1496                                                       __R);
1497 }
1498
1499 extern __inline __m512i
1500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501 _mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
1502                                  const int __R)
1503 {
1504   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1505                                                       (__v8di)
1506                                                       _mm512_setzero_si512 (),
1507                                                       (__mmask8) __U,
1508                                                       __R);
1509 }
1510
1511 extern __inline __m512i
1512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1513 _mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
1514 {
1515   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1516                                                     (__v8di)
1517                                                     _mm512_setzero_si512 (),
1518                                                     (__mmask8) -1,
1519                                                     __R);
1520 }
1521
1522 extern __inline __m512i
1523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1524 _mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1525                                const int __R)
1526 {
1527   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1528                                                     (__v8di) __W,
1529                                                     (__mmask8) __U,
1530                                                     __R);
1531 }
1532
1533 extern __inline __m512i
1534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535 _mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1536                                 const int __R)
1537 {
1538   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1539                                                     (__v8di)
1540                                                     _mm512_setzero_si512 (),
1541                                                     (__mmask8) __U,
1542                                                     __R);
1543 }
1544
1545 extern __inline __m512i
1546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1547 _mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
1548 {
1549   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1550                                                      (__v8di)
1551                                                      _mm512_setzero_si512 (),
1552                                                      (__mmask8) -1,
1553                                                      __R);
1554 }
1555
1556 extern __inline __m512i
1557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1558 _mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1559                                const int __R)
1560 {
1561   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1562                                                      (__v8di) __W,
1563                                                      (__mmask8) __U,
1564                                                      __R);
1565 }
1566
1567 extern __inline __m512i
1568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1569 _mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1570                                 const int __R)
1571 {
1572   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1573                                                      (__v8di)
1574                                                      _mm512_setzero_si512 (),
1575                                                      (__mmask8) __U,
1576                                                      __R);
1577 }
1578
1579 extern __inline __m512i
1580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1581 _mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
1582 {
1583   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1584                                                     (__v8di)
1585                                                     _mm512_setzero_si512 (),
1586                                                     (__mmask8) -1,
1587                                                     __R);
1588 }
1589
1590 extern __inline __m512i
1591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1592 _mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1593                                const int __R)
1594 {
1595   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1596                                                     (__v8di) __W,
1597                                                     (__mmask8) __U,
1598                                                     __R);
1599 }
1600
1601 extern __inline __m512i
1602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1603 _mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
1604                                 const int __R)
1605 {
1606   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1607                                                     (__v8di)
1608                                                     _mm512_setzero_si512 (),
1609                                                     (__mmask8) __U,
1610                                                     __R);
1611 }
1612
1613 extern __inline __m512i
1614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1615 _mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
1616 {
1617   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1618                                                      (__v8di)
1619                                                      _mm512_setzero_si512 (),
1620                                                      (__mmask8) -1,
1621                                                      __R);
1622 }
1623
1624 extern __inline __m512i
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 _mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1627                                const int __R)
1628 {
1629   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1630                                                      (__v8di) __W,
1631                                                      (__mmask8) __U,
1632                                                      __R);
1633 }
1634
1635 extern __inline __m512i
1636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1637 _mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
1638                                 const int __R)
1639 {
1640   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1641                                                      (__v8di)
1642                                                      _mm512_setzero_si512 (),
1643                                                      (__mmask8) __U,
1644                                                      __R);
1645 }
1646
1647 extern __inline __m256
1648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1649 _mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
1650 {
1651   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1652                                                    (__v8sf)
1653                                                    _mm256_setzero_ps (),
1654                                                    (__mmask8) -1,
1655                                                    __R);
1656 }
1657
1658 extern __inline __m256
1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660 _mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1661                                const int __R)
1662 {
1663   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1664                                                    (__v8sf) __W,
1665                                                    (__mmask8) __U,
1666                                                    __R);
1667 }
1668
1669 extern __inline __m256
1670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1671 _mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
1672                                 const int __R)
1673 {
1674   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1675                                                    (__v8sf)
1676                                                    _mm256_setzero_ps (),
1677                                                    (__mmask8) __U,
1678                                                    __R);
1679 }
1680
1681 extern __inline __m256
1682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1683 _mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
1684 {
1685   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1686                                                     (__v8sf)
1687                                                     _mm256_setzero_ps (),
1688                                                     (__mmask8) -1,
1689                                                     __R);
1690 }
1691
1692 extern __inline __m256
1693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1694 _mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1695                                const int __R)
1696 {
1697   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1698                                                     (__v8sf) __W,
1699                                                     (__mmask8) __U,
1700                                                     __R);
1701 }
1702
1703 extern __inline __m256
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
1706                                 const int __R)
1707 {
1708   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1709                                                     (__v8sf)
1710                                                     _mm256_setzero_ps (),
1711                                                     (__mmask8) __U,
1712                                                     __R);
1713 }
1714
1715 extern __inline __m512d
1716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1717 _mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
1718 {
1719   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1720                                                     (__v8df)
1721                                                     _mm512_setzero_pd (),
1722                                                     (__mmask8) -1,
1723                                                     __R);
1724 }
1725
1726 extern __inline __m512d
1727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1728 _mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1729                                const int __R)
1730 {
1731   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1732                                                     (__v8df) __W,
1733                                                     (__mmask8) __U,
1734                                                     __R);
1735 }
1736
1737 extern __inline __m512d
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
1740                                 const int __R)
1741 {
1742   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1743                                                     (__v8df)
1744                                                     _mm512_setzero_pd (),
1745                                                     (__mmask8) __U,
1746                                                     __R);
1747 }
1748
1749 extern __inline __m512d
1750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1751 _mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
1752 {
1753   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1754                                                      (__v8df)
1755                                                      _mm512_setzero_pd (),
1756                                                      (__mmask8) -1,
1757                                                      __R);
1758 }
1759
1760 extern __inline __m512d
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1763                                const int __R)
1764 {
1765   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1766                                                      (__v8df) __W,
1767                                                      (__mmask8) __U,
1768                                                      __R);
1769 }
1770
1771 extern __inline __m512d
1772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1773 _mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
1774                                 const int __R)
1775 {
1776   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1777                                                      (__v8df)
1778                                                      _mm512_setzero_pd (),
1779                                                      (__mmask8) __U,
1780                                                      __R);
1781 }
1782
1783 extern __inline __m512d
1784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1785 _mm512_reduce_pd (__m512d __A, int __B)
1786 {
1787   return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1788                                                     (__v8df)
1789                                                     _mm512_setzero_pd (),
1790                                                     (__mmask8) -1);
1791 }
1792
1793 extern __inline __m512d
1794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1795 _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
1796 {
1797   return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1798                                                     (__v8df) __W,
1799                                                     (__mmask8) __U);
1800 }
1801
1802 extern __inline __m512d
1803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1804 _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
1805 {
1806   return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1807                                                     (__v8df)
1808                                                     _mm512_setzero_pd (),
1809                                                     (__mmask8) __U);
1810 }
1811
1812 extern __inline __m512
1813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1814 _mm512_reduce_ps (__m512 __A, int __B)
1815 {
1816   return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1817                                                    (__v16sf)
1818                                                    _mm512_setzero_ps (),
1819                                                    (__mmask16) -1);
1820 }
1821
1822 extern __inline __m512
1823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1824 _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
1825 {
1826   return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1827                                                    (__v16sf) __W,
1828                                                    (__mmask16) __U);
1829 }
1830
1831 extern __inline __m512
1832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1833 _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
1834 {
1835   return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1836                                                    (__v16sf)
1837                                                    _mm512_setzero_ps (),
1838                                                    (__mmask16) __U);
1839 }
1840
1841 extern __inline __m256
1842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1843 _mm512_extractf32x8_ps (__m512 __A, const int __imm)
1844 {
1845   return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1846                                                     __imm,
1847                                                     (__v8sf)
1848                                                     _mm256_setzero_ps (),
1849                                                     (__mmask8) -1);
1850 }
1851
1852 extern __inline __m256
1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854 _mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
1855                              const int __imm)
1856 {
1857   return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1858                                                     __imm,
1859                                                     (__v8sf) __W,
1860                                                     (__mmask8) __U);
1861 }
1862
1863 extern __inline __m256
1864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1865 _mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
1866                               const int __imm)
1867 {
1868   return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1869                                                     __imm,
1870                                                     (__v8sf)
1871                                                     _mm256_setzero_ps (),
1872                                                     (__mmask8) __U);
1873 }
1874
1875 extern __inline __m128d
1876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1877 _mm512_extractf64x2_pd (__m512d __A, const int __imm)
1878 {
1879   return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1880                                                          __imm,
1881                                                          (__v2df)
1882                                                          _mm_setzero_pd (),
1883                                                          (__mmask8) -1);
1884 }
1885
1886 extern __inline __m128d
1887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1888 _mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
1889                              const int __imm)
1890 {
1891   return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1892                                                          __imm,
1893                                                          (__v2df) __W,
1894                                                          (__mmask8)
1895                                                          __U);
1896 }
1897
1898 extern __inline __m128d
1899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1900 _mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
1901                               const int __imm)
1902 {
1903   return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1904                                                          __imm,
1905                                                          (__v2df)
1906                                                          _mm_setzero_pd (),
1907                                                          (__mmask8)
1908                                                          __U);
1909 }
1910
1911 extern __inline __m256i
1912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1913 _mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
1914 {
1915   return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1916                                                      __imm,
1917                                                      (__v8si)
1918                                                      _mm256_setzero_si256 (),
1919                                                      (__mmask8) -1);
1920 }
1921
1922 extern __inline __m256i
1923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1924 _mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
1925                                 const int __imm)
1926 {
1927   return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1928                                                      __imm,
1929                                                      (__v8si) __W,
1930                                                      (__mmask8) __U);
1931 }
1932
1933 extern __inline __m256i
1934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1935 _mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
1936                                  const int __imm)
1937 {
1938   return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1939                                                      __imm,
1940                                                      (__v8si)
1941                                                      _mm256_setzero_si256 (),
1942                                                      (__mmask8) __U);
1943 }
1944
1945 extern __inline __m128i
1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1947 _mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
1948 {
1949   return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1950                                                          __imm,
1951                                                          (__v2di)
1952                                                          _mm_setzero_si128 (),
1953                                                          (__mmask8) -1);
1954 }
1955
1956 extern __inline __m128i
1957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958 _mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
1959                                 const int __imm)
1960 {
1961   return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1962                                                          __imm,
1963                                                          (__v2di) __W,
1964                                                          (__mmask8)
1965                                                          __U);
1966 }
1967
1968 extern __inline __m128i
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970 _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
1971                                  const int __imm)
1972 {
1973   return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1974                                                          __imm,
1975                                                          (__v2di)
1976                                                          _mm_setzero_si128 (),
1977                                                          (__mmask8)
1978                                                          __U);
1979 }
1980
1981 extern __inline __m512d
1982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1983 _mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
1984                        const int __R)
1985 {
1986   return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1987                                                    (__v8df) __B, __C,
1988                                                    (__v8df)
1989                                                    _mm512_setzero_pd (),
1990                                                    (__mmask8) -1,
1991                                                    __R);
1992 }
1993
1994 extern __inline __m512d
1995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1996 _mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
1997                             __m512d __A, __m512d __B, int __C,
1998                             const int __R)
1999 {
2000   return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2001                                                    (__v8df) __B, __C,
2002                                                    (__v8df) __W,
2003                                                    (__mmask8) __U,
2004                                                    __R);
2005 }
2006
2007 extern __inline __m512d
2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009 _mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2010                              int __C, const int __R)
2011 {
2012   return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2013                                                    (__v8df) __B, __C,
2014                                                    (__v8df)
2015                                                    _mm512_setzero_pd (),
2016                                                    (__mmask8) __U,
2017                                                    __R);
2018 }
2019
2020 extern __inline __m512
2021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2022 _mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
2023 {
2024   return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2025                                                   (__v16sf) __B, __C,
2026                                                   (__v16sf)
2027                                                   _mm512_setzero_ps (),
2028                                                   (__mmask16) -1,
2029                                                   __R);
2030 }
2031
2032 extern __inline __m512
2033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2034 _mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
2035                             __m512 __A, __m512 __B, int __C,
2036                             const int __R)
2037 {
2038   return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2039                                                   (__v16sf) __B, __C,
2040                                                   (__v16sf) __W,
2041                                                   (__mmask16) __U,
2042                                                   __R);
2043 }
2044
2045 extern __inline __m512
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2048                              int __C, const int __R)
2049 {
2050   return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2051                                                   (__v16sf) __B, __C,
2052                                                   (__v16sf)
2053                                                   _mm512_setzero_ps (),
2054                                                   (__mmask16) __U,
2055                                                   __R);
2056 }
2057
2058 extern __inline __m512i
2059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2060 _mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
2061 {
2062   return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2063                                                     (__v8si) __B,
2064                                                     __imm,
2065                                                     (__v16si)
2066                                                     _mm512_setzero_si512 (),
2067                                                     (__mmask16) -1);
2068 }
2069
2070 extern __inline __m512i
2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072 _mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
2073                          __m256i __B, const int __imm)
2074 {
2075   return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2076                                                     (__v8si) __B,
2077                                                     __imm,
2078                                                     (__v16si) __W,
2079                                                     (__mmask16) __U);
2080 }
2081
2082 extern __inline __m512i
2083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2084 _mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
2085                           const int __imm)
2086 {
2087   return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2088                                                     (__v8si) __B,
2089                                                     __imm,
2090                                                     (__v16si)
2091                                                     _mm512_setzero_si512 (),
2092                                                     (__mmask16) __U);
2093 }
2094
2095 extern __inline __m512
2096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2097 _mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
2098 {
2099   return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2100                                                    (__v8sf) __B,
2101                                                    __imm,
2102                                                    (__v16sf)
2103                                                    _mm512_setzero_ps (),
2104                                                    (__mmask16) -1);
2105 }
2106
2107 extern __inline __m512
2108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109 _mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
2110                          __m256 __B, const int __imm)
2111 {
2112   return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2113                                                    (__v8sf) __B,
2114                                                    __imm,
2115                                                    (__v16sf) __W,
2116                                                    (__mmask16) __U);
2117 }
2118
2119 extern __inline __m512
2120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2121 _mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
2122                           const int __imm)
2123 {
2124   return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2125                                                    (__v8sf) __B,
2126                                                    __imm,
2127                                                    (__v16sf)
2128                                                    _mm512_setzero_ps (),
2129                                                    (__mmask16) __U);
2130 }
2131
2132 extern __inline __m512i
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
2135 {
2136   return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2137                                                         (__v2di) __B,
2138                                                         __imm,
2139                                                         (__v8di)
2140                                                         _mm512_setzero_si512 (),
2141                                                         (__mmask8) -1);
2142 }
2143
2144 extern __inline __m512i
2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146 _mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
2147                          __m128i __B, const int __imm)
2148 {
2149   return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2150                                                         (__v2di) __B,
2151                                                         __imm,
2152                                                         (__v8di) __W,
2153                                                         (__mmask8)
2154                                                         __U);
2155 }
2156
2157 extern __inline __m512i
2158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2159 _mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
2160                           const int __imm)
2161 {
2162   return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2163                                                         (__v2di) __B,
2164                                                         __imm,
2165                                                         (__v8di)
2166                                                         _mm512_setzero_si512 (),
2167                                                         (__mmask8)
2168                                                         __U);
2169 }
2170
2171 extern __inline __m512d
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
2174 {
2175   return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2176                                                         (__v2df) __B,
2177                                                         __imm,
2178                                                         (__v8df)
2179                                                         _mm512_setzero_pd (),
2180                                                         (__mmask8) -1);
2181 }
2182
2183 extern __inline __m512d
2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185 _mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
2186                          __m128d __B, const int __imm)
2187 {
2188   return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2189                                                         (__v2df) __B,
2190                                                         __imm,
2191                                                         (__v8df) __W,
2192                                                         (__mmask8)
2193                                                         __U);
2194 }
2195
2196 extern __inline __m512d
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198 _mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
2199                           const int __imm)
2200 {
2201   return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2202                                                         (__v2df) __B,
2203                                                         __imm,
2204                                                         (__v8df)
2205                                                         _mm512_setzero_pd (),
2206                                                         (__mmask8)
2207                                                         __U);
2208 }
2209
2210 extern __inline __mmask8
2211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2212 _mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
2213                              const int __imm)
2214 {
2215   return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2216                                                       __imm, __U);
2217 }
2218
2219 extern __inline __mmask8
2220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221 _mm512_fpclass_pd_mask (__m512d __A, const int __imm)
2222 {
2223   return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2224                                                       __imm,
2225                                                       (__mmask8) -1);
2226 }
2227
2228 extern __inline __mmask16
2229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2230 _mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
2231                              const int __imm)
2232 {
2233   return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2234                                                        __imm, __U);
2235 }
2236
2237 extern __inline __mmask16
2238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2239 _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
2240 {
2241   return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2242                                                        __imm,
2243                                                        (__mmask16) -1);
2244 }
2245
2246 #else
/* Macro forms of the 8-bit mask shifts.  X is the mask value and Y the
   shift count; both are converted to __mmask8 before being handed to the
   builtin, and the result is converted to __mmask8 as well.  */
#define _kshiftli_mask8(X, Y) \
  ((__mmask8) __builtin_ia32_kshiftliqi ( \
    (__mmask8)(X), \
    (__mmask8)(Y)))

#define _kshiftri_mask8(X, Y) \
  ((__mmask8) __builtin_ia32_kshiftriqi ( \
    (__mmask8)(X), \
    (__mmask8)(Y)))
2252
/* Scalar double-precision range macros (plain, masked, zero-merge).  All
   three expand to __builtin_ia32_rangesd128_mask_round with
   _MM_FROUND_CUR_DIRECTION as the rounding argument; they differ only in
   the merge operand (zero vector or W) and the mask (all ones or U).  */
#define _mm_range_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8) -1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_range_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df)(__m128d)(W), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_range_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
2267
/* Scalar single-precision range macros (plain, masked, zero-merge).  All
   three expand to __builtin_ia32_rangess128_mask_round with
   _MM_FROUND_CUR_DIRECTION as the rounding argument; they differ only in
   the merge operand (zero vector or W) and the mask (all ones or U).  */
#define _mm_range_ss(A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8) -1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_range_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf)(__m128)(W), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_range_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
2282
/* Scalar double-precision range macros taking an explicit rounding
   argument R, which is forwarded as the last builtin operand.  The plain
   form uses a zero merge operand and an all-ones mask; the mask form
   merges with W under U; the maskz form uses a zero merge operand under
   U.  */
#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8) -1, \
    (R)))

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df)(__m128d)(W), \
    (__mmask8)(U), \
    (R)))

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8)(U), \
    (R)))
2297
/* Scalar single-precision range macros taking an explicit rounding
   argument R, which is forwarded as the last builtin operand.  The plain
   form uses a zero merge operand and an all-ones mask; the mask form
   merges with W under U; the maskz form uses a zero merge operand under
   U.  */
#define _mm_range_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8) -1, \
    (R)))

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf)(__m128)(W), \
    (__mmask8)(U), \
    (R)))

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8)(U), \
    (R)))
2312
/* Macro forms built on __builtin_ia32_cvttpd2qq512_mask.  A is the source
   vector and B the last (rounding) operand; the plain form uses a zero
   merge operand with mask -1, the mask form merges with W under U, and
   the maskz form uses a zero merge operand under U.  */
#define _mm512_cvtt_roundpd_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    -1, \
    (B)))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
    (A), \
    (__v8di)(W), \
    (U), \
    (B)))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    (U), \
    (B)))
2323
/* Macro forms built on __builtin_ia32_cvttpd2uqq512_mask.  A is the source
   vector and B the last (rounding) operand; the plain form uses a zero
   merge operand with mask -1, the mask form merges with W under U, and
   the maskz form uses a zero merge operand under U.  */
#define _mm512_cvtt_roundpd_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    -1, \
    (B)))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
    (A), \
    (__v8di)(W), \
    (U), \
    (B)))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    (U), \
    (B)))
2332
/* Macro forms built on __builtin_ia32_cvttps2qq512_mask.  A is the source
   vector and B the last (rounding) operand; the plain form uses a zero
   merge operand with mask -1, the mask form merges with W under U, and
   the maskz form uses a zero merge operand under U.  */
#define _mm512_cvtt_roundps_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    -1, \
    (B)))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
    (A), \
    (__v8di)(W), \
    (U), \
    (B)))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    (U), \
    (B)))
2341
/* Macro forms built on __builtin_ia32_cvttps2uqq512_mask.  A is the source
   vector and B the last (rounding) operand; the plain form uses a zero
   merge operand with mask -1, the mask form merges with W under U, and
   the maskz form uses a zero merge operand under U.  */
#define _mm512_cvtt_roundps_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    -1, \
    (B)))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
    (A), \
    (__v8di)(W), \
    (U), \
    (B)))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    (U), \
    (B)))
2350
/* Macro forms built on __builtin_ia32_cvtpd2qq512_mask.  A is the source
   vector and B the last (rounding) operand; the plain form uses a zero
   merge operand with mask -1, the mask form merges with W under U, and
   the maskz form uses a zero merge operand under U.  */
#define _mm512_cvt_roundpd_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    -1, \
    (B)))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
    (A), \
    (__v8di)(W), \
    (U), \
    (B)))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    (U), \
    (B)))
2359
/* Macro forms built on __builtin_ia32_cvtpd2uqq512_mask.  A is the source
   vector and B the last (rounding) operand; the plain form uses a zero
   merge operand with mask -1, the mask form merges with W under U, and
   the maskz form uses a zero merge operand under U.  */
#define _mm512_cvt_roundpd_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    -1, \
    (B)))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
    (A), \
    (__v8di)(W), \
    (U), \
    (B)))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    (U), \
    (B)))
2368
/* Macro forms built on __builtin_ia32_cvtps2qq512_mask.  A is the source
   vector and B the last (rounding) operand; the plain form uses a zero
   merge operand with mask -1, the mask form merges with W under U, and
   the maskz form uses a zero merge operand under U.  */
#define _mm512_cvt_roundps_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    -1, \
    (B)))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
    (A), \
    (__v8di)(W), \
    (U), \
    (B)))

#define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    (U), \
    (B)))
2377
/* Macro forms built on __builtin_ia32_cvtps2uqq512_mask.  A is the source
   vector and B the last (rounding) operand; the plain form uses a zero
   merge operand with mask -1, the mask form merges with W under U, and
   the maskz form uses a zero merge operand under U.  */
#define _mm512_cvt_roundps_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    -1, \
    (B)))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
    (A), \
    (__v8di)(W), \
    (U), \
    (B)))

#define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
    (A), \
    (__v8di) _mm512_setzero_si512 (), \
    (U), \
    (B)))
2386
/* Macro forms built on __builtin_ia32_cvtqq2ps512_mask; the result is a
   256-bit __m256.  A is cast to __v8di and B is the last (rounding)
   operand.  The plain form uses a zero merge operand with mask -1, the
   mask form passes W (uncast) with U, and the maskz form uses a zero
   merge operand under U.  */
#define _mm512_cvt_roundepi64_ps(A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
    (__v8di)(A), \
    (__v8sf) _mm256_setzero_ps (), \
    -1, \
    (B)))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
    (__v8di)(A), \
    (W), \
    (U), \
    (B)))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
    (__v8di)(A), \
    (__v8sf) _mm256_setzero_ps (), \
    (U), \
    (B)))
2395
/* Macro forms built on __builtin_ia32_cvtuqq2ps512_mask; the result is a
   256-bit __m256.  A is cast to __v8di and B is the last (rounding)
   operand.  The plain form uses a zero merge operand with mask -1, the
   mask form passes W (uncast) with U, and the maskz form uses a zero
   merge operand under U.  */
#define _mm512_cvt_roundepu64_ps(A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
    (__v8di)(A), \
    (__v8sf) _mm256_setzero_ps (), \
    -1, \
    (B)))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
    (__v8di)(A), \
    (W), \
    (U), \
    (B)))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
    (__v8di)(A), \
    (__v8sf) _mm256_setzero_ps (), \
    (U), \
    (B)))
2404
/* Macro forms built on __builtin_ia32_cvtqq2pd512_mask.  A is cast to
   __v8di and B is the last (rounding) operand.  The plain form uses a
   zero merge operand with mask -1, the mask form passes W (uncast) with
   U, and the maskz form uses a zero merge operand under U.  */
#define _mm512_cvt_roundepi64_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
    (__v8di)(A), \
    (__v8df) _mm512_setzero_pd (), \
    -1, \
    (B)))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
    (__v8di)(A), \
    (W), \
    (U), \
    (B)))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
    (__v8di)(A), \
    (__v8df) _mm512_setzero_pd (), \
    (U), \
    (B)))
2413
/* Macro forms built on __builtin_ia32_cvtuqq2pd512_mask.  A is cast to
   __v8di and B is the last (rounding) operand.  The plain form uses a
   zero merge operand with mask -1, the mask form passes W (uncast) with
   U, and the maskz form uses a zero merge operand under U.  */
#define _mm512_cvt_roundepu64_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
    (__v8di)(A), \
    (__v8df) _mm512_setzero_pd (), \
    -1, \
    (B)))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
    (__v8di)(A), \
    (W), \
    (U), \
    (B)))

#define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
    (__v8di)(A), \
    (__v8df) _mm512_setzero_pd (), \
    (U), \
    (B)))
2422
/* 512-bit double-precision reduce macros built on
   __builtin_ia32_reducepd512_mask.  A is the source vector and B the int
   immediate; the merge operand is a zero vector (plain, maskz) or W
   (mask), and the mask is all ones (plain) or U.  */
#define _mm512_reduce_pd(A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (int)(B), \
    (__v8df) _mm512_setzero_pd (), \
    (__mmask8)-1))

#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (int)(B), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (int)(B), \
    (__v8df) _mm512_setzero_pd (), \
    (__mmask8)(U)))
2434
/* 512-bit single-precision reduce macros built on
   __builtin_ia32_reduceps512_mask.  A is the source vector and B the int
   immediate; the merge operand is a zero vector (plain, maskz) or W
   (mask), and the 16-bit mask is all ones (plain) or U.  */
#define _mm512_reduce_ps(A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
    (__v16sf)(__m512)(A), \
    (int)(B), \
    (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)-1))

#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
    (__v16sf)(__m512)(A), \
    (int)(B), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))

#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
    (__v16sf)(__m512)(A), \
    (int)(B), \
    (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)(U)))
2446
/* Extract macros built on __builtin_ia32_extractf32x8_mask, yielding a
   256-bit __m256 from the 512-bit vector X with C as the int immediate.
   The merge operand is a zero vector (plain, maskz) or W (mask); the
   8-bit mask is all ones (plain) or U.  */
#define _mm512_extractf32x8_ps(X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
    (__v16sf)(__m512) (X), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8)-1))

#define _mm512_mask_extractf32x8_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
    (__v16sf)(__m512) (X), \
    (int) (C), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_extractf32x8_ps(U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
    (__v16sf)(__m512) (X), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
2458
/* Extract macros built on __builtin_ia32_extractf64x2_512_mask, yielding a
   128-bit __m128d from the 512-bit vector X with C as the int immediate.
   The merge operand is a zero vector (plain, maskz) or W (mask); the
   8-bit mask is all ones (plain) or U.  */
#define _mm512_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
    (__v8df)(__m512d) (X), \
    (int) (C), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8)-1))

#define _mm512_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
    (__v8df)(__m512d) (X), \
    (int) (C), \
    (__v2df)(__m128d) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
    (__v8df)(__m512d) (X), \
    (int) (C), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))
2470
/* Extract macros built on __builtin_ia32_extracti32x8_mask, yielding a
   256-bit __m256i from the 512-bit vector X with C as the int immediate.
   The merge operand is a zero vector (plain, maskz) or W (mask); the
   8-bit mask is all ones (plain) or U.  */
#define _mm512_extracti32x8_epi32(X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
    (__v16si)(__m512i) (X), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8)-1))

#define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
    (__v16si)(__m512i) (X), \
    (int) (C), \
    (__v8si)(__m256i) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_extracti32x8_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
    (__v16si)(__m512i) (X), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
2482
/* Extract macros built on __builtin_ia32_extracti64x2_512_mask, yielding a
   128-bit __m128i from the 512-bit vector X with C as the int immediate.
   The merge operand is a zero vector (plain, maskz) or W (mask); the
   8-bit mask is all ones (plain) or U.  */
#define _mm512_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
    (__v8di)(__m512i) (X), \
    (int) (C), \
    (__v2di)(__m128i) _mm_setzero_si128 (), \
    (__mmask8)-1))

#define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
    (__v8di)(__m512i) (X), \
    (int) (C), \
    (__v2di)(__m128i) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
    (__v8di)(__m512i) (X), \
    (int) (C), \
    (__v2di)(__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))
2494
/* 512-bit double-precision range macros built on
   __builtin_ia32_rangepd512_mask, always passing
   _MM_FROUND_CUR_DIRECTION as the rounding operand.  The merge operand is
   a zero vector (plain, maskz) or W (mask); the 8-bit mask is all ones
   (plain) or U.  */
#define _mm512_range_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), \
    (int)(C), \
    (__v8df) _mm512_setzero_pd (), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), \
    (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), \
    (int)(C), \
    (__v8df) _mm512_setzero_pd (), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
2509
/* 512-bit single-precision range macros built on
   __builtin_ia32_rangeps512_mask, always passing
   _MM_FROUND_CUR_DIRECTION as the rounding operand.  The merge operand is
   a zero vector (plain, maskz) or W (mask); the 16-bit mask is all ones
   (plain) or U.  */
#define _mm512_range_ps(A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), \
    (int)(C), \
    (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), \
    (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), \
    (int)(C), \
    (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))
2524
/* 512-bit double-precision range macros with an explicit rounding
   argument R, forwarded as the last operand of
   __builtin_ia32_rangepd512_mask.  The merge operand is a zero vector
   (plain, maskz) or W (mask); the 8-bit mask is all ones (plain) or U.  */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), \
    (int)(C), \
    (__v8df) _mm512_setzero_pd (), \
    (__mmask8)-1, \
    (R)))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), \
    (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U), \
    (R)))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), \
    (int)(C), \
    (__v8df) _mm512_setzero_pd (), \
    (__mmask8)(U), \
    (R)))
2539
/* 512-bit single-precision range macros with an explicit rounding
   argument R, forwarded as the last operand of
   __builtin_ia32_rangeps512_mask.  The merge operand is a zero vector
   (plain, maskz) or W (mask); the 16-bit mask is all ones (plain) or
   U.  */
#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), \
    (int)(C), \
    (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)-1, \
    (R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), \
    (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U), \
    (R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), \
    (int)(C), \
    (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)(U), \
    (R)))
2554
/* Unmasked insert of the 128-bit double-precision vector Y into the
   512-bit vector X, with C as the int immediate.  The merge operand is a
   zero vector, matching the inline-function form of this intrinsic; with
   an all-ones mask the merge operand is not selected, and using a zero
   vector avoids expanding (and therefore evaluating) X twice.  */
#define _mm512_insertf64x2(X, Y, C)                                     \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),                                   \
    (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8)-1))
2559
/* Masked and zero-merge inserts of the 128-bit double-precision vector Y
   into X via __builtin_ia32_insertf64x2_512_mask, with C as the int
   immediate.  The mask form merges with W under U; the maskz form uses a
   zero merge operand under U.  */
#define _mm512_mask_insertf64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
    (__v8df)(__m512d) (X), \
    (__v2df)(__m128d) (Y), \
    (int) (C), \
    (__v8df)(__m512d) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_insertf64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
    (__v8df)(__m512d) (X), \
    (__v2df)(__m128d) (Y), \
    (int) (C), \
    (__v8df)(__m512d) _mm512_setzero_pd (), \
    (__mmask8) (U)))
2569
/* Unmasked insert of the 128-bit integer vector Y into the 512-bit vector
   X, with C as the int immediate.  The merge operand is a zero vector,
   matching the inline-function form of this intrinsic; with an all-ones
   mask the merge operand is not selected, and using a zero vector avoids
   expanding (and therefore evaluating) X twice.  */
#define _mm512_inserti64x2(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),                                   \
    (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8)-1))
2573
/* Masked and zero-merge inserts of the 128-bit integer vector Y into X
   via __builtin_ia32_inserti64x2_512_mask, with C as the int immediate.
   The mask form merges with W under U; the maskz form uses a zero merge
   operand under U.  */
#define _mm512_mask_inserti64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
    (__v8di)(__m512i) (X), \
    (__v2di)(__m128i) (Y), \
    (int) (C), \
    (__v8di)(__m512i) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_inserti64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
    (__v8di)(__m512i) (X), \
    (__v2di)(__m128i) (Y), \
    (int) (C), \
    (__v8di)(__m512i) _mm512_setzero_si512 (), \
    (__mmask8) (U)))
2583
/* Inserts of the 256-bit single-precision vector Y into the 512-bit
   vector X via __builtin_ia32_insertf32x8_mask, with C as the int
   immediate.  The merge operand is a zero vector (plain, maskz) or W
   (mask); the 16-bit mask is all ones (plain) or U.  */
#define _mm512_insertf32x8(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
    (__v16sf)(__m512) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v16sf)(__m512) _mm512_setzero_ps (), \
    (__mmask16)-1))

#define _mm512_mask_insertf32x8(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
    (__v16sf)(__m512) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))

#define _mm512_maskz_insertf32x8(U, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
    (__v16sf)(__m512) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v16sf)(__m512) _mm512_setzero_ps (), \
    (__mmask16)(U)))
2601
/* Inserts of the 256-bit integer vector Y into the 512-bit vector X via
   __builtin_ia32_inserti32x8_mask, with C as the int immediate.  The
   merge operand is a zero vector (plain, maskz) or W (mask); the 16-bit
   mask is all ones (plain) or U.  */
#define _mm512_inserti32x8(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
    (__v16si)(__m512i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v16si)(__m512i) _mm512_setzero_si512 (), \
    (__mmask16)-1))

#define _mm512_mask_inserti32x8(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
    (__v16si)(__m512i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v16si)(__m512i)(W), \
    (__mmask16)(U)))

#define _mm512_maskz_inserti32x8(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
    (__v16si)(__m512i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v16si)(__m512i) _mm512_setzero_si512 (), \
    (__mmask16)(U)))
2619
/* Scalar single-precision classification: passes X (as __v4sf) and the int
   immediate C to __builtin_ia32_fpclassss and converts the result to
   __mmask8.  The definition ends on its last line without a line
   continuation, so text placed directly after it is not spliced into the
   macro.  */
#define _mm_fpclass_ss_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclassss ((__v4sf) (__m128) (X), (int) (C)))
2622
/* Scalar double-precision classification: passes X (as __v2df) and the int
   immediate C to __builtin_ia32_fpclasssd and converts the result to
   __mmask8.  The definition ends on its last line without a line
   continuation, so text placed directly after it is not spliced into the
   macro.  */
#define _mm_fpclass_sd_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C)))
2625
/* Masked classification of the eight doubles in X: forwards X, the int
   immediate C and the 8-bit mask u to __builtin_ia32_fpclasspd512_mask.  */
#define _mm512_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd512_mask ( \
    (__v8df) (__m512d) (X), \
    (int) (C), \
    (__mmask8)(u)))
2629
/* Masked classification of the sixteen floats in x: forwards x, the int
   immediate c and the mask u to __builtin_ia32_fpclassps512_mask.  The
   mask is converted to __mmask16 (one bit per element); an 8-bit
   conversion would discard the bits for the upper eight elements.  */
#define _mm512_mask_fpclass_ps_mask(u, x, c)                            \
  ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
                                                 (int) (c),             \
                                                 (__mmask16)(u)))
2633
/* Unmasked classification of the eight doubles in X: forwards X and the
   int immediate C to __builtin_ia32_fpclasspd512_mask with an all-ones
   8-bit mask.  */
#define _mm512_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd512_mask ( \
    (__v8df) (__m512d) (X), \
    (int) (C), \
    (__mmask8)-1))
2637
/* Unmasked classification of the sixteen floats in x: forwards x and the
   int immediate c to __builtin_ia32_fpclassps512_mask with an all-ones
   16-bit mask.  The mask must be __mmask16; (__mmask8)-1 is 0x00ff and
   would leave the upper eight elements masked off.  */
#define _mm512_fpclass_ps_mask(x, c)                                    \
  ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
                                                 (int) (c),             \
                                                 (__mmask16)-1))
2641
/* Scalar double-precision reduce macros built on
   __builtin_ia32_reducesd_mask.  A and B are the two source vectors and C
   the int immediate; the merge operand is a zero vector (plain, maskz)
   or W (mask), and the 8-bit mask is all ones (plain) or U.  */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d) __builtin_ia32_reducesd_mask ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8)-1))

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_reducesd_mask ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df)(__m128d)(W), \
    (__mmask8)(U)))

#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_reducesd_mask ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8)(U)))
2655
/* Scalar single-precision reduce macros built on
   __builtin_ia32_reducess_mask.  A and B are the two source vectors and C
   the int immediate; the merge operand is a zero vector (plain, maskz)
   or W (mask), and the 8-bit mask is all ones (plain) or U.  */
#define _mm_reduce_ss(A, B, C) \
  ((__m128) __builtin_ia32_reducess_mask ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8)-1))

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_reducess_mask ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf)(__m128)(W), \
    (__mmask8)(U)))

#define _mm_maskz_reduce_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_reducess_mask ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8)(U)))
2669
2670
2671
2672 #endif
2673
2674 #ifdef __DISABLE_AVX512DQ__
2675 #undef __DISABLE_AVX512DQ__
2676 #pragma GCC pop_options
2677 #endif /* __DISABLE_AVX512DQ__ */
2678
2679 #endif /* _AVX512DQINTRIN_H_INCLUDED */