Update gcc-50 to SVN version 239798 (gcc-5-branch)
[dragonfly.git] / contrib / gcc-5.0 / gcc / config / i386 / avx512vldqintrin.h
1 /* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2
3    This file is part of GCC.
4
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512VLDQINTRIN_H_INCLUDED
29 #define _AVX512VLDQINTRIN_H_INCLUDED
30
31 #if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
32 #pragma GCC push_options
33 #pragma GCC target("avx512vl,avx512dq")
34 #define __DISABLE_AVX512VLDQ__
35 #endif /* !__AVX512VL__ || !__AVX512DQ__ */
36
37 extern __inline __m256i
38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 _mm256_cvttpd_epi64 (__m256d __A)
40 {
41   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
42                                                      (__v4di)
43                                                      _mm256_setzero_si256 (),
44                                                      (__mmask8) -1);
45 }
46
47 extern __inline __m256i
48 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
50 {
51   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
52                                                      (__v4di) __W,
53                                                      (__mmask8) __U);
54 }
55
56 extern __inline __m256i
57 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
58 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
59 {
60   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
61                                                      (__v4di)
62                                                      _mm256_setzero_si256 (),
63                                                      (__mmask8) __U);
64 }
65
66 extern __inline __m128i
67 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68 _mm_cvttpd_epi64 (__m128d __A)
69 {
70   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
71                                                      (__v2di)
72                                                      _mm_setzero_di (),
73                                                      (__mmask8) -1);
74 }
75
76 extern __inline __m128i
77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
79 {
80   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
81                                                      (__v2di) __W,
82                                                      (__mmask8) __U);
83 }
84
85 extern __inline __m128i
86 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
88 {
89   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
90                                                      (__v2di)
91                                                      _mm_setzero_si128 (),
92                                                      (__mmask8) __U);
93 }
94
95 extern __inline __m256i
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm256_cvttpd_epu64 (__m256d __A)
98 {
99   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
100                                                       (__v4di)
101                                                       _mm256_setzero_si256 (),
102                                                       (__mmask8) -1);
103 }
104
105 extern __inline __m256i
106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
108 {
109   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
110                                                       (__v4di) __W,
111                                                       (__mmask8) __U);
112 }
113
114 extern __inline __m256i
115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
117 {
118   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
119                                                       (__v4di)
120                                                       _mm256_setzero_si256 (),
121                                                       (__mmask8) __U);
122 }
123
124 extern __inline __m128i
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _mm_cvttpd_epu64 (__m128d __A)
127 {
128   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
129                                                       (__v2di)
130                                                       _mm_setzero_di (),
131                                                       (__mmask8) -1);
132 }
133
134 extern __inline __m128i
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
137 {
138   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
139                                                       (__v2di) __W,
140                                                       (__mmask8) __U);
141 }
142
143 extern __inline __m128i
144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
146 {
147   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
148                                                       (__v2di)
149                                                       _mm_setzero_si128 (),
150                                                       (__mmask8) __U);
151 }
152
153 extern __inline __m256i
154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 _mm256_cvtpd_epi64 (__m256d __A)
156 {
157   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
158                                                     (__v4di)
159                                                     _mm256_setzero_si256 (),
160                                                     (__mmask8) -1);
161 }
162
163 extern __inline __m256i
164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
165 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
166 {
167   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
168                                                     (__v4di) __W,
169                                                     (__mmask8) __U);
170 }
171
172 extern __inline __m256i
173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
175 {
176   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
177                                                     (__v4di)
178                                                     _mm256_setzero_si256 (),
179                                                     (__mmask8) __U);
180 }
181
182 extern __inline __m128i
183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184 _mm_cvtpd_epi64 (__m128d __A)
185 {
186   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
187                                                     (__v2di)
188                                                     _mm_setzero_di (),
189                                                     (__mmask8) -1);
190 }
191
192 extern __inline __m128i
193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
194 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
195 {
196   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
197                                                     (__v2di) __W,
198                                                     (__mmask8) __U);
199 }
200
201 extern __inline __m128i
202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
203 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
204 {
205   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
206                                                     (__v2di)
207                                                     _mm_setzero_si128 (),
208                                                     (__mmask8) __U);
209 }
210
211 extern __inline __m256i
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 _mm256_cvtpd_epu64 (__m256d __A)
214 {
215   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
216                                                      (__v4di)
217                                                      _mm256_setzero_si256 (),
218                                                      (__mmask8) -1);
219 }
220
221 extern __inline __m256i
222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
224 {
225   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
226                                                      (__v4di) __W,
227                                                      (__mmask8) __U);
228 }
229
230 extern __inline __m256i
231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
232 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
233 {
234   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
235                                                      (__v4di)
236                                                      _mm256_setzero_si256 (),
237                                                      (__mmask8) __U);
238 }
239
240 extern __inline __m128i
241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
242 _mm_cvtpd_epu64 (__m128d __A)
243 {
244   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
245                                                      (__v2di)
246                                                      _mm_setzero_di (),
247                                                      (__mmask8) -1);
248 }
249
250 extern __inline __m128i
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
253 {
254   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
255                                                      (__v2di) __W,
256                                                      (__mmask8) __U);
257 }
258
259 extern __inline __m128i
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
262 {
263   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
264                                                      (__v2di)
265                                                      _mm_setzero_si128 (),
266                                                      (__mmask8) __U);
267 }
268
269 extern __inline __m256i
270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271 _mm256_cvttps_epi64 (__m128 __A)
272 {
273   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
274                                                      (__v4di)
275                                                      _mm256_setzero_si256 (),
276                                                      (__mmask8) -1);
277 }
278
279 extern __inline __m256i
280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
282 {
283   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
284                                                      (__v4di) __W,
285                                                      (__mmask8) __U);
286 }
287
288 extern __inline __m256i
289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
290 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
291 {
292   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
293                                                      (__v4di)
294                                                      _mm256_setzero_si256 (),
295                                                      (__mmask8) __U);
296 }
297
298 extern __inline __m128i
299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
300 _mm_cvttps_epi64 (__m128 __A)
301 {
302   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
303                                                      (__v2di)
304                                                      _mm_setzero_di (),
305                                                      (__mmask8) -1);
306 }
307
308 extern __inline __m128i
309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
311 {
312   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
313                                                      (__v2di) __W,
314                                                      (__mmask8) __U);
315 }
316
317 extern __inline __m128i
318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
320 {
321   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
322                                                      (__v2di)
323                                                      _mm_setzero_di (),
324                                                      (__mmask8) __U);
325 }
326
327 extern __inline __m256i
328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
329 _mm256_cvttps_epu64 (__m128 __A)
330 {
331   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
332                                                       (__v4di)
333                                                       _mm256_setzero_si256 (),
334                                                       (__mmask8) -1);
335 }
336
337 extern __inline __m256i
338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
340 {
341   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
342                                                       (__v4di) __W,
343                                                       (__mmask8) __U);
344 }
345
346 extern __inline __m256i
347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
349 {
350   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
351                                                       (__v4di)
352                                                       _mm256_setzero_si256 (),
353                                                       (__mmask8) __U);
354 }
355
356 extern __inline __m128i
357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
358 _mm_cvttps_epu64 (__m128 __A)
359 {
360   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
361                                                       (__v2di)
362                                                       _mm_setzero_di (),
363                                                       (__mmask8) -1);
364 }
365
366 extern __inline __m128i
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
369 {
370   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
371                                                       (__v2di) __W,
372                                                       (__mmask8) __U);
373 }
374
375 extern __inline __m128i
376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
377 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
378 {
379   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
380                                                       (__v2di)
381                                                       _mm_setzero_di (),
382                                                       (__mmask8) __U);
383 }
384
385 extern __inline __m256d
386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387 _mm256_broadcast_f64x2 (__m128d __A)
388 {
389   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
390                                                            __A,
391                                                            (__v4df)_mm256_undefined_pd(),
392                                                            (__mmask8) -1);
393 }
394
395 extern __inline __m256d
396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397 _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
398 {
399   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
400                                                            __A,
401                                                            (__v4df)
402                                                            __O, __M);
403 }
404
405 extern __inline __m256d
406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
407 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
408 {
409   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
410                                                            __A,
411                                                            (__v4df)
412                                                            _mm256_setzero_ps (),
413                                                            __M);
414 }
415
416 extern __inline __m256i
417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418 _mm256_broadcast_i64x2 (__m128i __A)
419 {
420   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
421                                                            __A,
422                                                            (__v4di)_mm256_undefined_si256(),
423                                                            (__mmask8) -1);
424 }
425
426 extern __inline __m256i
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
429 {
430   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
431                                                            __A,
432                                                            (__v4di)
433                                                            __O, __M);
434 }
435
436 extern __inline __m256i
437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
439 {
440   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
441                                                            __A,
442                                                            (__v4di)
443                                                            _mm256_setzero_si256 (),
444                                                            __M);
445 }
446
447 extern __inline __m256
448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
449 _mm256_broadcast_f32x2 (__m128 __A)
450 {
451   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
452                                                           (__v8sf)_mm256_undefined_ps(),
453                                                           (__mmask8) -1);
454 }
455
456 extern __inline __m256
457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
459 {
460   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
461                                                           (__v8sf) __O,
462                                                           __M);
463 }
464
465 extern __inline __m256
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
468 {
469   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
470                                                           (__v8sf)
471                                                           _mm256_setzero_ps (),
472                                                           __M);
473 }
474
475 extern __inline __m256i
476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
477 _mm256_broadcast_i32x2 (__m128i __A)
478 {
479   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
480                                                            __A,
481                                                           (__v8si)_mm256_undefined_si256(),
482                                                            (__mmask8) -1);
483 }
484
485 extern __inline __m256i
486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
487 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
488 {
489   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
490                                                            __A,
491                                                            (__v8si)
492                                                            __O, __M);
493 }
494
495 extern __inline __m256i
496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
497 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
498 {
499   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
500                                                            __A,
501                                                            (__v8si)
502                                                            _mm256_setzero_si256 (),
503                                                            __M);
504 }
505
506 extern __inline __m128i
507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
508 _mm_broadcast_i32x2 (__m128i __A)
509 {
510   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
511                                                            __A,
512                                                           (__v4si)_mm_undefined_si128(),
513                                                            (__mmask8) -1);
514 }
515
516 extern __inline __m128i
517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
518 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
519 {
520   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
521                                                            __A,
522                                                            (__v4si)
523                                                            __O, __M);
524 }
525
526 extern __inline __m128i
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
529 {
530   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
531                                                            __A,
532                                                            (__v4si)
533                                                            _mm_setzero_si128 (),
534                                                            __M);
535 }
536
537 extern __inline __m256i
538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539 _mm256_mullo_epi64 (__m256i __A, __m256i __B)
540 {
541   return (__m256i) ((__v4du) __A * (__v4du) __B);
542 }
543
544 extern __inline __m256i
545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
546 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
547                          __m256i __B)
548 {
549   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
550                                                   (__v4di) __B,
551                                                   (__v4di) __W,
552                                                   (__mmask8) __U);
553 }
554
555 extern __inline __m256i
556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
557 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
558 {
559   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
560                                                   (__v4di) __B,
561                                                   (__v4di)
562                                                   _mm256_setzero_si256 (),
563                                                   (__mmask8) __U);
564 }
565
566 extern __inline __m128i
567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 _mm_mullo_epi64 (__m128i __A, __m128i __B)
569 {
570   return (__m128i) ((__v2du) __A * (__v2du) __B);
571 }
572
573 extern __inline __m128i
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
576                       __m128i __B)
577 {
578   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
579                                                   (__v2di) __B,
580                                                   (__v2di) __W,
581                                                   (__mmask8) __U);
582 }
583
584 extern __inline __m128i
585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
586 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
587 {
588   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
589                                                   (__v2di) __B,
590                                                   (__v2di)
591                                                   _mm_setzero_di (),
592                                                   (__mmask8) __U);
593 }
594
595 extern __inline __m256d
596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
597 _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
598                        __m256d __B)
599 {
600   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
601                                                   (__v4df) __B,
602                                                   (__v4df) __W,
603                                                   (__mmask8) __U);
604 }
605
606 extern __inline __m256d
607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
608 _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
609 {
610   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
611                                                   (__v4df) __B,
612                                                   (__v4df)
613                                                   _mm256_setzero_pd (),
614                                                   (__mmask8) __U);
615 }
616
617 extern __inline __m128d
618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
620                     __m128d __B)
621 {
622   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
623                                                   (__v2df) __B,
624                                                   (__v2df) __W,
625                                                   (__mmask8) __U);
626 }
627
628 extern __inline __m128d
629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
630 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
631 {
632   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
633                                                   (__v2df) __B,
634                                                   (__v2df)
635                                                   _mm_setzero_pd (),
636                                                   (__mmask8) __U);
637 }
638
639 extern __inline __m256
640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
641 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
642                        __m256 __B)
643 {
644   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
645                                                  (__v8sf) __B,
646                                                  (__v8sf) __W,
647                                                  (__mmask8) __U);
648 }
649
650 extern __inline __m256
651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
652 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
653 {
654   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
655                                                  (__v8sf) __B,
656                                                  (__v8sf)
657                                                  _mm256_setzero_ps (),
658                                                  (__mmask8) __U);
659 }
660
661 extern __inline __m128
662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
663 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
664 {
665   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
666                                                  (__v4sf) __B,
667                                                  (__v4sf) __W,
668                                                  (__mmask8) __U);
669 }
670
671 extern __inline __m128
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
674 {
675   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
676                                                  (__v4sf) __B,
677                                                  (__v4sf)
678                                                  _mm_setzero_ps (),
679                                                  (__mmask8) __U);
680 }
681
682 extern __inline __m256i
683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
684 _mm256_cvtps_epi64 (__m128 __A)
685 {
686   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
687                                                     (__v4di)
688                                                     _mm256_setzero_si256 (),
689                                                     (__mmask8) -1);
690 }
691
692 extern __inline __m256i
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
695 {
696   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
697                                                     (__v4di) __W,
698                                                     (__mmask8) __U);
699 }
700
701 extern __inline __m256i
702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
703 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
704 {
705   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
706                                                     (__v4di)
707                                                     _mm256_setzero_si256 (),
708                                                     (__mmask8) __U);
709 }
710
711 extern __inline __m128i
712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713 _mm_cvtps_epi64 (__m128 __A)
714 {
715   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
716                                                     (__v2di)
717                                                     _mm_setzero_di (),
718                                                     (__mmask8) -1);
719 }
720
721 extern __inline __m128i
722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
723 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
724 {
725   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
726                                                     (__v2di) __W,
727                                                     (__mmask8) __U);
728 }
729
730 extern __inline __m128i
731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
732 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
733 {
734   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
735                                                     (__v2di)
736                                                     _mm_setzero_di (),
737                                                     (__mmask8) __U);
738 }
739
740 extern __inline __m256i
741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
742 _mm256_cvtps_epu64 (__m128 __A)
743 {
744   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
745                                                      (__v4di)
746                                                      _mm256_setzero_si256 (),
747                                                      (__mmask8) -1);
748 }
749
750 extern __inline __m256i
751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
752 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
753 {
754   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
755                                                      (__v4di) __W,
756                                                      (__mmask8) __U);
757 }
758
759 extern __inline __m256i
760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
761 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
762 {
763   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
764                                                      (__v4di)
765                                                      _mm256_setzero_si256 (),
766                                                      (__mmask8) __U);
767 }
768
769 extern __inline __m128i
770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
771 _mm_cvtps_epu64 (__m128 __A)
772 {
773   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
774                                                      (__v2di)
775                                                      _mm_setzero_di (),
776                                                      (__mmask8) -1);
777 }
778
779 extern __inline __m128i
780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
781 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
782 {
783   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
784                                                      (__v2di) __W,
785                                                      (__mmask8) __U);
786 }
787
788 extern __inline __m128i
789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
790 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
791 {
792   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
793                                                      (__v2di)
794                                                      _mm_setzero_di (),
795                                                      (__mmask8) __U);
796 }
797
798 extern __inline __m128
799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
800 _mm256_cvtepi64_ps (__m256i __A)
801 {
802   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
803                                                    (__v4sf)
804                                                    _mm_setzero_ps (),
805                                                    (__mmask8) -1);
806 }
807
808 extern __inline __m128
809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
810 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
811 {
812   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
813                                                    (__v4sf) __W,
814                                                    (__mmask8) __U);
815 }
816
817 extern __inline __m128
818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
819 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
820 {
821   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
822                                                    (__v4sf)
823                                                    _mm_setzero_ps (),
824                                                    (__mmask8) __U);
825 }
826
827 extern __inline __m128
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829 _mm_cvtepi64_ps (__m128i __A)
830 {
831   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
832                                                    (__v4sf)
833                                                    _mm_setzero_ps (),
834                                                    (__mmask8) -1);
835 }
836
837 extern __inline __m128
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
840 {
841   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
842                                                    (__v4sf) __W,
843                                                    (__mmask8) __U);
844 }
845
846 extern __inline __m128
847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
848 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
849 {
850   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
851                                                    (__v4sf)
852                                                    _mm_setzero_ps (),
853                                                    (__mmask8) __U);
854 }
855
856 extern __inline __m128
857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
858 _mm256_cvtepu64_ps (__m256i __A)
859 {
860   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
861                                                     (__v4sf)
862                                                     _mm_setzero_ps (),
863                                                     (__mmask8) -1);
864 }
865
866 extern __inline __m128
867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
868 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
869 {
870   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
871                                                     (__v4sf) __W,
872                                                     (__mmask8) __U);
873 }
874
875 extern __inline __m128
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
878 {
879   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
880                                                     (__v4sf)
881                                                     _mm_setzero_ps (),
882                                                     (__mmask8) __U);
883 }
884
885 extern __inline __m128
886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
887 _mm_cvtepu64_ps (__m128i __A)
888 {
889   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
890                                                     (__v4sf)
891                                                     _mm_setzero_ps (),
892                                                     (__mmask8) -1);
893 }
894
895 extern __inline __m128
896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
897 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
898 {
899   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
900                                                     (__v4sf) __W,
901                                                     (__mmask8) __U);
902 }
903
904 extern __inline __m128
905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
906 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
907 {
908   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
909                                                     (__v4sf)
910                                                     _mm_setzero_ps (),
911                                                     (__mmask8) __U);
912 }
913
914 extern __inline __m256d
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm256_cvtepi64_pd (__m256i __A)
917 {
918   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
919                                                     (__v4df)
920                                                     _mm256_setzero_pd (),
921                                                     (__mmask8) -1);
922 }
923
924 extern __inline __m256d
925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
926 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
927 {
928   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
929                                                     (__v4df) __W,
930                                                     (__mmask8) __U);
931 }
932
933 extern __inline __m256d
934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
935 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
936 {
937   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
938                                                     (__v4df)
939                                                     _mm256_setzero_pd (),
940                                                     (__mmask8) __U);
941 }
942
943 extern __inline __m128d
944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
945 _mm_cvtepi64_pd (__m128i __A)
946 {
947   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
948                                                     (__v2df)
949                                                     _mm_setzero_pd (),
950                                                     (__mmask8) -1);
951 }
952
953 extern __inline __m128d
954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
955 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
956 {
957   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
958                                                     (__v2df) __W,
959                                                     (__mmask8) __U);
960 }
961
962 extern __inline __m128d
963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
964 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
965 {
966   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
967                                                     (__v2df)
968                                                     _mm_setzero_pd (),
969                                                     (__mmask8) __U);
970 }
971
972 extern __inline __m256d
973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
974 _mm256_cvtepu64_pd (__m256i __A)
975 {
976   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
977                                                      (__v4df)
978                                                      _mm256_setzero_pd (),
979                                                      (__mmask8) -1);
980 }
981
982 extern __inline __m256d
983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
984 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
985 {
986   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
987                                                      (__v4df) __W,
988                                                      (__mmask8) __U);
989 }
990
991 extern __inline __m256d
992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
993 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
994 {
995   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
996                                                      (__v4df)
997                                                      _mm256_setzero_pd (),
998                                                      (__mmask8) __U);
999 }
1000
1001 extern __inline __m256d
1002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1003 _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
1004                     __m256d __B)
1005 {
1006   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1007                                                  (__v4df) __B,
1008                                                  (__v4df) __W,
1009                                                  (__mmask8) __U);
1010 }
1011
1012 extern __inline __m256d
1013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1014 _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
1015 {
1016   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1017                                                  (__v4df) __B,
1018                                                  (__v4df)
1019                                                  _mm256_setzero_pd (),
1020                                                  (__mmask8) __U);
1021 }
1022
1023 extern __inline __m128d
1024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1025 _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1026 {
1027   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1028                                                  (__v2df) __B,
1029                                                  (__v2df) __W,
1030                                                  (__mmask8) __U);
1031 }
1032
1033 extern __inline __m128d
1034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1035 _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
1036 {
1037   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1038                                                  (__v2df) __B,
1039                                                  (__v2df)
1040                                                  _mm_setzero_pd (),
1041                                                  (__mmask8) __U);
1042 }
1043
1044 extern __inline __m256
1045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1046 _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1047 {
1048   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1049                                                 (__v8sf) __B,
1050                                                 (__v8sf) __W,
1051                                                 (__mmask8) __U);
1052 }
1053
1054 extern __inline __m256
1055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1056 _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
1057 {
1058   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1059                                                 (__v8sf) __B,
1060                                                 (__v8sf)
1061                                                 _mm256_setzero_ps (),
1062                                                 (__mmask8) __U);
1063 }
1064
1065 extern __inline __m128
1066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1067 _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1068 {
1069   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1070                                                 (__v4sf) __B,
1071                                                 (__v4sf) __W,
1072                                                 (__mmask8) __U);
1073 }
1074
1075 extern __inline __m128
1076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1077 _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
1078 {
1079   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1080                                                 (__v4sf) __B,
1081                                                 (__v4sf)
1082                                                 _mm_setzero_ps (),
1083                                                 (__mmask8) __U);
1084 }
1085
1086 extern __inline __m128d
1087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1088 _mm_cvtepu64_pd (__m128i __A)
1089 {
1090   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1091                                                      (__v2df)
1092                                                      _mm_setzero_pd (),
1093                                                      (__mmask8) -1);
1094 }
1095
1096 extern __inline __m128d
1097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1098 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
1099 {
1100   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1101                                                      (__v2df) __W,
1102                                                      (__mmask8) __U);
1103 }
1104
1105 extern __inline __m128d
1106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1107 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
1108 {
1109   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1110                                                      (__v2df)
1111                                                      _mm_setzero_pd (),
1112                                                      (__mmask8) __U);
1113 }
1114
1115 extern __inline __m256d
1116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1117 _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
1118                     __m256d __B)
1119 {
1120   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1121                                                  (__v4df) __B,
1122                                                  (__v4df) __W,
1123                                                  (__mmask8) __U);
1124 }
1125
1126 extern __inline __m256d
1127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1128 _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
1129 {
1130   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1131                                                  (__v4df) __B,
1132                                                  (__v4df)
1133                                                  _mm256_setzero_pd (),
1134                                                  (__mmask8) __U);
1135 }
1136
1137 extern __inline __m128d
1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1140 {
1141   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1142                                                  (__v2df) __B,
1143                                                  (__v2df) __W,
1144                                                  (__mmask8) __U);
1145 }
1146
1147 extern __inline __m128d
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
1150 {
1151   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1152                                                  (__v2df) __B,
1153                                                  (__v2df)
1154                                                  _mm_setzero_pd (),
1155                                                  (__mmask8) __U);
1156 }
1157
1158 extern __inline __m256
1159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1160 _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1161 {
1162   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1163                                                 (__v8sf) __B,
1164                                                 (__v8sf) __W,
1165                                                 (__mmask8) __U);
1166 }
1167
1168 extern __inline __m256
1169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1170 _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
1171 {
1172   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1173                                                 (__v8sf) __B,
1174                                                 (__v8sf)
1175                                                 _mm256_setzero_ps (),
1176                                                 (__mmask8) __U);
1177 }
1178
1179 extern __inline __m128
1180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1181 _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1182 {
1183   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1184                                                 (__v4sf) __B,
1185                                                 (__v4sf) __W,
1186                                                 (__mmask8) __U);
1187 }
1188
1189 extern __inline __m128
1190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1191 _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
1192 {
1193   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1194                                                 (__v4sf) __B,
1195                                                 (__v4sf)
1196                                                 _mm_setzero_ps (),
1197                                                 (__mmask8) __U);
1198 }
1199
1200 extern __inline __m256d
1201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1202 _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
1203 {
1204   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1205                                                 (__v4df) __B,
1206                                                 (__v4df) __W,
1207                                                 (__mmask8) __U);
1208 }
1209
1210 extern __inline __m256d
1211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1212 _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
1213 {
1214   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1215                                                 (__v4df) __B,
1216                                                 (__v4df)
1217                                                 _mm256_setzero_pd (),
1218                                                 (__mmask8) __U);
1219 }
1220
1221 extern __inline __m128d
1222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1223 _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1224 {
1225   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1226                                                 (__v2df) __B,
1227                                                 (__v2df) __W,
1228                                                 (__mmask8) __U);
1229 }
1230
1231 extern __inline __m128d
1232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1233 _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
1234 {
1235   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1236                                                 (__v2df) __B,
1237                                                 (__v2df)
1238                                                 _mm_setzero_pd (),
1239                                                 (__mmask8) __U);
1240 }
1241
1242 extern __inline __m256
1243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1244 _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1245 {
1246   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1247                                                (__v8sf) __B,
1248                                                (__v8sf) __W,
1249                                                (__mmask8) __U);
1250 }
1251
1252 extern __inline __m256
1253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1254 _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
1255 {
1256   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1257                                                (__v8sf) __B,
1258                                                (__v8sf)
1259                                                _mm256_setzero_ps (),
1260                                                (__mmask8) __U);
1261 }
1262
1263 extern __inline __m128
1264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1265 _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1266 {
1267   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1268                                                (__v4sf) __B,
1269                                                (__v4sf) __W,
1270                                                (__mmask8) __U);
1271 }
1272
1273 extern __inline __m128
1274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1275 _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
1276 {
1277   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1278                                                (__v4sf) __B,
1279                                                (__v4sf)
1280                                                _mm_setzero_ps (),
1281                                                (__mmask8) __U);
1282 }
1283
1284 extern __inline __m128i
1285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1286 _mm_movm_epi32 (__mmask8 __A)
1287 {
1288   return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1289 }
1290
1291 extern __inline __m256i
1292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1293 _mm256_movm_epi32 (__mmask8 __A)
1294 {
1295   return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1296 }
1297
1298 extern __inline __m128i
1299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1300 _mm_movm_epi64 (__mmask8 __A)
1301 {
1302   return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1303 }
1304
1305 extern __inline __m256i
1306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1307 _mm256_movm_epi64 (__mmask8 __A)
1308 {
1309   return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1310 }
1311
1312 extern __inline __mmask8
1313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314 _mm_movepi32_mask (__m128i __A)
1315 {
1316   return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
1317 }
1318
1319 extern __inline __mmask8
1320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1321 _mm256_movepi32_mask (__m256i __A)
1322 {
1323   return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
1324 }
1325
1326 extern __inline __mmask8
1327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1328 _mm_movepi64_mask (__m128i __A)
1329 {
1330   return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1331 }
1332
1333 extern __inline __mmask8
1334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1335 _mm256_movepi64_mask (__m256i __A)
1336 {
1337   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1338 }
1339
1340 #ifdef __OPTIMIZE__
1341 extern __inline __m128d
1342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1343 _mm256_extractf64x2_pd (__m256d __A, const int __imm)
1344 {
1345   return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1346                                                          __imm,
1347                                                          (__v2df)
1348                                                          _mm_setzero_pd (),
1349                                                          (__mmask8) -1);
1350 }
1351
1352 extern __inline __m128d
1353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1354 _mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
1355                              const int __imm)
1356 {
1357   return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1358                                                          __imm,
1359                                                          (__v2df) __W,
1360                                                          (__mmask8)
1361                                                          __U);
1362 }
1363
1364 extern __inline __m128d
1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366 _mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
1367                               const int __imm)
1368 {
1369   return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1370                                                          __imm,
1371                                                          (__v2df)
1372                                                          _mm_setzero_pd (),
1373                                                          (__mmask8)
1374                                                          __U);
1375 }
1376
1377 extern __inline __m128i
1378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1379 _mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
1380 {
1381   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1382                                                          __imm,
1383                                                          (__v2di)
1384                                                          _mm_setzero_di (),
1385                                                          (__mmask8) -1);
1386 }
1387
1388 extern __inline __m128i
1389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1390 _mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
1391                                 const int __imm)
1392 {
1393   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1394                                                          __imm,
1395                                                          (__v2di) __W,
1396                                                          (__mmask8)
1397                                                          __U);
1398 }
1399
1400 extern __inline __m128i
1401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1402 _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
1403                                  const int __imm)
1404 {
1405   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1406                                                          __imm,
1407                                                          (__v2di)
1408                                                          _mm_setzero_di (),
1409                                                          (__mmask8)
1410                                                          __U);
1411 }
1412
1413 extern __inline __m256d
1414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1415 _mm256_reduce_pd (__m256d __A, int __B)
1416 {
1417   return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1418                                                     (__v4df)
1419                                                     _mm256_setzero_pd (),
1420                                                     (__mmask8) -1);
1421 }
1422
1423 extern __inline __m256d
1424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1425 _mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
1426 {
1427   return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1428                                                     (__v4df) __W,
1429                                                     (__mmask8) __U);
1430 }
1431
1432 extern __inline __m256d
1433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1434 _mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
1435 {
1436   return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1437                                                     (__v4df)
1438                                                     _mm256_setzero_pd (),
1439                                                     (__mmask8) __U);
1440 }
1441
1442 extern __inline __m128d
1443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1444 _mm_reduce_pd (__m128d __A, int __B)
1445 {
1446   return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1447                                                     (__v2df)
1448                                                     _mm_setzero_pd (),
1449                                                     (__mmask8) -1);
1450 }
1451
1452 extern __inline __m128d
1453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1454 _mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
1455 {
1456   return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1457                                                     (__v2df) __W,
1458                                                     (__mmask8) __U);
1459 }
1460
1461 extern __inline __m128d
1462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1463 _mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
1464 {
1465   return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1466                                                     (__v2df)
1467                                                     _mm_setzero_pd (),
1468                                                     (__mmask8) __U);
1469 }
1470
1471 extern __inline __m256
1472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473 _mm256_reduce_ps (__m256 __A, int __B)
1474 {
1475   return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1476                                                    (__v8sf)
1477                                                    _mm256_setzero_ps (),
1478                                                    (__mmask8) -1);
1479 }
1480
1481 extern __inline __m256
1482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1483 _mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
1484 {
1485   return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1486                                                    (__v8sf) __W,
1487                                                    (__mmask8) __U);
1488 }
1489
1490 extern __inline __m256
1491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1492 _mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
1493 {
1494   return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1495                                                    (__v8sf)
1496                                                    _mm256_setzero_ps (),
1497                                                    (__mmask8) __U);
1498 }
1499
1500 extern __inline __m128
1501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1502 _mm_reduce_ps (__m128 __A, int __B)
1503 {
1504   return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1505                                                    (__v4sf)
1506                                                    _mm_setzero_ps (),
1507                                                    (__mmask8) -1);
1508 }
1509
1510 extern __inline __m128
1511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1512 _mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
1513 {
1514   return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1515                                                    (__v4sf) __W,
1516                                                    (__mmask8) __U);
1517 }
1518
1519 extern __inline __m128
1520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1521 _mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
1522 {
1523   return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1524                                                    (__v4sf)
1525                                                    _mm_setzero_ps (),
1526                                                    (__mmask8) __U);
1527 }
1528
1529 extern __inline __m256d
1530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1531 _mm256_range_pd (__m256d __A, __m256d __B, int __C)
1532 {
1533   return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1534                                                    (__v4df) __B, __C,
1535                                                    (__v4df)
1536                                                    _mm256_setzero_pd (),
1537                                                    (__mmask8) -1);
1538 }
1539
1540 extern __inline __m256d
1541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542 _mm256_mask_range_pd (__m256d __W, __mmask8 __U,
1543                       __m256d __A, __m256d __B, int __C)
1544 {
1545   return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1546                                                    (__v4df) __B, __C,
1547                                                    (__v4df) __W,
1548                                                    (__mmask8) __U);
1549 }
1550
1551 extern __inline __m256d
1552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553 _mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
1554 {
1555   return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1556                                                    (__v4df) __B, __C,
1557                                                    (__v4df)
1558                                                    _mm256_setzero_pd (),
1559                                                    (__mmask8) __U);
1560 }
1561
1562 extern __inline __m128d
1563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1564 _mm_range_pd (__m128d __A, __m128d __B, int __C)
1565 {
1566   return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1567                                                    (__v2df) __B, __C,
1568                                                    (__v2df)
1569                                                    _mm_setzero_pd (),
1570                                                    (__mmask8) -1);
1571 }
1572
1573 extern __inline __m128d
1574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1575 _mm_mask_range_pd (__m128d __W, __mmask8 __U,
1576                    __m128d __A, __m128d __B, int __C)
1577 {
1578   return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1579                                                    (__v2df) __B, __C,
1580                                                    (__v2df) __W,
1581                                                    (__mmask8) __U);
1582 }
1583
1584 extern __inline __m128d
1585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1586 _mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1587 {
1588   return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1589                                                    (__v2df) __B, __C,
1590                                                    (__v2df)
1591                                                    _mm_setzero_pd (),
1592                                                    (__mmask8) __U);
1593 }
1594
1595 extern __inline __m256
1596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1597 _mm256_range_ps (__m256 __A, __m256 __B, int __C)
1598 {
1599   return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1600                                                   (__v8sf) __B, __C,
1601                                                   (__v8sf)
1602                                                   _mm256_setzero_ps (),
1603                                                   (__mmask8) -1);
1604 }
1605
1606 extern __inline __m256
1607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1608 _mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
1609                       int __C)
1610 {
1611   return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1612                                                   (__v8sf) __B, __C,
1613                                                   (__v8sf) __W,
1614                                                   (__mmask8) __U);
1615 }
1616
1617 extern __inline __m256
1618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619 _mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
1620 {
1621   return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1622                                                   (__v8sf) __B, __C,
1623                                                   (__v8sf)
1624                                                   _mm256_setzero_ps (),
1625                                                   (__mmask8) __U);
1626 }
1627
1628 extern __inline __m128
1629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1630 _mm_range_ps (__m128 __A, __m128 __B, int __C)
1631 {
1632   return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1633                                                   (__v4sf) __B, __C,
1634                                                   (__v4sf)
1635                                                   _mm_setzero_ps (),
1636                                                   (__mmask8) -1);
1637 }
1638
1639 extern __inline __m128
1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 _mm_mask_range_ps (__m128 __W, __mmask8 __U,
1642                    __m128 __A, __m128 __B, int __C)
1643 {
1644   return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1645                                                   (__v4sf) __B, __C,
1646                                                   (__v4sf) __W,
1647                                                   (__mmask8) __U);
1648 }
1649
1650 extern __inline __m128
1651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652 _mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1653 {
1654   return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1655                                                   (__v4sf) __B, __C,
1656                                                   (__v4sf)
1657                                                   _mm_setzero_ps (),
1658                                                   (__mmask8) __U);
1659 }
1660
1661 extern __inline __mmask8
1662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1663 _mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
1664                              const int __imm)
1665 {
1666   return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1667                                                       __imm, __U);
1668 }
1669
1670 extern __inline __mmask8
1671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1672 _mm256_fpclass_pd_mask (__m256d __A, const int __imm)
1673 {
1674   return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1675                                                       __imm,
1676                                                       (__mmask8) -1);
1677 }
1678
1679 extern __inline __mmask8
1680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1681 _mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
1682 {
1683   return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1684                                                       __imm, __U);
1685 }
1686
1687 extern __inline __mmask8
1688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1689 _mm256_fpclass_ps_mask (__m256 __A, const int __imm)
1690 {
1691   return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1692                                                       __imm,
1693                                                       (__mmask8) -1);
1694 }
1695
1696 extern __inline __mmask8
1697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1698 _mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
1699 {
1700   return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1701                                                       __imm, __U);
1702 }
1703
1704 extern __inline __mmask8
1705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1706 _mm_fpclass_pd_mask (__m128d __A, const int __imm)
1707 {
1708   return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1709                                                       __imm,
1710                                                       (__mmask8) -1);
1711 }
1712
1713 extern __inline __mmask8
1714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715 _mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
1716 {
1717   return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1718                                                       __imm, __U);
1719 }
1720
1721 extern __inline __mmask8
1722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1723 _mm_fpclass_ps_mask (__m128 __A, const int __imm)
1724 {
1725   return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1726                                                       __imm,
1727                                                       (__mmask8) -1);
1728 }
1729
1730 extern __inline __m256i
1731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1732 _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
1733 {
1734   return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1735                                                         (__v2di) __B,
1736                                                         __imm,
1737                                                         (__v4di)
1738                                                         _mm256_setzero_si256 (),
1739                                                         (__mmask8) -1);
1740 }
1741
1742 extern __inline __m256i
1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744 _mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
1745                          __m128i __B, const int __imm)
1746 {
1747   return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1748                                                         (__v2di) __B,
1749                                                         __imm,
1750                                                         (__v4di) __W,
1751                                                         (__mmask8)
1752                                                         __U);
1753 }
1754
1755 extern __inline __m256i
1756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1757 _mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
1758                           const int __imm)
1759 {
1760   return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1761                                                         (__v2di) __B,
1762                                                         __imm,
1763                                                         (__v4di)
1764                                                         _mm256_setzero_si256 (),
1765                                                         (__mmask8)
1766                                                         __U);
1767 }
1768
1769 extern __inline __m256d
1770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1771 _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
1772 {
1773   return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1774                                                         (__v2df) __B,
1775                                                         __imm,
1776                                                         (__v4df)
1777                                                         _mm256_setzero_pd (),
1778                                                         (__mmask8) -1);
1779 }
1780
1781 extern __inline __m256d
1782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783 _mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
1784                          __m128d __B, const int __imm)
1785 {
1786   return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1787                                                         (__v2df) __B,
1788                                                         __imm,
1789                                                         (__v4df) __W,
1790                                                         (__mmask8)
1791                                                         __U);
1792 }
1793
1794 extern __inline __m256d
1795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796 _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
1797                           const int __imm)
1798 {
1799   return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1800                                                         (__v2df) __B,
1801                                                         __imm,
1802                                                         (__v4df)
1803                                                         _mm256_setzero_pd (),
1804                                                         (__mmask8)
1805                                                         __U);
1806 }
1807
1808 #else
/* Macro form: expands to the insertf64x2_256 builtin applied to A and B
   with IMM, a zeroed fourth operand and the mask (__mmask8) -1.  */
#define _mm256_insertf64x2(A, B, IMM) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ( \
    (__v4df) (__m256d) (A), \
    (__v2df) (__m128d) (B), \
    (int) (IMM), \
    (__v4df) (__m256d) _mm256_setzero_pd (), \
    (__mmask8) -1))
1814
/* Macro form: expands to the insertf64x2_256 builtin applied to A and B
   with IMM, passing SRC as the fourth operand and MASK as the mask.  */
#define _mm256_mask_insertf64x2(SRC, MASK, A, B, IMM) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ( \
    (__v4df) (__m256d) (A), \
    (__v2df) (__m128d) (B), \
    (int) (IMM), \
    (__v4df) (__m256d) (SRC), \
    (__mmask8) (MASK)))
1820
/* Macro form: expands to the insertf64x2_256 builtin applied to A and B
   with IMM, a zeroed fourth operand and MASK as the mask.  */
#define _mm256_maskz_insertf64x2(MASK, A, B, IMM) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ( \
    (__v4df) (__m256d) (A), \
    (__v2df) (__m128d) (B), \
    (int) (IMM), \
    (__v4df) (__m256d) _mm256_setzero_pd (), \
    (__mmask8) (MASK)))
1826
/* Macro form: expands to the inserti64x2_256 builtin applied to A and B
   with IMM, a zeroed fourth operand and the mask (__mmask8) -1.  */
#define _mm256_inserti64x2(A, B, IMM) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ( \
    (__v4di) (__m256i) (A), \
    (__v2di) (__m128i) (B), \
    (int) (IMM), \
    (__v4di) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))
1832
/* Macro form: expands to the inserti64x2_256 builtin applied to A and B
   with IMM, passing SRC as the fourth operand and MASK as the mask.  */
#define _mm256_mask_inserti64x2(SRC, MASK, A, B, IMM) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ( \
    (__v4di) (__m256i) (A), \
    (__v2di) (__m128i) (B), \
    (int) (IMM), \
    (__v4di) (__m256i) (SRC), \
    (__mmask8) (MASK)))
1838
/* Macro form: expands to the inserti64x2_256 builtin applied to A and B
   with IMM, a zeroed fourth operand and MASK as the mask.  */
#define _mm256_maskz_inserti64x2(MASK, A, B, IMM) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ( \
    (__v4di) (__m256i) (A), \
    (__v2di) (__m128i) (B), \
    (int) (IMM), \
    (__v4di) (__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (MASK)))
1844
/* Macro form: expands to the extractf64x2_256 builtin applied to A with
   IMM, a zeroed third operand and the mask (__mmask8) -1.  */
#define _mm256_extractf64x2_pd(A, IMM) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ( \
    (__v4df) (__m256d) (A), \
    (int) (IMM), \
    (__v2df) (__m128d) _mm_setzero_pd (), \
    (__mmask8) -1))
1848
/* Macro form: expands to the extractf64x2_256 builtin applied to A with
   IMM, passing SRC as the third operand and MASK as the mask.  */
#define _mm256_mask_extractf64x2_pd(SRC, MASK, A, IMM) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ( \
    (__v4df) (__m256d) (A), \
    (int) (IMM), \
    (__v2df) (__m128d) (SRC), \
    (__mmask8) (MASK)))
1852
/* Macro form: expands to the extractf64x2_256 builtin applied to A with
   IMM, a zeroed third operand and MASK as the mask.  */
#define _mm256_maskz_extractf64x2_pd(MASK, A, IMM) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ( \
    (__v4df) (__m256d) (A), \
    (int) (IMM), \
    (__v2df) (__m128d) _mm_setzero_pd (), \
    (__mmask8) (MASK)))
1856
/* Macro form: expands to the extracti64x2_256 builtin applied to A with
   IMM, a zeroed third operand and the mask (__mmask8) -1.  */
#define _mm256_extracti64x2_epi64(A, IMM) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ( \
    (__v4di) (__m256i) (A), \
    (int) (IMM), \
    (__v2di) (__m128i) _mm_setzero_di (), \
    (__mmask8) -1))
1860
/* Macro form: expands to the extracti64x2_256 builtin applied to A with
   IMM, passing SRC as the third operand and MASK as the mask.  */
#define _mm256_mask_extracti64x2_epi64(SRC, MASK, A, IMM) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ( \
    (__v4di) (__m256i) (A), \
    (int) (IMM), \
    (__v2di) (__m128i) (SRC), \
    (__mmask8) (MASK)))
1864
/* Macro form: expands to the extracti64x2_256 builtin applied to A with
   IMM, a zeroed third operand and MASK as the mask.  */
#define _mm256_maskz_extracti64x2_epi64(MASK, A, IMM) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ( \
    (__v4di) (__m256i) (A), \
    (int) (IMM), \
    (__v2di) (__m128i) _mm_setzero_di (), \
    (__mmask8) (MASK)))
1868
/* Macro form: expands to the reducepd256 builtin applied to V with IMM,
   a zeroed third operand and the mask (__mmask8) -1.  */
#define _mm256_reduce_pd(V, IMM) \
  ((__m256d) __builtin_ia32_reducepd256_mask ( \
    (__v4df) (__m256d) (V), \
    (int) (IMM), \
    (__v4df) _mm256_setzero_pd (), \
    (__mmask8) -1))
1872
/* Macro form: expands to the reducepd256 builtin applied to V with IMM,
   passing SRC as the third operand and MASK as the mask.  */
#define _mm256_mask_reduce_pd(SRC, MASK, V, IMM) \
  ((__m256d) __builtin_ia32_reducepd256_mask ( \
    (__v4df) (__m256d) (V), \
    (int) (IMM), \
    (__v4df) (__m256d) (SRC), \
    (__mmask8) (MASK)))
1876
/* Macro form: expands to the reducepd256 builtin applied to V with IMM,
   a zeroed third operand and MASK as the mask.  */
#define _mm256_maskz_reduce_pd(MASK, V, IMM) \
  ((__m256d) __builtin_ia32_reducepd256_mask ( \
    (__v4df) (__m256d) (V), \
    (int) (IMM), \
    (__v4df) _mm256_setzero_pd (), \
    (__mmask8) (MASK)))
1880
/* Macro form: expands to the reducepd128 builtin applied to V with IMM,
   a zeroed third operand and the mask (__mmask8) -1.  */
#define _mm_reduce_pd(V, IMM) \
  ((__m128d) __builtin_ia32_reducepd128_mask ( \
    (__v2df) (__m128d) (V), \
    (int) (IMM), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8) -1))
1884
/* Macro form: expands to the reducepd128 builtin applied to V with IMM,
   passing SRC as the third operand and MASK as the mask.  */
#define _mm_mask_reduce_pd(SRC, MASK, V, IMM) \
  ((__m128d) __builtin_ia32_reducepd128_mask ( \
    (__v2df) (__m128d) (V), \
    (int) (IMM), \
    (__v2df) (__m128d) (SRC), \
    (__mmask8) (MASK)))
1888
/* Macro form: expands to the reducepd128 builtin applied to V with IMM,
   a zeroed third operand and MASK as the mask.  */
#define _mm_maskz_reduce_pd(MASK, V, IMM) \
  ((__m128d) __builtin_ia32_reducepd128_mask ( \
    (__v2df) (__m128d) (V), \
    (int) (IMM), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8) (MASK)))
1892
/* Macro form: expands to the reduceps256 builtin applied to V with IMM,
   a zeroed third operand and the mask (__mmask8) -1.  */
#define _mm256_reduce_ps(V, IMM) \
  ((__m256) __builtin_ia32_reduceps256_mask ( \
    (__v8sf) (__m256) (V), \
    (int) (IMM), \
    (__v8sf) _mm256_setzero_ps (), \
    (__mmask8) -1))
1896
/* Macro form: expands to the reduceps256 builtin applied to V with IMM,
   passing SRC as the third operand and MASK as the mask.  */
#define _mm256_mask_reduce_ps(SRC, MASK, V, IMM) \
  ((__m256) __builtin_ia32_reduceps256_mask ( \
    (__v8sf) (__m256) (V), \
    (int) (IMM), \
    (__v8sf) (__m256) (SRC), \
    (__mmask8) (MASK)))
1900
/* Macro form: expands to the reduceps256 builtin applied to V with IMM,
   a zeroed third operand and MASK as the mask.  */
#define _mm256_maskz_reduce_ps(MASK, V, IMM) \
  ((__m256) __builtin_ia32_reduceps256_mask ( \
    (__v8sf) (__m256) (V), \
    (int) (IMM), \
    (__v8sf) _mm256_setzero_ps (), \
    (__mmask8) (MASK)))
1904
/* Macro form: expands to the reduceps128 builtin applied to V with IMM,
   a zeroed third operand and the mask (__mmask8) -1.  */
#define _mm_reduce_ps(V, IMM) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
    (__v4sf) (__m128) (V), \
    (int) (IMM), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8) -1))
1908
/* Macro form: expands to the reduceps128 builtin applied to V with IMM,
   passing SRC as the third operand and MASK as the mask.  */
#define _mm_mask_reduce_ps(SRC, MASK, V, IMM) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
    (__v4sf) (__m128) (V), \
    (int) (IMM), \
    (__v4sf) (__m128) (SRC), \
    (__mmask8) (MASK)))
1912
/* Macro form: expands to the reduceps128 builtin applied to V with IMM,
   a zeroed third operand and MASK as the mask.  */
#define _mm_maskz_reduce_ps(MASK, V, IMM) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
    (__v4sf) (__m128) (V), \
    (int) (IMM), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8) (MASK)))
1916
/* Macro form: expands to the rangepd256 builtin applied to X and Y with
   IMM, a zeroed fourth operand and the mask (__mmask8) -1.  */
#define _mm256_range_pd(X, Y, IMM) \
  ((__m256d) __builtin_ia32_rangepd256_mask ( \
    (__v4df) (__m256d) (X), \
    (__v4df) (__m256d) (Y), \
    (int) (IMM), \
    (__v4df) _mm256_setzero_pd (), \
    (__mmask8) -1))
1921
/* Macro form: expands to the rangepd256 builtin applied to X and Y with
   IMM, a zeroed fourth operand and MASK as the mask.  */
#define _mm256_maskz_range_pd(MASK, X, Y, IMM) \
  ((__m256d) __builtin_ia32_rangepd256_mask ( \
    (__v4df) (__m256d) (X), \
    (__v4df) (__m256d) (Y), \
    (int) (IMM), \
    (__v4df) _mm256_setzero_pd (), \
    (__mmask8) (MASK)))
1926
/* Macro form: expands to the rangepd128 builtin applied to X and Y with
   IMM, a zeroed fourth operand and the mask (__mmask8) -1.  */
#define _mm_range_pd(X, Y, IMM) \
  ((__m128d) __builtin_ia32_rangepd128_mask ( \
    (__v2df) (__m128d) (X), \
    (__v2df) (__m128d) (Y), \
    (int) (IMM), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8) -1))
1931
/* Macro form: expands to the rangeps256 builtin applied to X and Y with
   IMM, a zeroed fourth operand and the mask (__mmask8) -1.  */
#define _mm256_range_ps(X, Y, IMM) \
  ((__m256) __builtin_ia32_rangeps256_mask ( \
    (__v8sf) (__m256) (X), \
    (__v8sf) (__m256) (Y), \
    (int) (IMM), \
    (__v8sf) _mm256_setzero_ps (), \
    (__mmask8) -1))
1936
/* Macro form: expands to the rangeps256 builtin applied to X and Y with
   IMM, passing SRC as the fourth operand and MASK as the mask.  */
#define _mm256_mask_range_ps(SRC, MASK, X, Y, IMM) \
  ((__m256) __builtin_ia32_rangeps256_mask ( \
    (__v8sf) (__m256) (X), \
    (__v8sf) (__m256) (Y), \
    (int) (IMM), \
    (__v8sf) (__m256) (SRC), \
    (__mmask8) (MASK)))
1941
/* Macro form: expands to the rangeps256 builtin applied to X and Y with
   IMM, a zeroed fourth operand and MASK as the mask.  */
#define _mm256_maskz_range_ps(MASK, X, Y, IMM) \
  ((__m256) __builtin_ia32_rangeps256_mask ( \
    (__v8sf) (__m256) (X), \
    (__v8sf) (__m256) (Y), \
    (int) (IMM), \
    (__v8sf) _mm256_setzero_ps (), \
    (__mmask8) (MASK)))
1946
/* Macro form: expands to the rangeps128 builtin applied to X and Y with
   IMM, a zeroed fourth operand and the mask (__mmask8) -1.  */
#define _mm_range_ps(X, Y, IMM) \
  ((__m128) __builtin_ia32_rangeps128_mask ( \
    (__v4sf) (__m128) (X), \
    (__v4sf) (__m128) (Y), \
    (int) (IMM), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8) -1))
1951
/* Macro form: expands to the rangeps128 builtin applied to X and Y with
   IMM, passing SRC as the fourth operand and MASK as the mask.  */
#define _mm_mask_range_ps(SRC, MASK, X, Y, IMM) \
  ((__m128) __builtin_ia32_rangeps128_mask ( \
    (__v4sf) (__m128) (X), \
    (__v4sf) (__m128) (Y), \
    (int) (IMM), \
    (__v4sf) (__m128) (SRC), \
    (__mmask8) (MASK)))
1956
/* Macro form: expands to the rangeps128 builtin applied to X and Y with
   IMM, a zeroed fourth operand and MASK as the mask.  */
#define _mm_maskz_range_ps(MASK, X, Y, IMM) \
  ((__m128) __builtin_ia32_rangeps128_mask ( \
    (__v4sf) (__m128) (X), \
    (__v4sf) (__m128) (Y), \
    (int) (IMM), \
    (__v4sf) _mm_setzero_ps (), \
    (__mmask8) (MASK)))
1961
/* Macro form: expands to the rangepd256 builtin applied to X and Y with
   IMM, passing SRC as the fourth operand and MASK as the mask.  */
#define _mm256_mask_range_pd(SRC, MASK, X, Y, IMM) \
  ((__m256d) __builtin_ia32_rangepd256_mask ( \
    (__v4df) (__m256d) (X), \
    (__v4df) (__m256d) (Y), \
    (int) (IMM), \
    (__v4df) (__m256d) (SRC), \
    (__mmask8) (MASK)))
1966
/* Macro form: expands to the rangepd128 builtin applied to X and Y with
   IMM, passing SRC as the fourth operand and MASK as the mask.  */
#define _mm_mask_range_pd(SRC, MASK, X, Y, IMM) \
  ((__m128d) __builtin_ia32_rangepd128_mask ( \
    (__v2df) (__m128d) (X), \
    (__v2df) (__m128d) (Y), \
    (int) (IMM), \
    (__v2df) (__m128d) (SRC), \
    (__mmask8) (MASK)))
1971
/* Macro form: expands to the rangepd128 builtin applied to X and Y with
   IMM, a zeroed fourth operand and MASK as the mask.  */
#define _mm_maskz_range_pd(MASK, X, Y, IMM) \
  ((__m128d) __builtin_ia32_rangepd128_mask ( \
    (__v2df) (__m128d) (X), \
    (__v2df) (__m128d) (Y), \
    (int) (IMM), \
    (__v2df) _mm_setzero_pd (), \
    (__mmask8) (MASK)))
1976
/* Macro form: expands to the fpclasspd256 builtin applied to V with IMM
   and MASK as the mask; the result is cast to __mmask8.  */
#define _mm256_mask_fpclass_pd_mask(MASK, V, IMM) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ( \
    (__v4df) (__m256d) (V), \
    (int) (IMM), \
    (__mmask8) (MASK)))
1980
/* Macro form: expands to the fpclassps256 builtin applied to V with IMM
   and MASK as the mask; the result is cast to __mmask8.  */
#define _mm256_mask_fpclass_ps_mask(MASK, V, IMM) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ( \
    (__v8sf) (__m256) (V), \
    (int) (IMM), \
    (__mmask8) (MASK)))
1984
/* Macro form: expands to the fpclasspd128 builtin applied to V with IMM
   and MASK as the mask; the result is cast to __mmask8.  */
#define _mm_mask_fpclass_pd_mask(MASK, V, IMM) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ( \
    (__v2df) (__m128d) (V), \
    (int) (IMM), \
    (__mmask8) (MASK)))
1988
/* Macro form: expands to the fpclassps128 builtin applied to V with IMM
   and MASK as the mask; the result is cast to __mmask8.  */
#define _mm_mask_fpclass_ps_mask(MASK, V, IMM) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ( \
    (__v4sf) (__m128) (V), \
    (int) (IMM), \
    (__mmask8) (MASK)))
1992
/* Macro form: expands to the fpclasspd256 builtin applied to V with IMM
   and the mask (__mmask8) -1; the result is cast to __mmask8.  */
#define _mm256_fpclass_pd_mask(V, IMM) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ( \
    (__v4df) (__m256d) (V), \
    (int) (IMM), \
    (__mmask8) -1))
1996
/* Macro form: expands to the fpclassps256 builtin applied to V with IMM
   and the mask (__mmask8) -1; the result is cast to __mmask8.  */
#define _mm256_fpclass_ps_mask(V, IMM) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ( \
    (__v8sf) (__m256) (V), \
    (int) (IMM), \
    (__mmask8) -1))
2000
/* Macro form: expands to the fpclasspd128 builtin applied to V with IMM
   and the mask (__mmask8) -1; the result is cast to __mmask8.  */
#define _mm_fpclass_pd_mask(V, IMM) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ( \
    (__v2df) (__m128d) (V), \
    (int) (IMM), \
    (__mmask8) -1))
2004
/* Macro form: expands to the fpclassps128 builtin applied to V with IMM
   and the mask (__mmask8) -1; the result is cast to __mmask8.  */
#define _mm_fpclass_ps_mask(V, IMM) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ( \
    (__v4sf) (__m128) (V), \
    (int) (IMM), \
    (__mmask8) -1))
2008
2009 #endif
2010
2011 #ifdef __DISABLE_AVX512VLDQ__
2012 #undef __DISABLE_AVX512VLDQ__
2013 #pragma GCC pop_options
2014 #endif /* __DISABLE_AVX512VLDQ__ */
2015
2016 #endif /* _AVX512VLDQINTRIN_H_INCLUDED */