Import pre-release gcc-5.0 to new vendor branch
[dragonfly.git] / contrib / gcc-5.0 / gcc / config / i386 / avx512vldqintrin.h
1 /* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2
3    This file is part of GCC.
4
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23
/* Refuse direct inclusion: compilation stops with an error unless
   _IMMINTRIN_H_INCLUDED is already defined when this header is read.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif
27
28 #ifndef _AVX512VLDQINTRIN_H_INCLUDED
29 #define _AVX512VLDQINTRIN_H_INCLUDED
30
/* If AVX512VL or AVX512DQ is not already enabled, save the current target
   options and enable both for the definitions that follow.
   __DISABLE_AVX512VLDQ__ records that this happened (presumably so the
   options can be popped again later; that code is not visible here).  */
#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512dq")
#define __DISABLE_AVX512VLDQ__
#endif /* !__AVX512VL__ || !__AVX512DQ__ */
36
37 extern __inline __m256i
38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 _mm256_cvttpd_epi64 (__m256d __A)
40 {
41   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
42                                                      (__v4di)
43                                                      _mm256_setzero_si256 (),
44                                                      (__mmask8) -1);
45 }
46
47 extern __inline __m256i
48 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
50 {
51   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
52                                                      (__v4di) __W,
53                                                      (__mmask8) __U);
54 }
55
56 extern __inline __m256i
57 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
58 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
59 {
60   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
61                                                      (__v4di)
62                                                      _mm256_setzero_si256 (),
63                                                      (__mmask8) __U);
64 }
65
66 extern __inline __m128i
67 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68 _mm_cvttpd_epi64 (__m128d __A)
69 {
70   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
71                                                      (__v2di)
72                                                      _mm_setzero_di (),
73                                                      (__mmask8) -1);
74 }
75
76 extern __inline __m128i
77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
79 {
80   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
81                                                      (__v2di) __W,
82                                                      (__mmask8) __U);
83 }
84
85 extern __inline __m128i
86 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
88 {
89   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
90                                                      (__v2di)
91                                                      _mm_setzero_si128 (),
92                                                      (__mmask8) __U);
93 }
94
95 extern __inline __m256i
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm256_cvttpd_epu64 (__m256d __A)
98 {
99   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
100                                                       (__v4di)
101                                                       _mm256_setzero_si256 (),
102                                                       (__mmask8) -1);
103 }
104
105 extern __inline __m256i
106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
108 {
109   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
110                                                       (__v4di) __W,
111                                                       (__mmask8) __U);
112 }
113
114 extern __inline __m256i
115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
117 {
118   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
119                                                       (__v4di)
120                                                       _mm256_setzero_si256 (),
121                                                       (__mmask8) __U);
122 }
123
124 extern __inline __m128i
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _mm_cvttpd_epu64 (__m128d __A)
127 {
128   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
129                                                       (__v2di)
130                                                       _mm_setzero_di (),
131                                                       (__mmask8) -1);
132 }
133
134 extern __inline __m128i
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
137 {
138   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
139                                                       (__v2di) __W,
140                                                       (__mmask8) __U);
141 }
142
143 extern __inline __m128i
144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
146 {
147   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
148                                                       (__v2di)
149                                                       _mm_setzero_si128 (),
150                                                       (__mmask8) __U);
151 }
152
153 extern __inline __m256i
154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 _mm256_cvtpd_epi64 (__m256d __A)
156 {
157   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
158                                                     (__v4di)
159                                                     _mm256_setzero_si256 (),
160                                                     (__mmask8) -1);
161 }
162
163 extern __inline __m256i
164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
165 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
166 {
167   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
168                                                     (__v4di) __W,
169                                                     (__mmask8) __U);
170 }
171
172 extern __inline __m256i
173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
175 {
176   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
177                                                     (__v4di)
178                                                     _mm256_setzero_si256 (),
179                                                     (__mmask8) __U);
180 }
181
182 extern __inline __m128i
183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184 _mm_cvtpd_epi64 (__m128d __A)
185 {
186   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
187                                                     (__v2di)
188                                                     _mm_setzero_di (),
189                                                     (__mmask8) -1);
190 }
191
192 extern __inline __m128i
193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
194 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
195 {
196   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
197                                                     (__v2di) __W,
198                                                     (__mmask8) __U);
199 }
200
201 extern __inline __m128i
202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
203 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
204 {
205   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
206                                                     (__v2di)
207                                                     _mm_setzero_si128 (),
208                                                     (__mmask8) __U);
209 }
210
211 extern __inline __m256i
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 _mm256_cvtpd_epu64 (__m256d __A)
214 {
215   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
216                                                      (__v4di)
217                                                      _mm256_setzero_si256 (),
218                                                      (__mmask8) -1);
219 }
220
221 extern __inline __m256i
222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
224 {
225   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
226                                                      (__v4di) __W,
227                                                      (__mmask8) __U);
228 }
229
230 extern __inline __m256i
231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
232 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
233 {
234   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
235                                                      (__v4di)
236                                                      _mm256_setzero_si256 (),
237                                                      (__mmask8) __U);
238 }
239
240 extern __inline __m128i
241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
242 _mm_cvtpd_epu64 (__m128d __A)
243 {
244   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
245                                                      (__v2di)
246                                                      _mm_setzero_di (),
247                                                      (__mmask8) -1);
248 }
249
250 extern __inline __m128i
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
253 {
254   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
255                                                      (__v2di) __W,
256                                                      (__mmask8) __U);
257 }
258
259 extern __inline __m128i
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
262 {
263   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
264                                                      (__v2di)
265                                                      _mm_setzero_si128 (),
266                                                      (__mmask8) __U);
267 }
268
269 extern __inline __m256i
270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271 _mm256_cvttps_epi64 (__m128 __A)
272 {
273   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
274                                                      (__v4di)
275                                                      _mm256_setzero_si256 (),
276                                                      (__mmask8) -1);
277 }
278
279 extern __inline __m256i
280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
282 {
283   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
284                                                      (__v4di) __W,
285                                                      (__mmask8) __U);
286 }
287
288 extern __inline __m256i
289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
290 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
291 {
292   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
293                                                      (__v4di)
294                                                      _mm256_setzero_si256 (),
295                                                      (__mmask8) __U);
296 }
297
298 extern __inline __m128i
299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
300 _mm_cvttps_epi64 (__m128 __A)
301 {
302   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
303                                                      (__v2di)
304                                                      _mm_setzero_di (),
305                                                      (__mmask8) -1);
306 }
307
308 extern __inline __m128i
309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
311 {
312   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
313                                                      (__v2di) __W,
314                                                      (__mmask8) __U);
315 }
316
317 extern __inline __m128i
318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
320 {
321   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
322                                                      (__v2di)
323                                                      _mm_setzero_di (),
324                                                      (__mmask8) __U);
325 }
326
327 extern __inline __m256i
328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
329 _mm256_cvttps_epu64 (__m128 __A)
330 {
331   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
332                                                       (__v4di)
333                                                       _mm256_setzero_si256 (),
334                                                       (__mmask8) -1);
335 }
336
337 extern __inline __m256i
338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
340 {
341   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
342                                                       (__v4di) __W,
343                                                       (__mmask8) __U);
344 }
345
346 extern __inline __m256i
347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
349 {
350   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
351                                                       (__v4di)
352                                                       _mm256_setzero_si256 (),
353                                                       (__mmask8) __U);
354 }
355
356 extern __inline __m128i
357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
358 _mm_cvttps_epu64 (__m128 __A)
359 {
360   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
361                                                       (__v2di)
362                                                       _mm_setzero_di (),
363                                                       (__mmask8) -1);
364 }
365
366 extern __inline __m128i
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
369 {
370   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
371                                                       (__v2di) __W,
372                                                       (__mmask8) __U);
373 }
374
375 extern __inline __m128i
376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
377 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
378 {
379   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
380                                                       (__v2di)
381                                                       _mm_setzero_di (),
382                                                       (__mmask8) __U);
383 }
384
385 extern __inline __m256d
386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387 _mm256_broadcast_f64x2 (__m128d __A)
388 {
389   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
390                                                            __A,
391                                                            (__v4df)_mm256_undefined_pd(),
392                                                            (__mmask8) -
393                                                            1);
394 }
395
396 extern __inline __m256d
397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
398 _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
399 {
400   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
401                                                            __A,
402                                                            (__v4df)
403                                                            __O, __M);
404 }
405
406 extern __inline __m256d
407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
409 {
410   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
411                                                            __A,
412                                                            (__v4df)
413                                                            _mm256_setzero_ps (),
414                                                            __M);
415 }
416
417 extern __inline __m256i
418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419 _mm256_broadcast_i64x2 (__m128i __A)
420 {
421   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
422                                                            __A,
423                                                            (__v4di)_mm256_undefined_si256(),
424                                                            (__mmask8) -
425                                                            1);
426 }
427
428 extern __inline __m256i
429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
430 _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
431 {
432   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
433                                                            __A,
434                                                            (__v4di)
435                                                            __O, __M);
436 }
437
438 extern __inline __m256i
439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
440 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
441 {
442   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
443                                                            __A,
444                                                            (__v4di)
445                                                            _mm256_setzero_si256 (),
446                                                            __M);
447 }
448
449 extern __inline __m256
450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
451 _mm256_broadcast_f32x2 (__m128 __A)
452 {
453   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
454                                                           (__v8sf)_mm256_undefined_ps(),
455                                                           (__mmask8) -
456                                                           1);
457 }
458
459 extern __inline __m256
460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
462 {
463   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
464                                                           (__v8sf) __O,
465                                                           __M);
466 }
467
468 extern __inline __m256
469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
470 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
471 {
472   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
473                                                           (__v8sf)
474                                                           _mm256_setzero_ps (),
475                                                           __M);
476 }
477
478 extern __inline __m256i
479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
480 _mm256_broadcast_i32x2 (__m128i __A)
481 {
482   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
483                                                            __A,
484                                                           (__v8si)_mm256_undefined_si256(),
485                                                            (__mmask8) -
486                                                            1);
487 }
488
489 extern __inline __m256i
490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
491 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
492 {
493   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
494                                                            __A,
495                                                            (__v8si)
496                                                            __O, __M);
497 }
498
499 extern __inline __m256i
500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
502 {
503   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
504                                                            __A,
505                                                            (__v8si)
506                                                            _mm256_setzero_si256 (),
507                                                            __M);
508 }
509
510 extern __inline __m128i
511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 _mm_broadcast_i32x2 (__m128i __A)
513 {
514   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
515                                                            __A,
516                                                           (__v4si)_mm_undefined_si128(),
517                                                            (__mmask8) -
518                                                            1);
519 }
520
521 extern __inline __m128i
522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
523 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
524 {
525   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
526                                                            __A,
527                                                            (__v4si)
528                                                            __O, __M);
529 }
530
531 extern __inline __m128i
532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
533 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
534 {
535   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
536                                                            __A,
537                                                            (__v4si)
538                                                            _mm_setzero_si128 (),
539                                                            __M);
540 }
541
542 extern __inline __m256i
543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
544 _mm256_mullo_epi64 (__m256i __A, __m256i __B)
545 {
546   return (__m256i) ((__v4du) __A * (__v4du) __B);
547 }
548
549 extern __inline __m256i
550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
551 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
552                          __m256i __B)
553 {
554   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
555                                                   (__v4di) __B,
556                                                   (__v4di) __W,
557                                                   (__mmask8) __U);
558 }
559
560 extern __inline __m256i
561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
562 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
563 {
564   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
565                                                   (__v4di) __B,
566                                                   (__v4di)
567                                                   _mm256_setzero_si256 (),
568                                                   (__mmask8) __U);
569 }
570
571 extern __inline __m128i
572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
573 _mm_mullo_epi64 (__m128i __A, __m128i __B)
574 {
575   return (__m128i) ((__v2du) __A * (__v2du) __B);
576 }
577
578 extern __inline __m128i
579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
580 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
581                       __m128i __B)
582 {
583   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
584                                                   (__v2di) __B,
585                                                   (__v2di) __W,
586                                                   (__mmask8) __U);
587 }
588
589 extern __inline __m128i
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
592 {
593   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
594                                                   (__v2di) __B,
595                                                   (__v2di)
596                                                   _mm_setzero_di (),
597                                                   (__mmask8) __U);
598 }
599
600 extern __inline __m256d
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
603                        __m256d __B)
604 {
605   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
606                                                   (__v4df) __B,
607                                                   (__v4df) __W,
608                                                   (__mmask8) __U);
609 }
610
611 extern __inline __m256d
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
614 {
615   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
616                                                   (__v4df) __B,
617                                                   (__v4df)
618                                                   _mm256_setzero_pd (),
619                                                   (__mmask8) __U);
620 }
621
622 extern __inline __m128d
623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
624 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
625                     __m128d __B)
626 {
627   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
628                                                   (__v2df) __B,
629                                                   (__v2df) __W,
630                                                   (__mmask8) __U);
631 }
632
633 extern __inline __m128d
634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
636 {
637   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
638                                                   (__v2df) __B,
639                                                   (__v2df)
640                                                   _mm_setzero_pd (),
641                                                   (__mmask8) __U);
642 }
643
644 extern __inline __m256
645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
647                        __m256 __B)
648 {
649   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
650                                                  (__v8sf) __B,
651                                                  (__v8sf) __W,
652                                                  (__mmask8) __U);
653 }
654
655 extern __inline __m256
656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
658 {
659   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
660                                                  (__v8sf) __B,
661                                                  (__v8sf)
662                                                  _mm256_setzero_ps (),
663                                                  (__mmask8) __U);
664 }
665
666 extern __inline __m128
667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
668 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
669 {
670   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
671                                                  (__v4sf) __B,
672                                                  (__v4sf) __W,
673                                                  (__mmask8) __U);
674 }
675
676 extern __inline __m128
677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
679 {
680   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
681                                                  (__v4sf) __B,
682                                                  (__v4sf)
683                                                  _mm_setzero_ps (),
684                                                  (__mmask8) __U);
685 }
686
687 extern __inline __m256i
688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689 _mm256_cvtps_epi64 (__m128 __A)
690 {
691   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
692                                                     (__v4di)
693                                                     _mm256_setzero_si256 (),
694                                                     (__mmask8) -1);
695 }
696
697 extern __inline __m256i
698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
700 {
701   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
702                                                     (__v4di) __W,
703                                                     (__mmask8) __U);
704 }
705
706 extern __inline __m256i
707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
708 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
709 {
710   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
711                                                     (__v4di)
712                                                     _mm256_setzero_si256 (),
713                                                     (__mmask8) __U);
714 }
715
716 extern __inline __m128i
717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718 _mm_cvtps_epi64 (__m128 __A)
719 {
720   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
721                                                     (__v2di)
722                                                     _mm_setzero_di (),
723                                                     (__mmask8) -1);
724 }
725
726 extern __inline __m128i
727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
728 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
729 {
730   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
731                                                     (__v2di) __W,
732                                                     (__mmask8) __U);
733 }
734
735 extern __inline __m128i
736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
737 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
738 {
739   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
740                                                     (__v2di)
741                                                     _mm_setzero_di (),
742                                                     (__mmask8) __U);
743 }
744
745 extern __inline __m256i
746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
747 _mm256_cvtps_epu64 (__m128 __A)
748 {
749   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
750                                                      (__v4di)
751                                                      _mm256_setzero_si256 (),
752                                                      (__mmask8) -1);
753 }
754
755 extern __inline __m256i
756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
757 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
758 {
759   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
760                                                      (__v4di) __W,
761                                                      (__mmask8) __U);
762 }
763
764 extern __inline __m256i
765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
766 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
767 {
768   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
769                                                      (__v4di)
770                                                      _mm256_setzero_si256 (),
771                                                      (__mmask8) __U);
772 }
773
774 extern __inline __m128i
775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776 _mm_cvtps_epu64 (__m128 __A)
777 {
778   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
779                                                      (__v2di)
780                                                      _mm_setzero_di (),
781                                                      (__mmask8) -1);
782 }
783
784 extern __inline __m128i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
787 {
788   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
789                                                      (__v2di) __W,
790                                                      (__mmask8) __U);
791 }
792
793 extern __inline __m128i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
796 {
797   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
798                                                      (__v2di)
799                                                      _mm_setzero_di (),
800                                                      (__mmask8) __U);
801 }
802
803 extern __inline __m128
804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
805 _mm256_cvtepi64_ps (__m256i __A)
806 {
807   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
808                                                    (__v4sf)
809                                                    _mm_setzero_ps (),
810                                                    (__mmask8) -1);
811 }
812
813 extern __inline __m128
814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
815 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
816 {
817   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
818                                                    (__v4sf) __W,
819                                                    (__mmask8) __U);
820 }
821
822 extern __inline __m128
823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
824 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
825 {
826   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
827                                                    (__v4sf)
828                                                    _mm_setzero_ps (),
829                                                    (__mmask8) __U);
830 }
831
832 extern __inline __m128
833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 _mm_cvtepi64_ps (__m128i __A)
835 {
836   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
837                                                    (__v4sf)
838                                                    _mm_setzero_ps (),
839                                                    (__mmask8) -1);
840 }
841
842 extern __inline __m128
843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
844 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
845 {
846   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
847                                                    (__v4sf) __W,
848                                                    (__mmask8) __U);
849 }
850
851 extern __inline __m128
852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
854 {
855   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
856                                                    (__v4sf)
857                                                    _mm_setzero_ps (),
858                                                    (__mmask8) __U);
859 }
860
861 extern __inline __m128
862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863 _mm256_cvtepu64_ps (__m256i __A)
864 {
865   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
866                                                     (__v4sf)
867                                                     _mm_setzero_ps (),
868                                                     (__mmask8) -1);
869 }
870
871 extern __inline __m128
872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
874 {
875   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
876                                                     (__v4sf) __W,
877                                                     (__mmask8) __U);
878 }
879
880 extern __inline __m128
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
883 {
884   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
885                                                     (__v4sf)
886                                                     _mm_setzero_ps (),
887                                                     (__mmask8) __U);
888 }
889
890 extern __inline __m128
891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
892 _mm_cvtepu64_ps (__m128i __A)
893 {
894   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
895                                                     (__v4sf)
896                                                     _mm_setzero_ps (),
897                                                     (__mmask8) -1);
898 }
899
900 extern __inline __m128
901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
903 {
904   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
905                                                     (__v4sf) __W,
906                                                     (__mmask8) __U);
907 }
908
909 extern __inline __m128
910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
912 {
913   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
914                                                     (__v4sf)
915                                                     _mm_setzero_ps (),
916                                                     (__mmask8) __U);
917 }
918
919 extern __inline __m256d
920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921 _mm256_cvtepi64_pd (__m256i __A)
922 {
923   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
924                                                     (__v4df)
925                                                     _mm256_setzero_pd (),
926                                                     (__mmask8) -1);
927 }
928
929 extern __inline __m256d
930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
931 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
932 {
933   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
934                                                     (__v4df) __W,
935                                                     (__mmask8) __U);
936 }
937
938 extern __inline __m256d
939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
941 {
942   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
943                                                     (__v4df)
944                                                     _mm256_setzero_pd (),
945                                                     (__mmask8) __U);
946 }
947
948 extern __inline __m128d
949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
950 _mm_cvtepi64_pd (__m128i __A)
951 {
952   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
953                                                     (__v2df)
954                                                     _mm_setzero_pd (),
955                                                     (__mmask8) -1);
956 }
957
958 extern __inline __m128d
959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
961 {
962   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
963                                                     (__v2df) __W,
964                                                     (__mmask8) __U);
965 }
966
967 extern __inline __m128d
968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
970 {
971   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
972                                                     (__v2df)
973                                                     _mm_setzero_pd (),
974                                                     (__mmask8) __U);
975 }
976
977 extern __inline __m256d
978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
979 _mm256_cvtepu64_pd (__m256i __A)
980 {
981   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
982                                                      (__v4df)
983                                                      _mm256_setzero_pd (),
984                                                      (__mmask8) -1);
985 }
986
987 extern __inline __m256d
988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
990 {
991   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
992                                                      (__v4df) __W,
993                                                      (__mmask8) __U);
994 }
995
996 extern __inline __m256d
997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
998 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
999 {
1000   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
1001                                                      (__v4df)
1002                                                      _mm256_setzero_pd (),
1003                                                      (__mmask8) __U);
1004 }
1005
1006 extern __inline __m256d
1007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1008 _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
1009                     __m256d __B)
1010 {
1011   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1012                                                  (__v4df) __B,
1013                                                  (__v4df) __W,
1014                                                  (__mmask8) __U);
1015 }
1016
1017 extern __inline __m256d
1018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1019 _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
1020 {
1021   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1022                                                  (__v4df) __B,
1023                                                  (__v4df)
1024                                                  _mm256_setzero_pd (),
1025                                                  (__mmask8) __U);
1026 }
1027
1028 extern __inline __m128d
1029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1030 _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1031 {
1032   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1033                                                  (__v2df) __B,
1034                                                  (__v2df) __W,
1035                                                  (__mmask8) __U);
1036 }
1037
1038 extern __inline __m128d
1039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1040 _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
1041 {
1042   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1043                                                  (__v2df) __B,
1044                                                  (__v2df)
1045                                                  _mm_setzero_pd (),
1046                                                  (__mmask8) __U);
1047 }
1048
1049 extern __inline __m256
1050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1051 _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1052 {
1053   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1054                                                 (__v8sf) __B,
1055                                                 (__v8sf) __W,
1056                                                 (__mmask8) __U);
1057 }
1058
1059 extern __inline __m256
1060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1061 _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
1062 {
1063   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1064                                                 (__v8sf) __B,
1065                                                 (__v8sf)
1066                                                 _mm256_setzero_ps (),
1067                                                 (__mmask8) __U);
1068 }
1069
1070 extern __inline __m128
1071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1073 {
1074   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1075                                                 (__v4sf) __B,
1076                                                 (__v4sf) __W,
1077                                                 (__mmask8) __U);
1078 }
1079
1080 extern __inline __m128
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
1083 {
1084   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1085                                                 (__v4sf) __B,
1086                                                 (__v4sf)
1087                                                 _mm_setzero_ps (),
1088                                                 (__mmask8) __U);
1089 }
1090
1091 extern __inline __m128d
1092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093 _mm_cvtepu64_pd (__m128i __A)
1094 {
1095   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1096                                                      (__v2df)
1097                                                      _mm_setzero_pd (),
1098                                                      (__mmask8) -1);
1099 }
1100
1101 extern __inline __m128d
1102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
1104 {
1105   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1106                                                      (__v2df) __W,
1107                                                      (__mmask8) __U);
1108 }
1109
1110 extern __inline __m128d
1111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
1113 {
1114   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1115                                                      (__v2df)
1116                                                      _mm_setzero_pd (),
1117                                                      (__mmask8) __U);
1118 }
1119
1120 extern __inline __m256d
1121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
1123                     __m256d __B)
1124 {
1125   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1126                                                  (__v4df) __B,
1127                                                  (__v4df) __W,
1128                                                  (__mmask8) __U);
1129 }
1130
1131 extern __inline __m256d
1132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1133 _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
1134 {
1135   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1136                                                  (__v4df) __B,
1137                                                  (__v4df)
1138                                                  _mm256_setzero_pd (),
1139                                                  (__mmask8) __U);
1140 }
1141
1142 extern __inline __m128d
1143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144 _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1145 {
1146   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1147                                                  (__v2df) __B,
1148                                                  (__v2df) __W,
1149                                                  (__mmask8) __U);
1150 }
1151
1152 extern __inline __m128d
1153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
1155 {
1156   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1157                                                  (__v2df) __B,
1158                                                  (__v2df)
1159                                                  _mm_setzero_pd (),
1160                                                  (__mmask8) __U);
1161 }
1162
1163 extern __inline __m256
1164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1165 _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1166 {
1167   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1168                                                 (__v8sf) __B,
1169                                                 (__v8sf) __W,
1170                                                 (__mmask8) __U);
1171 }
1172
1173 extern __inline __m256
1174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1175 _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
1176 {
1177   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1178                                                 (__v8sf) __B,
1179                                                 (__v8sf)
1180                                                 _mm256_setzero_ps (),
1181                                                 (__mmask8) __U);
1182 }
1183
1184 extern __inline __m128
1185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1186 _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1187 {
1188   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1189                                                 (__v4sf) __B,
1190                                                 (__v4sf) __W,
1191                                                 (__mmask8) __U);
1192 }
1193
1194 extern __inline __m128
1195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196 _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
1197 {
1198   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1199                                                 (__v4sf) __B,
1200                                                 (__v4sf)
1201                                                 _mm_setzero_ps (),
1202                                                 (__mmask8) __U);
1203 }
1204
1205 extern __inline __m256d
1206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1207 _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
1208 {
1209   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1210                                                 (__v4df) __B,
1211                                                 (__v4df) __W,
1212                                                 (__mmask8) __U);
1213 }
1214
1215 extern __inline __m256d
1216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1217 _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
1218 {
1219   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1220                                                 (__v4df) __B,
1221                                                 (__v4df)
1222                                                 _mm256_setzero_pd (),
1223                                                 (__mmask8) __U);
1224 }
1225
1226 extern __inline __m128d
1227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228 _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1229 {
1230   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1231                                                 (__v2df) __B,
1232                                                 (__v2df) __W,
1233                                                 (__mmask8) __U);
1234 }
1235
1236 extern __inline __m128d
1237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1238 _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
1239 {
1240   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1241                                                 (__v2df) __B,
1242                                                 (__v2df)
1243                                                 _mm_setzero_pd (),
1244                                                 (__mmask8) __U);
1245 }
1246
1247 extern __inline __m256
1248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1249 _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1250 {
1251   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1252                                                (__v8sf) __B,
1253                                                (__v8sf) __W,
1254                                                (__mmask8) __U);
1255 }
1256
1257 extern __inline __m256
1258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1259 _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
1260 {
1261   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1262                                                (__v8sf) __B,
1263                                                (__v8sf)
1264                                                _mm256_setzero_ps (),
1265                                                (__mmask8) __U);
1266 }
1267
1268 extern __inline __m128
1269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1270 _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1271 {
1272   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1273                                                (__v4sf) __B,
1274                                                (__v4sf) __W,
1275                                                (__mmask8) __U);
1276 }
1277
1278 extern __inline __m128
1279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1280 _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
1281 {
1282   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1283                                                (__v4sf) __B,
1284                                                (__v4sf)
1285                                                _mm_setzero_ps (),
1286                                                (__mmask8) __U);
1287 }
1288
1289 extern __inline __m128i
1290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291 _mm_movm_epi32 (__mmask8 __A)
1292 {
1293   return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1294 }
1295
1296 extern __inline __m256i
1297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1298 _mm256_movm_epi32 (__mmask8 __A)
1299 {
1300   return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1301 }
1302
1303 extern __inline __m128i
1304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1305 _mm_movm_epi64 (__mmask8 __A)
1306 {
1307   return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1308 }
1309
1310 extern __inline __m256i
1311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312 _mm256_movm_epi64 (__mmask8 __A)
1313 {
1314   return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1315 }
1316
1317 extern __inline __mmask8
1318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1319 _mm_movepi32_mask (__m128i __A)
1320 {
1321   return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
1322 }
1323
1324 extern __inline __mmask8
1325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326 _mm256_movepi32_mask (__m256i __A)
1327 {
1328   return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
1329 }
1330
1331 extern __inline __mmask8
1332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333 _mm_movepi64_mask (__m128i __A)
1334 {
1335   return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1336 }
1337
1338 extern __inline __mmask8
1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340 _mm256_movepi64_mask (__m256i __A)
1341 {
1342   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1343 }
1344
1345 #ifdef __OPTIMIZE__
1346 extern __inline __m128d
1347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1348 _mm256_extractf64x2_pd (__m256d __A, const int __imm)
1349 {
1350   return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1351                                                          __imm,
1352                                                          (__v2df)
1353                                                          _mm_setzero_pd (),
1354                                                          (__mmask8) -
1355                                                          1);
1356 }
1357
1358 extern __inline __m128d
1359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1360 _mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
1361                              const int __imm)
1362 {
1363   return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1364                                                          __imm,
1365                                                          (__v2df) __W,
1366                                                          (__mmask8)
1367                                                          __U);
1368 }
1369
1370 extern __inline __m128d
1371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1372 _mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
1373                               const int __imm)
1374 {
1375   return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1376                                                          __imm,
1377                                                          (__v2df)
1378                                                          _mm_setzero_pd (),
1379                                                          (__mmask8)
1380                                                          __U);
1381 }
1382
1383 extern __inline __m128i
1384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1385 _mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
1386 {
1387   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1388                                                          __imm,
1389                                                          (__v2di)
1390                                                          _mm_setzero_di (),
1391                                                          (__mmask8) -
1392                                                          1);
1393 }
1394
1395 extern __inline __m128i
1396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397 _mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
1398                                 const int __imm)
1399 {
1400   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1401                                                          __imm,
1402                                                          (__v2di) __W,
1403                                                          (__mmask8)
1404                                                          __U);
1405 }
1406
1407 extern __inline __m128i
1408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409 _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
1410                                  const int __imm)
1411 {
1412   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1413                                                          __imm,
1414                                                          (__v2di)
1415                                                          _mm_setzero_di (),
1416                                                          (__mmask8)
1417                                                          __U);
1418 }
1419
1420 extern __inline __m256d
1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422 _mm256_reduce_pd (__m256d __A, int __B)
1423 {
1424   return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1425                                                     (__v4df)
1426                                                     _mm256_setzero_pd (),
1427                                                     (__mmask8) -1);
1428 }
1429
1430 extern __inline __m256d
1431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432 _mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
1433 {
1434   return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1435                                                     (__v4df) __W,
1436                                                     (__mmask8) __U);
1437 }
1438
1439 extern __inline __m256d
1440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1441 _mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
1442 {
1443   return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1444                                                     (__v4df)
1445                                                     _mm256_setzero_pd (),
1446                                                     (__mmask8) __U);
1447 }
1448
1449 extern __inline __m128d
1450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451 _mm_reduce_pd (__m128d __A, int __B)
1452 {
1453   return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1454                                                     (__v2df)
1455                                                     _mm_setzero_pd (),
1456                                                     (__mmask8) -1);
1457 }
1458
1459 extern __inline __m128d
1460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1461 _mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
1462 {
1463   return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1464                                                     (__v2df) __W,
1465                                                     (__mmask8) __U);
1466 }
1467
1468 extern __inline __m128d
1469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470 _mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
1471 {
1472   return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1473                                                     (__v2df)
1474                                                     _mm_setzero_pd (),
1475                                                     (__mmask8) __U);
1476 }
1477
1478 extern __inline __m256
1479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1480 _mm256_reduce_ps (__m256 __A, int __B)
1481 {
1482   return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1483                                                    (__v8sf)
1484                                                    _mm256_setzero_ps (),
1485                                                    (__mmask8) -1);
1486 }
1487
1488 extern __inline __m256
1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490 _mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
1491 {
1492   return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1493                                                    (__v8sf) __W,
1494                                                    (__mmask8) __U);
1495 }
1496
1497 extern __inline __m256
1498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499 _mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
1500 {
1501   return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1502                                                    (__v8sf)
1503                                                    _mm256_setzero_ps (),
1504                                                    (__mmask8) __U);
1505 }
1506
1507 extern __inline __m128
1508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509 _mm_reduce_ps (__m128 __A, int __B)
1510 {
1511   return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1512                                                    (__v4sf)
1513                                                    _mm_setzero_ps (),
1514                                                    (__mmask8) -1);
1515 }
1516
1517 extern __inline __m128
1518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1519 _mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
1520 {
1521   return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1522                                                    (__v4sf) __W,
1523                                                    (__mmask8) __U);
1524 }
1525
1526 extern __inline __m128
1527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528 _mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
1529 {
1530   return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1531                                                    (__v4sf)
1532                                                    _mm_setzero_ps (),
1533                                                    (__mmask8) __U);
1534 }
1535
1536 extern __inline __m256d
1537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538 _mm256_range_pd (__m256d __A, __m256d __B, int __C)
1539 {
1540   return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1541                                                    (__v4df) __B, __C,
1542                                                    (__v4df)
1543                                                    _mm256_setzero_pd (),
1544                                                    (__mmask8) -1);
1545 }
1546
1547 extern __inline __m256d
1548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549 _mm256_mask_range_pd (__m256d __W, __mmask8 __U,
1550                       __m256d __A, __m256d __B, int __C)
1551 {
1552   return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1553                                                    (__v4df) __B, __C,
1554                                                    (__v4df) __W,
1555                                                    (__mmask8) __U);
1556 }
1557
1558 extern __inline __m256d
1559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1560 _mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
1561 {
1562   return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1563                                                    (__v4df) __B, __C,
1564                                                    (__v4df)
1565                                                    _mm256_setzero_pd (),
1566                                                    (__mmask8) __U);
1567 }
1568
1569 extern __inline __m128d
1570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571 _mm_range_pd (__m128d __A, __m128d __B, int __C)
1572 {
1573   return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1574                                                    (__v2df) __B, __C,
1575                                                    (__v2df)
1576                                                    _mm_setzero_pd (),
1577                                                    (__mmask8) -1);
1578 }
1579
1580 extern __inline __m128d
1581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1582 _mm_mask_range_pd (__m128d __W, __mmask8 __U,
1583                    __m128d __A, __m128d __B, int __C)
1584 {
1585   return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1586                                                    (__v2df) __B, __C,
1587                                                    (__v2df) __W,
1588                                                    (__mmask8) __U);
1589 }
1590
1591 extern __inline __m128d
1592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1593 _mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1594 {
1595   return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1596                                                    (__v2df) __B, __C,
1597                                                    (__v2df)
1598                                                    _mm_setzero_pd (),
1599                                                    (__mmask8) __U);
1600 }
1601
1602 extern __inline __m256
1603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1604 _mm256_range_ps (__m256 __A, __m256 __B, int __C)
1605 {
1606   return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1607                                                   (__v8sf) __B, __C,
1608                                                   (__v8sf)
1609                                                   _mm256_setzero_ps (),
1610                                                   (__mmask8) -1);
1611 }
1612
1613 extern __inline __m256
1614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1615 _mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
1616                       int __C)
1617 {
1618   return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1619                                                   (__v8sf) __B, __C,
1620                                                   (__v8sf) __W,
1621                                                   (__mmask8) __U);
1622 }
1623
1624 extern __inline __m256
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 _mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
1627 {
1628   return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1629                                                   (__v8sf) __B, __C,
1630                                                   (__v8sf)
1631                                                   _mm256_setzero_ps (),
1632                                                   (__mmask8) __U);
1633 }
1634
1635 extern __inline __m128
1636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1637 _mm_range_ps (__m128 __A, __m128 __B, int __C)
1638 {
1639   return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1640                                                   (__v4sf) __B, __C,
1641                                                   (__v4sf)
1642                                                   _mm_setzero_ps (),
1643                                                   (__mmask8) -1);
1644 }
1645
1646 extern __inline __m128
1647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648 _mm_mask_range_ps (__m128 __W, __mmask8 __U,
1649                    __m128 __A, __m128 __B, int __C)
1650 {
1651   return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1652                                                   (__v4sf) __B, __C,
1653                                                   (__v4sf) __W,
1654                                                   (__mmask8) __U);
1655 }
1656
1657 extern __inline __m128
1658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1659 _mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1660 {
1661   return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1662                                                   (__v4sf) __B, __C,
1663                                                   (__v4sf)
1664                                                   _mm_setzero_ps (),
1665                                                   (__mmask8) __U);
1666 }
1667
1668 extern __inline __mmask8
1669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670 _mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
1671                              const int __imm)
1672 {
1673   return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1674                                                       __imm, __U);
1675 }
1676
1677 extern __inline __mmask8
1678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1679 _mm256_fpclass_pd_mask (__m256d __A, const int __imm)
1680 {
1681   return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1682                                                       __imm,
1683                                                       (__mmask8) -1);
1684 }
1685
1686 extern __inline __mmask8
1687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1688 _mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
1689 {
1690   return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1691                                                       __imm, __U);
1692 }
1693
1694 extern __inline __mmask8
1695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1696 _mm256_fpclass_ps_mask (__m256 __A, const int __imm)
1697 {
1698   return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1699                                                       __imm,
1700                                                       (__mmask8) -1);
1701 }
1702
1703 extern __inline __mmask8
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
1706 {
1707   return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1708                                                       __imm, __U);
1709 }
1710
1711 extern __inline __mmask8
1712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1713 _mm_fpclass_pd_mask (__m128d __A, const int __imm)
1714 {
1715   return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1716                                                       __imm,
1717                                                       (__mmask8) -1);
1718 }
1719
1720 extern __inline __mmask8
1721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1722 _mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
1723 {
1724   return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1725                                                       __imm, __U);
1726 }
1727
1728 extern __inline __mmask8
1729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 _mm_fpclass_ps_mask (__m128 __A, const int __imm)
1731 {
1732   return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1733                                                       __imm,
1734                                                       (__mmask8) -1);
1735 }
1736
1737 extern __inline __m256i
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
1740 {
1741   return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1742                                                         (__v2di) __B,
1743                                                         __imm,
1744                                                         (__v4di)
1745                                                         _mm256_setzero_si256 (),
1746                                                         (__mmask8) -
1747                                                         1);
1748 }
1749
1750 extern __inline __m256i
1751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752 _mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
1753                          __m128i __B, const int __imm)
1754 {
1755   return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1756                                                         (__v2di) __B,
1757                                                         __imm,
1758                                                         (__v4di) __W,
1759                                                         (__mmask8)
1760                                                         __U);
1761 }
1762
1763 extern __inline __m256i
1764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1765 _mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
1766                           const int __imm)
1767 {
1768   return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1769                                                         (__v2di) __B,
1770                                                         __imm,
1771                                                         (__v4di)
1772                                                         _mm256_setzero_si256 (),
1773                                                         (__mmask8)
1774                                                         __U);
1775 }
1776
1777 extern __inline __m256d
1778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1779 _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
1780 {
1781   return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1782                                                         (__v2df) __B,
1783                                                         __imm,
1784                                                         (__v4df)
1785                                                         _mm256_setzero_pd (),
1786                                                         (__mmask8) -
1787                                                         1);
1788 }
1789
1790 extern __inline __m256d
1791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792 _mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
1793                          __m128d __B, const int __imm)
1794 {
1795   return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1796                                                         (__v2df) __B,
1797                                                         __imm,
1798                                                         (__v4df) __W,
1799                                                         (__mmask8)
1800                                                         __U);
1801 }
1802
1803 extern __inline __m256d
1804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805 _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
1806                           const int __imm)
1807 {
1808   return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1809                                                         (__v2df) __B,
1810                                                         __imm,
1811                                                         (__v4df)
1812                                                         _mm256_setzero_pd (),
1813                                                         (__mmask8)
1814                                                         __U);
1815 }
1816
1817 #else
/* Macro definition of _mm256_insertf64x2: zeroed __v4df operand and an
   all-ones mask.  */
#define _mm256_insertf64x2(A, B, IMM) \
  ((__m256d) \
   __builtin_ia32_insertf64x2_256_mask ((__v4df) (__m256d) (A), \
                                        (__v2df) (__m128d) (B), \
                                        (int) (IMM), \
                                        (__v4df) (__m256d) _mm256_setzero_pd (), \
                                        (__mmask8) -1))
1823
/* Macro definition of _mm256_mask_insertf64x2: SRC is the __v4df operand
   and K the mask.  */
#define _mm256_mask_insertf64x2(SRC, K, A, B, IMM) \
  ((__m256d) \
   __builtin_ia32_insertf64x2_256_mask ((__v4df) (__m256d) (A), \
                                        (__v2df) (__m128d) (B), \
                                        (int) (IMM), \
                                        (__v4df) (__m256d) (SRC), \
                                        (__mmask8) (K)))
1829
/* Macro definition of _mm256_maskz_insertf64x2: zeroed __v4df operand and
   K as the mask.  */
#define _mm256_maskz_insertf64x2(K, A, B, IMM) \
  ((__m256d) \
   __builtin_ia32_insertf64x2_256_mask ((__v4df) (__m256d) (A), \
                                        (__v2df) (__m128d) (B), \
                                        (int) (IMM), \
                                        (__v4df) (__m256d) _mm256_setzero_pd (), \
                                        (__mmask8) (K)))
1835
/* Macro definition of _mm256_inserti64x2: zeroed __v4di operand and an
   all-ones mask.  */
#define _mm256_inserti64x2(A, B, IMM) \
  ((__m256i) \
   __builtin_ia32_inserti64x2_256_mask ((__v4di) (__m256i) (A), \
                                        (__v2di) (__m128i) (B), \
                                        (int) (IMM), \
                                        (__v4di) (__m256i) _mm256_setzero_si256 (), \
                                        (__mmask8) -1))
1841
/* Macro definition of _mm256_mask_inserti64x2: SRC is the __v4di operand
   and K the mask.  */
#define _mm256_mask_inserti64x2(SRC, K, A, B, IMM) \
  ((__m256i) \
   __builtin_ia32_inserti64x2_256_mask ((__v4di) (__m256i) (A), \
                                        (__v2di) (__m128i) (B), \
                                        (int) (IMM), \
                                        (__v4di) (__m256i) (SRC), \
                                        (__mmask8) (K)))
1847
/* Macro definition of _mm256_maskz_inserti64x2: zeroed __v4di operand and
   K as the mask.  */
#define _mm256_maskz_inserti64x2(K, A, B, IMM) \
  ((__m256i) \
   __builtin_ia32_inserti64x2_256_mask ((__v4di) (__m256i) (A), \
                                        (__v2di) (__m128i) (B), \
                                        (int) (IMM), \
                                        (__v4di) (__m256i) _mm256_setzero_si256 (), \
                                        (__mmask8) (K)))
1853
/* Macro definition of _mm256_extractf64x2_pd: zeroed __v2df operand and
   an all-ones mask.  */
#define _mm256_extractf64x2_pd(A, IMM) \
  ((__m128d) \
   __builtin_ia32_extractf64x2_256_mask ((__v4df) (__m256d) (A), \
                                         (int) (IMM), \
                                         (__v2df) (__m128d) _mm_setzero_pd (), \
                                         (__mmask8) -1))
1857
/* Macro definition of _mm256_mask_extractf64x2_pd: SRC is the __v2df
   operand and K the mask.  */
#define _mm256_mask_extractf64x2_pd(SRC, K, A, IMM) \
  ((__m128d) \
   __builtin_ia32_extractf64x2_256_mask ((__v4df) (__m256d) (A), \
                                         (int) (IMM), \
                                         (__v2df) (__m128d) (SRC), \
                                         (__mmask8) (K)))
1861
/* Macro definition of _mm256_maskz_extractf64x2_pd: zeroed __v2df operand
   and K as the mask.  */
#define _mm256_maskz_extractf64x2_pd(K, A, IMM) \
  ((__m128d) \
   __builtin_ia32_extractf64x2_256_mask ((__v4df) (__m256d) (A), \
                                         (int) (IMM), \
                                         (__v2df) (__m128d) _mm_setzero_pd (), \
                                         (__mmask8) (K)))
1865
/* Macro definition of _mm256_extracti64x2_epi64: zeroed __v2di operand
   and an all-ones mask.  */
#define _mm256_extracti64x2_epi64(A, IMM) \
  ((__m128i) \
   __builtin_ia32_extracti64x2_256_mask ((__v4di) (__m256i) (A), \
                                         (int) (IMM), \
                                         (__v2di) (__m128i) _mm_setzero_di (), \
                                         (__mmask8) -1))
1869
/* Macro definition of _mm256_mask_extracti64x2_epi64: SRC is the __v2di
   operand and K the mask.  */
#define _mm256_mask_extracti64x2_epi64(SRC, K, A, IMM) \
  ((__m128i) \
   __builtin_ia32_extracti64x2_256_mask ((__v4di) (__m256i) (A), \
                                         (int) (IMM), \
                                         (__v2di) (__m128i) (SRC), \
                                         (__mmask8) (K)))
1873
/* Macro definition of _mm256_maskz_extracti64x2_epi64: zeroed __v2di
   operand and K as the mask.  */
#define _mm256_maskz_extracti64x2_epi64(K, A, IMM) \
  ((__m128i) \
   __builtin_ia32_extracti64x2_256_mask ((__v4di) (__m256i) (A), \
                                         (int) (IMM), \
                                         (__v2di) (__m128i) _mm_setzero_di (), \
                                         (__mmask8) (K)))
1877
/* Macro definition of _mm256_reduce_pd: zeroed __v4df operand and an
   all-ones mask.  */
#define _mm256_reduce_pd(X, IMM) \
  ((__m256d) \
   __builtin_ia32_reducepd256_mask ((__v4df) (__m256d) (X), \
                                    (int) (IMM), \
                                    (__v4df) _mm256_setzero_pd (), \
                                    (__mmask8) -1))
1881
/* Macro definition of _mm256_mask_reduce_pd: SRC is the __v4df operand
   and K the mask.  */
#define _mm256_mask_reduce_pd(SRC, K, X, IMM) \
  ((__m256d) \
   __builtin_ia32_reducepd256_mask ((__v4df) (__m256d) (X), \
                                    (int) (IMM), \
                                    (__v4df) (__m256d) (SRC), \
                                    (__mmask8) (K)))
1885
/* Macro definition of _mm256_maskz_reduce_pd: zeroed __v4df operand and K
   as the mask.  */
#define _mm256_maskz_reduce_pd(K, X, IMM) \
  ((__m256d) \
   __builtin_ia32_reducepd256_mask ((__v4df) (__m256d) (X), \
                                    (int) (IMM), \
                                    (__v4df) _mm256_setzero_pd (), \
                                    (__mmask8) (K)))
1889
/* Macro definition of _mm_reduce_pd: zeroed __v2df operand and an
   all-ones mask.  */
#define _mm_reduce_pd(X, IMM) \
  ((__m128d) \
   __builtin_ia32_reducepd128_mask ((__v2df) (__m128d) (X), \
                                    (int) (IMM), \
                                    (__v2df) _mm_setzero_pd (), \
                                    (__mmask8) -1))
1893
/* Macro definition of _mm_mask_reduce_pd: SRC is the __v2df operand and K
   the mask.  */
#define _mm_mask_reduce_pd(SRC, K, X, IMM) \
  ((__m128d) \
   __builtin_ia32_reducepd128_mask ((__v2df) (__m128d) (X), \
                                    (int) (IMM), \
                                    (__v2df) (__m128d) (SRC), \
                                    (__mmask8) (K)))
1897
/* Macro definition of _mm_maskz_reduce_pd: zeroed __v2df operand and K as
   the mask.  */
#define _mm_maskz_reduce_pd(K, X, IMM) \
  ((__m128d) \
   __builtin_ia32_reducepd128_mask ((__v2df) (__m128d) (X), \
                                    (int) (IMM), \
                                    (__v2df) _mm_setzero_pd (), \
                                    (__mmask8) (K)))
1901
/* Macro definition of _mm256_reduce_ps: zeroed __v8sf operand and an
   all-ones mask.  */
#define _mm256_reduce_ps(X, IMM) \
  ((__m256) \
   __builtin_ia32_reduceps256_mask ((__v8sf) (__m256) (X), \
                                    (int) (IMM), \
                                    (__v8sf) _mm256_setzero_ps (), \
                                    (__mmask8) -1))
1905
/* Macro definition of _mm256_mask_reduce_ps: SRC is the __v8sf operand
   and K the mask.  */
#define _mm256_mask_reduce_ps(SRC, K, X, IMM) \
  ((__m256) \
   __builtin_ia32_reduceps256_mask ((__v8sf) (__m256) (X), \
                                    (int) (IMM), \
                                    (__v8sf) (__m256) (SRC), \
                                    (__mmask8) (K)))
1909
/* Macro definition of _mm256_maskz_reduce_ps: zeroed __v8sf operand and K
   as the mask.  */
#define _mm256_maskz_reduce_ps(K, X, IMM) \
  ((__m256) \
   __builtin_ia32_reduceps256_mask ((__v8sf) (__m256) (X), \
                                    (int) (IMM), \
                                    (__v8sf) _mm256_setzero_ps (), \
                                    (__mmask8) (K)))
1913
/* Macro definition of _mm_reduce_ps: zeroed __v4sf operand and an
   all-ones mask.  */
#define _mm_reduce_ps(X, IMM) \
  ((__m128) \
   __builtin_ia32_reduceps128_mask ((__v4sf) (__m128) (X), \
                                    (int) (IMM), \
                                    (__v4sf) _mm_setzero_ps (), \
                                    (__mmask8) -1))
1917
/* Macro definition of _mm_mask_reduce_ps: SRC is the __v4sf operand and K
   the mask.  */
#define _mm_mask_reduce_ps(SRC, K, X, IMM) \
  ((__m128) \
   __builtin_ia32_reduceps128_mask ((__v4sf) (__m128) (X), \
                                    (int) (IMM), \
                                    (__v4sf) (__m128) (SRC), \
                                    (__mmask8) (K)))
1921
/* Macro definition of _mm_maskz_reduce_ps: zeroed __v4sf operand and K as
   the mask.  */
#define _mm_maskz_reduce_ps(K, X, IMM) \
  ((__m128) \
   __builtin_ia32_reduceps128_mask ((__v4sf) (__m128) (X), \
                                    (int) (IMM), \
                                    (__v4sf) _mm_setzero_ps (), \
                                    (__mmask8) (K)))
1925
/* Macro definition of _mm256_range_pd: zeroed __v4df operand and an
   all-ones mask.  */
#define _mm256_range_pd(X, Y, IMM) \
  ((__m256d) \
   __builtin_ia32_rangepd256_mask ((__v4df) (__m256d) (X), \
                                   (__v4df) (__m256d) (Y), \
                                   (int) (IMM), \
                                   (__v4df) _mm256_setzero_pd (), \
                                   (__mmask8) -1))
1930
/* Macro definition of _mm256_maskz_range_pd: zeroed __v4df operand and K
   as the mask.  */
#define _mm256_maskz_range_pd(K, X, Y, IMM) \
  ((__m256d) \
   __builtin_ia32_rangepd256_mask ((__v4df) (__m256d) (X), \
                                   (__v4df) (__m256d) (Y), \
                                   (int) (IMM), \
                                   (__v4df) _mm256_setzero_pd (), \
                                   (__mmask8) (K)))
1935
/* Macro definition of _mm_range_pd: zeroed __v2df operand and an all-ones
   mask.  */
#define _mm_range_pd(X, Y, IMM) \
  ((__m128d) \
   __builtin_ia32_rangepd128_mask ((__v2df) (__m128d) (X), \
                                   (__v2df) (__m128d) (Y), \
                                   (int) (IMM), \
                                   (__v2df) _mm_setzero_pd (), \
                                   (__mmask8) -1))
1940
/* Macro definition of _mm256_range_ps: zeroed __v8sf operand and an
   all-ones mask.  */
#define _mm256_range_ps(X, Y, IMM) \
  ((__m256) \
   __builtin_ia32_rangeps256_mask ((__v8sf) (__m256) (X), \
                                   (__v8sf) (__m256) (Y), \
                                   (int) (IMM), \
                                   (__v8sf) _mm256_setzero_ps (), \
                                   (__mmask8) -1))
1945
/* Macro definition of _mm256_mask_range_ps: SRC is the __v8sf operand and
   K the mask.  */
#define _mm256_mask_range_ps(SRC, K, X, Y, IMM) \
  ((__m256) \
   __builtin_ia32_rangeps256_mask ((__v8sf) (__m256) (X), \
                                   (__v8sf) (__m256) (Y), \
                                   (int) (IMM), \
                                   (__v8sf) (__m256) (SRC), \
                                   (__mmask8) (K)))
1950
/* Macro definition of _mm256_maskz_range_ps: zeroed __v8sf operand and K
   as the mask.  */
#define _mm256_maskz_range_ps(K, X, Y, IMM) \
  ((__m256) \
   __builtin_ia32_rangeps256_mask ((__v8sf) (__m256) (X), \
                                   (__v8sf) (__m256) (Y), \
                                   (int) (IMM), \
                                   (__v8sf) _mm256_setzero_ps (), \
                                   (__mmask8) (K)))
1955
/* Macro definition of _mm_range_ps: zeroed __v4sf operand and an all-ones
   mask.  */
#define _mm_range_ps(X, Y, IMM) \
  ((__m128) \
   __builtin_ia32_rangeps128_mask ((__v4sf) (__m128) (X), \
                                   (__v4sf) (__m128) (Y), \
                                   (int) (IMM), \
                                   (__v4sf) _mm_setzero_ps (), \
                                   (__mmask8) -1))
1960
/* Macro definition of _mm_mask_range_ps: SRC is the __v4sf operand and K
   the mask.  */
#define _mm_mask_range_ps(SRC, K, X, Y, IMM) \
  ((__m128) \
   __builtin_ia32_rangeps128_mask ((__v4sf) (__m128) (X), \
                                   (__v4sf) (__m128) (Y), \
                                   (int) (IMM), \
                                   (__v4sf) (__m128) (SRC), \
                                   (__mmask8) (K)))
1965
/* Macro definition of _mm_maskz_range_ps: zeroed __v4sf operand and K as
   the mask.  */
#define _mm_maskz_range_ps(K, X, Y, IMM) \
  ((__m128) \
   __builtin_ia32_rangeps128_mask ((__v4sf) (__m128) (X), \
                                   (__v4sf) (__m128) (Y), \
                                   (int) (IMM), \
                                   (__v4sf) _mm_setzero_ps (), \
                                   (__mmask8) (K)))
1970
/* Macro definition of _mm256_mask_range_pd: SRC is the __v4df operand and
   K the mask.  */
#define _mm256_mask_range_pd(SRC, K, X, Y, IMM) \
  ((__m256d) \
   __builtin_ia32_rangepd256_mask ((__v4df) (__m256d) (X), \
                                   (__v4df) (__m256d) (Y), \
                                   (int) (IMM), \
                                   (__v4df) (__m256d) (SRC), \
                                   (__mmask8) (K)))
1975
/* Macro definition of _mm_mask_range_pd: SRC is the __v2df operand and K
   the mask.  */
#define _mm_mask_range_pd(SRC, K, X, Y, IMM) \
  ((__m128d) \
   __builtin_ia32_rangepd128_mask ((__v2df) (__m128d) (X), \
                                   (__v2df) (__m128d) (Y), \
                                   (int) (IMM), \
                                   (__v2df) (__m128d) (SRC), \
                                   (__mmask8) (K)))
1980
/* Macro definition of _mm_maskz_range_pd: zeroed __v2df operand and K as
   the mask.  */
#define _mm_maskz_range_pd(K, X, Y, IMM) \
  ((__m128d) \
   __builtin_ia32_rangepd128_mask ((__v2df) (__m128d) (X), \
                                   (__v2df) (__m128d) (Y), \
                                   (int) (IMM), \
                                   (__v2df) _mm_setzero_pd (), \
                                   (__mmask8) (K)))
1985
/* Macro definition of _mm256_mask_fpclass_pd_mask: K is passed as the
   mask operand.  */
#define _mm256_mask_fpclass_pd_mask(K, A, IMM) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (A), \
                                     (int) (IMM), \
                                     (__mmask8) (K)))
1989
/* Macro definition of _mm256_mask_fpclass_ps_mask: K is passed as the
   mask operand.  */
#define _mm256_mask_fpclass_ps_mask(K, A, IMM) \
  ((__mmask8) \
   __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (A), \
                                     (int) (IMM), \
                                     (__mmask8) (K)))
1993
/* Macro definition of _mm_mask_fpclass_pd_mask: K is passed as the mask
   operand.  */
#define _mm_mask_fpclass_pd_mask(K, A, IMM) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (A), \
                                     (int) (IMM), \
                                     (__mmask8) (K)))
1997
/* Macro definition of _mm_mask_fpclass_ps_mask: K is passed as the mask
   operand.  */
#define _mm_mask_fpclass_ps_mask(K, A, IMM) \
  ((__mmask8) \
   __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (A), \
                                     (int) (IMM), \
                                     (__mmask8) (K)))
2001
/* Macro definition of _mm256_fpclass_pd_mask: all-ones mask operand.  */
#define _mm256_fpclass_pd_mask(A, IMM) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (A), \
                                     (int) (IMM), \
                                     (__mmask8) -1))
2005
/* Macro definition of _mm256_fpclass_ps_mask: all-ones mask operand.  */
#define _mm256_fpclass_ps_mask(A, IMM) \
  ((__mmask8) \
   __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (A), \
                                     (int) (IMM), \
                                     (__mmask8) -1))
2009
/* Macro definition of _mm_fpclass_pd_mask: all-ones mask operand.  */
#define _mm_fpclass_pd_mask(A, IMM) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (A), \
                                     (int) (IMM), \
                                     (__mmask8) -1))
2013
/* Macro definition of _mm_fpclass_ps_mask: all-ones mask operand.  */
#define _mm_fpclass_ps_mask(A, IMM) \
  ((__mmask8) \
   __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (A), \
                                     (int) (IMM), \
                                     (__mmask8) -1))
2017
2018 #endif
2019
2020 #ifdef __DISABLE_AVX512VLDQ__
2021 #undef __DISABLE_AVX512VLDQ__
2022 #pragma GCC pop_options
2023 #endif /* __DISABLE_AVX512VLDQ__ */
2024
2025 #endif /* _AVX512VLDQINTRIN_H_INCLUDED */