gcc50: Disconnect from buildworld.
[dragonfly.git] / contrib / gcc-5.0 / gcc / config / i386 / avx512vlintrin.h
1 /* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2
3    This file is part of GCC.
4
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512VLINTRIN_H_INCLUDED
29 #define _AVX512VLINTRIN_H_INCLUDED
30
31 /* Doesn't require avx512vl target and is used in avx512dqintrin.h.  */
32 extern __inline __m128i
33 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
34 _mm_setzero_di (void)
35 {
36   return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
37 }
38
39 #ifndef __AVX512VL__
40 #pragma GCC push_options
41 #pragma GCC target("avx512vl")
42 #define __DISABLE_AVX512VL__
43 #endif /* __AVX512VL__ */
44
/* Internal data types for implementing the intrinsics.
   __mmask32 is declared here as a plain unsigned int.
   NOTE(review): presumably one bit per element of a 32-element mask --
   confirm against the headers that consume it.  */
typedef unsigned int __mmask32;
47
48 extern __inline __m256d
49 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
50 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
51 {
52   return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
53                                                   (__v4df) __W,
54                                                   (__mmask8) __U);
55 }
56
57 extern __inline __m256d
58 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
60 {
61   return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
62                                                   (__v4df)
63                                                   _mm256_setzero_pd (),
64                                                   (__mmask8) __U);
65 }
66
67 extern __inline __m128d
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
70 {
71   return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
72                                                   (__v2df) __W,
73                                                   (__mmask8) __U);
74 }
75
76 extern __inline __m128d
77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
79 {
80   return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
81                                                   (__v2df)
82                                                   _mm_setzero_pd (),
83                                                   (__mmask8) __U);
84 }
85
86 extern __inline __m256d
87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
89 {
90   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
91                                                    (__v4df) __W,
92                                                    (__mmask8) __U);
93 }
94
95 extern __inline __m256d
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
98 {
99   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
100                                                    (__v4df)
101                                                    _mm256_setzero_pd (),
102                                                    (__mmask8) __U);
103 }
104
105 extern __inline __m128d
106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
108 {
109   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
110                                                    (__v2df) __W,
111                                                    (__mmask8) __U);
112 }
113
114 extern __inline __m128d
115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
117 {
118   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
119                                                    (__v2df)
120                                                    _mm_setzero_pd (),
121                                                    (__mmask8) __U);
122 }
123
124 extern __inline void
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
127 {
128   __builtin_ia32_storeapd256_mask ((__v4df *) __P,
129                                    (__v4df) __A,
130                                    (__mmask8) __U);
131 }
132
133 extern __inline void
134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
136 {
137   __builtin_ia32_storeapd128_mask ((__v2df *) __P,
138                                    (__v2df) __A,
139                                    (__mmask8) __U);
140 }
141
142 extern __inline __m256
143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
144 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
145 {
146   return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
147                                                  (__v8sf) __W,
148                                                  (__mmask8) __U);
149 }
150
151 extern __inline __m256
152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
153 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
154 {
155   return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
156                                                  (__v8sf)
157                                                  _mm256_setzero_ps (),
158                                                  (__mmask8) __U);
159 }
160
161 extern __inline __m128
162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
164 {
165   return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
166                                                  (__v4sf) __W,
167                                                  (__mmask8) __U);
168 }
169
170 extern __inline __m128
171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
172 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
173 {
174   return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
175                                                  (__v4sf)
176                                                  _mm_setzero_ps (),
177                                                  (__mmask8) __U);
178 }
179
180 extern __inline __m256
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
183 {
184   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
185                                                   (__v8sf) __W,
186                                                   (__mmask8) __U);
187 }
188
189 extern __inline __m256
190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
192 {
193   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
194                                                   (__v8sf)
195                                                   _mm256_setzero_ps (),
196                                                   (__mmask8) __U);
197 }
198
199 extern __inline __m128
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
202 {
203   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
204                                                   (__v4sf) __W,
205                                                   (__mmask8) __U);
206 }
207
208 extern __inline __m128
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
211 {
212   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
213                                                   (__v4sf)
214                                                   _mm_setzero_ps (),
215                                                   (__mmask8) __U);
216 }
217
218 extern __inline void
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
221 {
222   __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
223                                    (__v8sf) __A,
224                                    (__mmask8) __U);
225 }
226
227 extern __inline void
228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
230 {
231   __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
232                                    (__v4sf) __A,
233                                    (__mmask8) __U);
234 }
235
236 extern __inline __m256i
237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
239 {
240   return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
241                                                      (__v4di) __W,
242                                                      (__mmask8) __U);
243 }
244
245 extern __inline __m256i
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
248 {
249   return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
250                                                      (__v4di)
251                                                      _mm256_setzero_si256 (),
252                                                      (__mmask8) __U);
253 }
254
255 extern __inline __m128i
256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
258 {
259   return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
260                                                      (__v2di) __W,
261                                                      (__mmask8) __U);
262 }
263
264 extern __inline __m128i
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
267 {
268   return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
269                                                      (__v2di)
270                                                      _mm_setzero_di (),
271                                                      (__mmask8) __U);
272 }
273
274 extern __inline __m256i
275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
277 {
278   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
279                                                         (__v4di) __W,
280                                                         (__mmask8)
281                                                         __U);
282 }
283
284 extern __inline __m256i
285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
286 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
287 {
288   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
289                                                         (__v4di)
290                                                         _mm256_setzero_si256 (),
291                                                         (__mmask8)
292                                                         __U);
293 }
294
295 extern __inline __m128i
296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
297 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
298 {
299   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
300                                                         (__v2di) __W,
301                                                         (__mmask8)
302                                                         __U);
303 }
304
305 extern __inline __m128i
306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
307 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
308 {
309   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
310                                                         (__v2di)
311                                                         _mm_setzero_di (),
312                                                         (__mmask8)
313                                                         __U);
314 }
315
316 extern __inline void
317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
319 {
320   __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
321                                         (__v4di) __A,
322                                         (__mmask8) __U);
323 }
324
325 extern __inline void
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
328 {
329   __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
330                                         (__v2di) __A,
331                                         (__mmask8) __U);
332 }
333
334 extern __inline __m256i
335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
336 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
337 {
338   return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
339                                                      (__v8si) __W,
340                                                      (__mmask8) __U);
341 }
342
343 extern __inline __m256i
344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
346 {
347   return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
348                                                      (__v8si)
349                                                      _mm256_setzero_si256 (),
350                                                      (__mmask8) __U);
351 }
352
353 extern __inline __m128i
354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
355 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
356 {
357   return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
358                                                      (__v4si) __W,
359                                                      (__mmask8) __U);
360 }
361
362 extern __inline __m128i
363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
364 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
365 {
366   return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
367                                                      (__v4si)
368                                                      _mm_setzero_si128 (),
369                                                      (__mmask8) __U);
370 }
371
372 extern __inline __m256i
373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
374 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
375 {
376   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
377                                                         (__v8si) __W,
378                                                         (__mmask8)
379                                                         __U);
380 }
381
382 extern __inline __m256i
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
385 {
386   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
387                                                         (__v8si)
388                                                         _mm256_setzero_si256 (),
389                                                         (__mmask8)
390                                                         __U);
391 }
392
393 extern __inline __m128i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
396 {
397   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
398                                                         (__v4si) __W,
399                                                         (__mmask8)
400                                                         __U);
401 }
402
403 extern __inline __m128i
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
406 {
407   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
408                                                         (__v4si)
409                                                         _mm_setzero_si128 (),
410                                                         (__mmask8)
411                                                         __U);
412 }
413
414 extern __inline void
415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
416 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
417 {
418   __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
419                                         (__v8si) __A,
420                                         (__mmask8) __U);
421 }
422
423 extern __inline void
424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
425 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
426 {
427   __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
428                                         (__v4si) __A,
429                                         (__mmask8) __U);
430 }
431
432 extern __inline __m128i
433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
434 _mm_setzero_hi (void)
435 {
436   return __extension__ (__m128i) (__v8hi)
437   {
438   0, 0, 0, 0, 0, 0, 0, 0};
439 }
440
441 extern __inline __m128d
442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
443 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
444 {
445   return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
446                                                  (__v2df) __B,
447                                                  (__v2df) __W,
448                                                  (__mmask8) __U);
449 }
450
451 extern __inline __m128d
452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453 _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
454 {
455   return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
456                                                  (__v2df) __B,
457                                                  (__v2df)
458                                                  _mm_setzero_pd (),
459                                                  (__mmask8) __U);
460 }
461
462 extern __inline __m256d
463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
464 _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
465                     __m256d __B)
466 {
467   return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
468                                                  (__v4df) __B,
469                                                  (__v4df) __W,
470                                                  (__mmask8) __U);
471 }
472
473 extern __inline __m256d
474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475 _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
476 {
477   return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
478                                                  (__v4df) __B,
479                                                  (__v4df)
480                                                  _mm256_setzero_pd (),
481                                                  (__mmask8) __U);
482 }
483
484 extern __inline __m128
485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486 _mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
487 {
488   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
489                                                 (__v4sf) __B,
490                                                 (__v4sf) __W,
491                                                 (__mmask8) __U);
492 }
493
494 extern __inline __m128
495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496 _mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
497 {
498   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
499                                                 (__v4sf) __B,
500                                                 (__v4sf)
501                                                 _mm_setzero_ps (),
502                                                 (__mmask8) __U);
503 }
504
505 extern __inline __m256
506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
507 _mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
508 {
509   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
510                                                 (__v8sf) __B,
511                                                 (__v8sf) __W,
512                                                 (__mmask8) __U);
513 }
514
515 extern __inline __m256
516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
517 _mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
518 {
519   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
520                                                 (__v8sf) __B,
521                                                 (__v8sf)
522                                                 _mm256_setzero_ps (),
523                                                 (__mmask8) __U);
524 }
525
526 extern __inline __m128d
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
529 {
530   return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
531                                                  (__v2df) __B,
532                                                  (__v2df) __W,
533                                                  (__mmask8) __U);
534 }
535
536 extern __inline __m128d
537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538 _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
539 {
540   return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
541                                                  (__v2df) __B,
542                                                  (__v2df)
543                                                  _mm_setzero_pd (),
544                                                  (__mmask8) __U);
545 }
546
547 extern __inline __m256d
548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
549 _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
550                     __m256d __B)
551 {
552   return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
553                                                  (__v4df) __B,
554                                                  (__v4df) __W,
555                                                  (__mmask8) __U);
556 }
557
558 extern __inline __m256d
559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
560 _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
561 {
562   return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
563                                                  (__v4df) __B,
564                                                  (__v4df)
565                                                  _mm256_setzero_pd (),
566                                                  (__mmask8) __U);
567 }
568
569 extern __inline __m128
570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
571 _mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
572 {
573   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
574                                                 (__v4sf) __B,
575                                                 (__v4sf) __W,
576                                                 (__mmask8) __U);
577 }
578
579 extern __inline __m128
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
582 {
583   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
584                                                 (__v4sf) __B,
585                                                 (__v4sf)
586                                                 _mm_setzero_ps (),
587                                                 (__mmask8) __U);
588 }
589
590 extern __inline __m256
591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592 _mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
593 {
594   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
595                                                 (__v8sf) __B,
596                                                 (__v8sf) __W,
597                                                 (__mmask8) __U);
598 }
599
600 extern __inline __m256
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
603 {
604   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
605                                                 (__v8sf) __B,
606                                                 (__v8sf)
607                                                 _mm256_setzero_ps (),
608                                                 (__mmask8) __U);
609 }
610
611 extern __inline void
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm256_store_epi64 (void *__P, __m256i __A)
614 {
615   *(__m256i *) __P = __A;
616 }
617
618 extern __inline void
619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
620 _mm_store_epi64 (void *__P, __m128i __A)
621 {
622   *(__m128i *) __P = __A;
623 }
624
625 extern __inline __m256d
626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
628 {
629   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
630                                                    (__v4df) __W,
631                                                    (__mmask8) __U);
632 }
633
634 extern __inline __m256d
635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
636 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
637 {
638   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
639                                                    (__v4df)
640                                                    _mm256_setzero_pd (),
641                                                    (__mmask8) __U);
642 }
643
644 extern __inline __m128d
645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
647 {
648   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
649                                                    (__v2df) __W,
650                                                    (__mmask8) __U);
651 }
652
653 extern __inline __m128d
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
656 {
657   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
658                                                    (__v2df)
659                                                    _mm_setzero_pd (),
660                                                    (__mmask8) __U);
661 }
662
663 extern __inline void
664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
665 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
666 {
667   __builtin_ia32_storeupd256_mask ((__v4df *) __P,
668                                    (__v4df) __A,
669                                    (__mmask8) __U);
670 }
671
672 extern __inline void
673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
674 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
675 {
676   __builtin_ia32_storeupd128_mask ((__v2df *) __P,
677                                    (__v2df) __A,
678                                    (__mmask8) __U);
679 }
680
681 extern __inline __m256
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
684 {
685   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
686                                                   (__v8sf) __W,
687                                                   (__mmask8) __U);
688 }
689
690 extern __inline __m256
691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
692 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
693 {
694   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
695                                                   (__v8sf)
696                                                   _mm256_setzero_ps (),
697                                                   (__mmask8) __U);
698 }
699
700 extern __inline __m128
701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
702 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
703 {
704   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
705                                                   (__v4sf) __W,
706                                                   (__mmask8) __U);
707 }
708
709 extern __inline __m128
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
712 {
713   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
714                                                   (__v4sf)
715                                                   _mm_setzero_ps (),
716                                                   (__mmask8) __U);
717 }
718
719 extern __inline void
720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
721 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
722 {
723   __builtin_ia32_storeups256_mask ((__v8sf *) __P,
724                                    (__v8sf) __A,
725                                    (__mmask8) __U);
726 }
727
728 extern __inline void
729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
730 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
731 {
732   __builtin_ia32_storeups128_mask ((__v4sf *) __P,
733                                    (__v4sf) __A,
734                                    (__mmask8) __U);
735 }
736
737 extern __inline __m256i
738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
739 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
740 {
741   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
742                                                      (__v4di) __W,
743                                                      (__mmask8) __U);
744 }
745
746 extern __inline __m256i
747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
748 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
749 {
750   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
751                                                      (__v4di)
752                                                      _mm256_setzero_si256 (),
753                                                      (__mmask8) __U);
754 }
755
756 extern __inline __m128i
757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
758 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
759 {
760   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
761                                                      (__v2di) __W,
762                                                      (__mmask8) __U);
763 }
764
765 extern __inline __m128i
766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
768 {
769   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
770                                                      (__v2di)
771                                                      _mm_setzero_di (),
772                                                      (__mmask8) __U);
773 }
774
775 extern __inline void
776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
778 {
779   __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
780                                      (__v4di) __A,
781                                      (__mmask8) __U);
782 }
783
784 extern __inline void
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
787 {
788   __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
789                                      (__v2di) __A,
790                                      (__mmask8) __U);
791 }
792
793 extern __inline __m256i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
796 {
797   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
798                                                      (__v8si) __W,
799                                                      (__mmask8) __U);
800 }
801
802 extern __inline __m256i
803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
805 {
806   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
807                                                      (__v8si)
808                                                      _mm256_setzero_si256 (),
809                                                      (__mmask8) __U);
810 }
811
812 extern __inline __m128i
813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
815 {
816   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
817                                                      (__v4si) __W,
818                                                      (__mmask8) __U);
819 }
820
821 extern __inline __m128i
822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
824 {
825   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
826                                                      (__v4si)
827                                                      _mm_setzero_si128 (),
828                                                      (__mmask8) __U);
829 }
830
831 extern __inline void
832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
833 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
834 {
835   __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
836                                      (__v8si) __A,
837                                      (__mmask8) __U);
838 }
839
840 extern __inline void
841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
842 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
843 {
844   __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
845                                      (__v4si) __A,
846                                      (__mmask8) __U);
847 }
848
849 extern __inline __m256i
850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
851 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
852 {
853   return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
854                                                  (__v8si) __W,
855                                                  (__mmask8) __U);
856 }
857
858 extern __inline __m256i
859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
860 _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
861 {
862   return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
863                                                  (__v8si)
864                                                  _mm256_setzero_si256 (),
865                                                  (__mmask8) __U);
866 }
867
868 extern __inline __m128i
869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
870 _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
871 {
872   return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
873                                                  (__v4si) __W,
874                                                  (__mmask8) __U);
875 }
876
877 extern __inline __m128i
878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
879 _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
880 {
881   return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
882                                                  (__v4si)
883                                                  _mm_setzero_si128 (),
884                                                  (__mmask8) __U);
885 }
886
887 extern __inline __m256i
888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889 _mm256_abs_epi64 (__m256i __A)
890 {
891   return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
892                                                  (__v4di)
893                                                  _mm256_setzero_si256 (),
894                                                  (__mmask8) -1);
895 }
896
897 extern __inline __m256i
898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
899 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
900 {
901   return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
902                                                  (__v4di) __W,
903                                                  (__mmask8) __U);
904 }
905
906 extern __inline __m256i
907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
909 {
910   return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
911                                                  (__v4di)
912                                                  _mm256_setzero_si256 (),
913                                                  (__mmask8) __U);
914 }
915
916 extern __inline __m128i
917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
918 _mm_abs_epi64 (__m128i __A)
919 {
920   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
921                                                  (__v2di)
922                                                  _mm_setzero_di (),
923                                                  (__mmask8) -1);
924 }
925
926 extern __inline __m128i
927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
928 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
929 {
930   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
931                                                  (__v2di) __W,
932                                                  (__mmask8) __U);
933 }
934
935 extern __inline __m128i
936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
938 {
939   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
940                                                  (__v2di)
941                                                  _mm_setzero_di (),
942                                                  (__mmask8) __U);
943 }
944
945 extern __inline __m128i
946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947 _mm256_cvtpd_epu32 (__m256d __A)
948 {
949   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
950                                                      (__v4si)
951                                                      _mm_setzero_si128 (),
952                                                      (__mmask8) -1);
953 }
954
955 extern __inline __m128i
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
958 {
959   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
960                                                      (__v4si) __W,
961                                                      (__mmask8) __U);
962 }
963
964 extern __inline __m128i
965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
966 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
967 {
968   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
969                                                      (__v4si)
970                                                      _mm_setzero_si128 (),
971                                                      (__mmask8) __U);
972 }
973
974 extern __inline __m128i
975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
976 _mm_cvtpd_epu32 (__m128d __A)
977 {
978   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
979                                                      (__v4si)
980                                                      _mm_setzero_si128 (),
981                                                      (__mmask8) -1);
982 }
983
984 extern __inline __m128i
985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
986 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
987 {
988   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
989                                                      (__v4si) __W,
990                                                      (__mmask8) __U);
991 }
992
993 extern __inline __m128i
994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
995 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
996 {
997   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
998                                                      (__v4si)
999                                                      _mm_setzero_si128 (),
1000                                                      (__mmask8) __U);
1001 }
1002
1003 extern __inline __m256i
1004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1006 {
1007   return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1008                                                      (__v8si) __W,
1009                                                      (__mmask8) __U);
1010 }
1011
1012 extern __inline __m256i
1013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1014 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1015 {
1016   return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1017                                                      (__v8si)
1018                                                      _mm256_setzero_si256 (),
1019                                                      (__mmask8) __U);
1020 }
1021
1022 extern __inline __m128i
1023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1024 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1025 {
1026   return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1027                                                      (__v4si) __W,
1028                                                      (__mmask8) __U);
1029 }
1030
1031 extern __inline __m128i
1032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1033 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1034 {
1035   return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1036                                                      (__v4si)
1037                                                      _mm_setzero_si128 (),
1038                                                      (__mmask8) __U);
1039 }
1040
1041 extern __inline __m256i
1042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1043 _mm256_cvttps_epu32 (__m256 __A)
1044 {
1045   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1046                                                       (__v8si)
1047                                                       _mm256_setzero_si256 (),
1048                                                       (__mmask8) -1);
1049 }
1050
1051 extern __inline __m256i
1052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1054 {
1055   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1056                                                       (__v8si) __W,
1057                                                       (__mmask8) __U);
1058 }
1059
1060 extern __inline __m256i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1063 {
1064   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1065                                                       (__v8si)
1066                                                       _mm256_setzero_si256 (),
1067                                                       (__mmask8) __U);
1068 }
1069
1070 extern __inline __m128i
1071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm_cvttps_epu32 (__m128 __A)
1073 {
1074   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1075                                                       (__v4si)
1076                                                       _mm_setzero_si128 (),
1077                                                       (__mmask8) -1);
1078 }
1079
1080 extern __inline __m128i
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1083 {
1084   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1085                                                       (__v4si) __W,
1086                                                       (__mmask8) __U);
1087 }
1088
1089 extern __inline __m128i
1090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1091 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1092 {
1093   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1094                                                       (__v4si)
1095                                                       _mm_setzero_si128 (),
1096                                                       (__mmask8) __U);
1097 }
1098
1099 extern __inline __m128i
1100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1102 {
1103   return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1104                                                      (__v4si) __W,
1105                                                      (__mmask8) __U);
1106 }
1107
1108 extern __inline __m128i
1109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1110 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1111 {
1112   return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1113                                                      (__v4si)
1114                                                      _mm_setzero_si128 (),
1115                                                      (__mmask8) __U);
1116 }
1117
1118 extern __inline __m128i
1119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1120 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1121 {
1122   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1123                                                      (__v4si) __W,
1124                                                      (__mmask8) __U);
1125 }
1126
1127 extern __inline __m128i
1128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1130 {
1131   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1132                                                      (__v4si)
1133                                                      _mm_setzero_si128 (),
1134                                                      (__mmask8) __U);
1135 }
1136
1137 extern __inline __m128i
1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm256_cvttpd_epu32 (__m256d __A)
1140 {
1141   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1142                                                       (__v4si)
1143                                                       _mm_setzero_si128 (),
1144                                                       (__mmask8) -1);
1145 }
1146
1147 extern __inline __m128i
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1150 {
1151   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1152                                                       (__v4si) __W,
1153                                                       (__mmask8) __U);
1154 }
1155
1156 extern __inline __m128i
1157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1159 {
1160   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1161                                                       (__v4si)
1162                                                       _mm_setzero_si128 (),
1163                                                       (__mmask8) __U);
1164 }
1165
1166 extern __inline __m128i
1167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168 _mm_cvttpd_epu32 (__m128d __A)
1169 {
1170   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1171                                                       (__v4si)
1172                                                       _mm_setzero_si128 (),
1173                                                       (__mmask8) -1);
1174 }
1175
1176 extern __inline __m128i
1177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1179 {
1180   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1181                                                       (__v4si) __W,
1182                                                       (__mmask8) __U);
1183 }
1184
1185 extern __inline __m128i
1186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1187 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1188 {
1189   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1190                                                       (__v4si)
1191                                                       _mm_setzero_si128 (),
1192                                                       (__mmask8) __U);
1193 }
1194
1195 extern __inline __m128i
1196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1198 {
1199   return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1200                                                     (__v4si) __W,
1201                                                     (__mmask8) __U);
1202 }
1203
1204 extern __inline __m128i
1205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1206 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1207 {
1208   return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1209                                                     (__v4si)
1210                                                     _mm_setzero_si128 (),
1211                                                     (__mmask8) __U);
1212 }
1213
1214 extern __inline __m128i
1215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1216 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1217 {
1218   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1219                                                     (__v4si) __W,
1220                                                     (__mmask8) __U);
1221 }
1222
1223 extern __inline __m128i
1224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1226 {
1227   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1228                                                     (__v4si)
1229                                                     _mm_setzero_si128 (),
1230                                                     (__mmask8) __U);
1231 }
1232
1233 extern __inline __m256d
1234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1235 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1236 {
1237   return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1238                                                     (__v4df) __W,
1239                                                     (__mmask8) __U);
1240 }
1241
1242 extern __inline __m256d
1243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1244 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1245 {
1246   return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1247                                                     (__v4df)
1248                                                     _mm256_setzero_pd (),
1249                                                     (__mmask8) __U);
1250 }
1251
1252 extern __inline __m128d
1253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1254 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1255 {
1256   return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1257                                                     (__v2df) __W,
1258                                                     (__mmask8) __U);
1259 }
1260
1261 extern __inline __m128d
1262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1264 {
1265   return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1266                                                     (__v2df)
1267                                                     _mm_setzero_pd (),
1268                                                     (__mmask8) __U);
1269 }
1270
1271 extern __inline __m256d
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm256_cvtepu32_pd (__m128i __A)
1274 {
1275   return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1276                                                      (__v4df)
1277                                                      _mm256_setzero_pd (),
1278                                                      (__mmask8) -1);
1279 }
1280
1281 extern __inline __m256d
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1284 {
1285   return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1286                                                      (__v4df) __W,
1287                                                      (__mmask8) __U);
1288 }
1289
1290 extern __inline __m256d
1291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1293 {
1294   return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1295                                                      (__v4df)
1296                                                      _mm256_setzero_pd (),
1297                                                      (__mmask8) __U);
1298 }
1299
1300 extern __inline __m128d
1301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1302 _mm_cvtepu32_pd (__m128i __A)
1303 {
1304   return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1305                                                      (__v2df)
1306                                                      _mm_setzero_pd (),
1307                                                      (__mmask8) -1);
1308 }
1309
1310 extern __inline __m128d
1311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1313 {
1314   return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1315                                                      (__v2df) __W,
1316                                                      (__mmask8) __U);
1317 }
1318
1319 extern __inline __m128d
1320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1321 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1322 {
1323   return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1324                                                      (__v2df)
1325                                                      _mm_setzero_pd (),
1326                                                      (__mmask8) __U);
1327 }
1328
1329 extern __inline __m256
1330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1331 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1332 {
1333   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1334                                                    (__v8sf) __W,
1335                                                    (__mmask8) __U);
1336 }
1337
1338 extern __inline __m256
1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
1341 {
1342   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1343                                                    (__v8sf)
1344                                                    _mm256_setzero_ps (),
1345                                                    (__mmask8) __U);
1346 }
1347
1348 extern __inline __m128
1349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1351 {
1352   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1353                                                    (__v4sf) __W,
1354                                                    (__mmask8) __U);
1355 }
1356
1357 extern __inline __m128
1358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1359 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
1360 {
1361   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1362                                                    (__v4sf)
1363                                                    _mm_setzero_ps (),
1364                                                    (__mmask8) __U);
1365 }
1366
1367 extern __inline __m256
1368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1369 _mm256_cvtepu32_ps (__m256i __A)
1370 {
1371   return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1372                                                     (__v8sf)
1373                                                     _mm256_setzero_ps (),
1374                                                     (__mmask8) -1);
1375 }
1376
1377 extern __inline __m256
1378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1379 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1380 {
1381   return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1382                                                     (__v8sf) __W,
1383                                                     (__mmask8) __U);
1384 }
1385
1386 extern __inline __m256
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1389 {
1390   return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1391                                                     (__v8sf)
1392                                                     _mm256_setzero_ps (),
1393                                                     (__mmask8) __U);
1394 }
1395
1396 extern __inline __m128
1397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1398 _mm_cvtepu32_ps (__m128i __A)
1399 {
1400   return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1401                                                     (__v4sf)
1402                                                     _mm_setzero_ps (),
1403                                                     (__mmask8) -1);
1404 }
1405
1406 extern __inline __m128
1407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1408 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1409 {
1410   return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1411                                                     (__v4sf) __W,
1412                                                     (__mmask8) __U);
1413 }
1414
1415 extern __inline __m128
1416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1417 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1418 {
1419   return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1420                                                     (__v4sf)
1421                                                     _mm_setzero_ps (),
1422                                                     (__mmask8) __U);
1423 }
1424
1425 extern __inline __m256d
1426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1427 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1428 {
1429   return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1430                                                     (__v4df) __W,
1431                                                     (__mmask8) __U);
1432 }
1433
1434 extern __inline __m256d
1435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1436 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1437 {
1438   return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1439                                                     (__v4df)
1440                                                     _mm256_setzero_pd (),
1441                                                     (__mmask8) __U);
1442 }
1443
1444 extern __inline __m128d
1445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1447 {
1448   return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1449                                                     (__v2df) __W,
1450                                                     (__mmask8) __U);
1451 }
1452
1453 extern __inline __m128d
1454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1455 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1456 {
1457   return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1458                                                     (__v2df)
1459                                                     _mm_setzero_pd (),
1460                                                     (__mmask8) __U);
1461 }
1462
1463 extern __inline __m128i
1464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465 _mm_cvtepi32_epi8 (__m128i __A)
1466 {
1467   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468                                                   (__v16qi)_mm_undefined_si128(),
1469                                                   (__mmask8) -1);
1470 }
1471
1472 extern __inline void
1473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1474 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1475 {
1476   __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1477 }
1478
1479 extern __inline __m128i
1480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1482 {
1483   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1484                                                   (__v16qi) __O, __M);
1485 }
1486
1487 extern __inline __m128i
1488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1489 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1490 {
1491   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1492                                                   (__v16qi)
1493                                                   _mm_setzero_si128 (),
1494                                                   __M);
1495 }
1496
1497 extern __inline __m128i
1498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499 _mm256_cvtepi32_epi8 (__m256i __A)
1500 {
1501   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1502                                                   (__v16qi)_mm_undefined_si128(),
1503                                                   (__mmask8) -1);
1504 }
1505
1506 extern __inline __m128i
1507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1509 {
1510   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511                                                   (__v16qi) __O, __M);
1512 }
1513
1514 extern __inline void
1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1517 {
1518   __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1519 }
1520
1521 extern __inline __m128i
1522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1523 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1524 {
1525   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1526                                                   (__v16qi)
1527                                                   _mm_setzero_si128 (),
1528                                                   __M);
1529 }
1530
1531 extern __inline __m128i
1532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1533 _mm_cvtsepi32_epi8 (__m128i __A)
1534 {
1535   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1536                                                    (__v16qi)_mm_undefined_si128(),
1537                                                    (__mmask8) -1);
1538 }
1539
1540 extern __inline void
1541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1543 {
1544   __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1545 }
1546
1547 extern __inline __m128i
1548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1550 {
1551   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1552                                                    (__v16qi) __O, __M);
1553 }
1554
1555 extern __inline __m128i
1556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1557 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1558 {
1559   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1560                                                    (__v16qi)
1561                                                    _mm_setzero_si128 (),
1562                                                    __M);
1563 }
1564
1565 extern __inline __m128i
1566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1567 _mm256_cvtsepi32_epi8 (__m256i __A)
1568 {
1569   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1570                                                    (__v16qi)_mm_undefined_si128(),
1571                                                    (__mmask8) -1);
1572 }
1573
1574 extern __inline void
1575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1577 {
1578   __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1579 }
1580
1581 extern __inline __m128i
1582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1583 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1584 {
1585   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1586                                                    (__v16qi) __O, __M);
1587 }
1588
1589 extern __inline __m128i
1590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1592 {
1593   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1594                                                    (__v16qi)
1595                                                    _mm_setzero_si128 (),
1596                                                    __M);
1597 }
1598
1599 extern __inline __m128i
1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601 _mm_cvtusepi32_epi8 (__m128i __A)
1602 {
1603   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1604                                                     (__v16qi)_mm_undefined_si128(),
1605                                                     (__mmask8) -1);
1606 }
1607
1608 extern __inline void
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1611 {
1612   __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1613 }
1614
1615 extern __inline __m128i
1616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1618 {
1619   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1620                                                     (__v16qi) __O,
1621                                                     __M);
1622 }
1623
1624 extern __inline __m128i
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1627 {
1628   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1629                                                     (__v16qi)
1630                                                     _mm_setzero_si128 (),
1631                                                     __M);
1632 }
1633
1634 extern __inline __m128i
1635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1636 _mm256_cvtusepi32_epi8 (__m256i __A)
1637 {
1638   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1639                                                     (__v16qi)_mm_undefined_si128(),
1640                                                     (__mmask8) -1);
1641 }
1642
1643 extern __inline void
1644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1645 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1646 {
1647   __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1648 }
1649
1650 extern __inline __m128i
1651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1653 {
1654   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1655                                                     (__v16qi) __O,
1656                                                     __M);
1657 }
1658
1659 extern __inline __m128i
1660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1661 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1662 {
1663   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1664                                                     (__v16qi)
1665                                                     _mm_setzero_si128 (),
1666                                                     __M);
1667 }
1668
1669 extern __inline __m128i
1670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1671 _mm_cvtepi32_epi16 (__m128i __A)
1672 {
1673   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1674                                                   (__v8hi) _mm_setzero_si128 (),
1675                                                   (__mmask8) -1);
1676 }
1677
1678 extern __inline void
1679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1680 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1681 {
1682   __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1683 }
1684
1685 extern __inline __m128i
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1688 {
1689   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1690                                                   (__v8hi) __O, __M);
1691 }
1692
1693 extern __inline __m128i
1694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1696 {
1697   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1698                                                   (__v8hi)
1699                                                   _mm_setzero_si128 (),
1700                                                   __M);
1701 }
1702
1703 extern __inline __m128i
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm256_cvtepi32_epi16 (__m256i __A)
1706 {
1707   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1708                                                   (__v8hi)_mm_setzero_si128 (),
1709                                                   (__mmask8) -1);
1710 }
1711
1712 extern __inline void
1713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714 _mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
1715 {
1716   __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1717 }
1718
1719 extern __inline __m128i
1720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1721 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1722 {
1723   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1724                                                   (__v8hi) __O, __M);
1725 }
1726
1727 extern __inline __m128i
1728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1729 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1730 {
1731   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1732                                                   (__v8hi)
1733                                                   _mm_setzero_si128 (),
1734                                                   __M);
1735 }
1736
1737 extern __inline __m128i
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm_cvtsepi32_epi16 (__m128i __A)
1740 {
1741   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1742                                                    (__v8hi)_mm_setzero_si128 (),
1743                                                    (__mmask8) -1);
1744 }
1745
1746 extern __inline void
1747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1748 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1749 {
1750   __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1751 }
1752
1753 extern __inline __m128i
1754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1755 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1756 {
1757   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1758                                                    (__v8hi)__O,
1759                                                    __M);
1760 }
1761
1762 extern __inline __m128i
1763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1764 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1765 {
1766   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1767                                                    (__v8hi)
1768                                                    _mm_setzero_si128 (),
1769                                                    __M);
1770 }
1771
1772 extern __inline __m128i
1773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1774 _mm256_cvtsepi32_epi16 (__m256i __A)
1775 {
1776   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1777                                                    (__v8hi)_mm_undefined_si128(),
1778                                                    (__mmask8) -1);
1779 }
1780
1781 extern __inline void
1782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1784 {
1785   __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1786 }
1787
1788 extern __inline __m128i
1789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1790 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1791 {
1792   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1793                                                    (__v8hi) __O, __M);
1794 }
1795
1796 extern __inline __m128i
1797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1799 {
1800   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1801                                                    (__v8hi)
1802                                                    _mm_setzero_si128 (),
1803                                                    __M);
1804 }
1805
1806 extern __inline __m128i
1807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1808 _mm_cvtusepi32_epi16 (__m128i __A)
1809 {
1810   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1811                                                     (__v8hi)_mm_undefined_si128(),
1812                                                     (__mmask8) -1);
1813 }
1814
1815 extern __inline void
1816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1817 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1818 {
1819   __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1820 }
1821
1822 extern __inline __m128i
1823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1824 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1825 {
1826   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1827                                                     (__v8hi) __O, __M);
1828 }
1829
1830 extern __inline __m128i
1831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1833 {
1834   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1835                                                     (__v8hi)
1836                                                     _mm_setzero_si128 (),
1837                                                     __M);
1838 }
1839
1840 extern __inline __m128i
1841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1842 _mm256_cvtusepi32_epi16 (__m256i __A)
1843 {
1844   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1845                                                     (__v8hi)_mm_undefined_si128(),
1846                                                     (__mmask8) -1);
1847 }
1848
1849 extern __inline void
1850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1851 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1852 {
1853   __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1854 }
1855
1856 extern __inline __m128i
1857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1859 {
1860   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1861                                                     (__v8hi) __O, __M);
1862 }
1863
1864 extern __inline __m128i
1865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1866 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1867 {
1868   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1869                                                     (__v8hi)
1870                                                     _mm_setzero_si128 (),
1871                                                     __M);
1872 }
1873
1874 extern __inline __m128i
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm_cvtepi64_epi8 (__m128i __A)
1877 {
1878   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1879                                                   (__v16qi)_mm_undefined_si128(),
1880                                                   (__mmask8) -1);
1881 }
1882
1883 extern __inline void
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1886 {
1887   __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1888 }
1889
1890 extern __inline __m128i
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1893 {
1894   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1895                                                   (__v16qi) __O, __M);
1896 }
1897
1898 extern __inline __m128i
1899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1900 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1901 {
1902   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1903                                                   (__v16qi)
1904                                                   _mm_setzero_si128 (),
1905                                                   __M);
1906 }
1907
1908 extern __inline __m128i
1909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1910 _mm256_cvtepi64_epi8 (__m256i __A)
1911 {
1912   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1913                                                   (__v16qi)_mm_undefined_si128(),
1914                                                   (__mmask8) -1);
1915 }
1916
1917 extern __inline void
1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1920 {
1921   __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1922 }
1923
1924 extern __inline __m128i
1925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1926 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1927 {
1928   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1929                                                   (__v16qi) __O, __M);
1930 }
1931
1932 extern __inline __m128i
1933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1935 {
1936   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1937                                                   (__v16qi)
1938                                                   _mm_setzero_si128 (),
1939                                                   __M);
1940 }
1941
1942 extern __inline __m128i
1943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1944 _mm_cvtsepi64_epi8 (__m128i __A)
1945 {
1946   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1947                                                    (__v16qi)_mm_undefined_si128(),
1948                                                    (__mmask8) -1);
1949 }
1950
1951 extern __inline void
1952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1953 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1954 {
1955   __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1956 }
1957
1958 extern __inline __m128i
1959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1961 {
1962   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1963                                                    (__v16qi) __O, __M);
1964 }
1965
1966 extern __inline __m128i
1967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1969 {
1970   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1971                                                    (__v16qi)
1972                                                    _mm_setzero_si128 (),
1973                                                    __M);
1974 }
1975
1976 extern __inline __m128i
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm256_cvtsepi64_epi8 (__m256i __A)
1979 {
1980   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1981                                                    (__v16qi)_mm_undefined_si128(),
1982                                                    (__mmask8) -1);
1983 }
1984
1985 extern __inline void
1986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1987 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1988 {
1989   __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1990 }
1991
1992 extern __inline __m128i
1993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1995 {
1996   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1997                                                    (__v16qi) __O, __M);
1998 }
1999
2000 extern __inline __m128i
2001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2002 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2003 {
2004   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2005                                                    (__v16qi)
2006                                                    _mm_setzero_si128 (),
2007                                                    __M);
2008 }
2009
2010 extern __inline __m128i
2011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2012 _mm_cvtusepi64_epi8 (__m128i __A)
2013 {
2014   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2015                                                     (__v16qi)_mm_undefined_si128(),
2016                                                     (__mmask8) -1);
2017 }
2018
2019 extern __inline void
2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2022 {
2023   __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2024 }
2025
2026 extern __inline __m128i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2029 {
2030   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031                                                     (__v16qi) __O,
2032                                                     __M);
2033 }
2034
2035 extern __inline __m128i
2036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2038 {
2039   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040                                                     (__v16qi)
2041                                                     _mm_setzero_si128 (),
2042                                                     __M);
2043 }
2044
2045 extern __inline __m128i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm256_cvtusepi64_epi8 (__m256i __A)
2048 {
2049   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2050                                                     (__v16qi)_mm_undefined_si128(),
2051                                                     (__mmask8) -1);
2052 }
2053
2054 extern __inline void
2055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2057 {
2058   __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2059 }
2060
2061 extern __inline __m128i
2062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2064 {
2065   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2066                                                     (__v16qi) __O,
2067                                                     __M);
2068 }
2069
2070 extern __inline __m128i
2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2073 {
2074   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2075                                                     (__v16qi)
2076                                                     _mm_setzero_si128 (),
2077                                                     __M);
2078 }
2079
2080 extern __inline __m128i
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm_cvtepi64_epi16 (__m128i __A)
2083 {
2084   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2085                                                   (__v8hi)_mm_undefined_si128(),
2086                                                   (__mmask8) -1);
2087 }
2088
2089 extern __inline void
2090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2091 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2092 {
2093   __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2094 }
2095
2096 extern __inline __m128i
2097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2099 {
2100   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2101                                                   (__v8hi)__O,
2102                                                   __M);
2103 }
2104
2105 extern __inline __m128i
2106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2107 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2108 {
2109   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2110                                                   (__v8hi)
2111                                                   _mm_setzero_si128 (),
2112                                                   __M);
2113 }
2114
2115 extern __inline __m128i
2116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2117 _mm256_cvtepi64_epi16 (__m256i __A)
2118 {
2119   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2120                                                   (__v8hi)_mm_undefined_si128(),
2121                                                   (__mmask8) -1);
2122 }
2123
2124 extern __inline void
2125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2126 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2127 {
2128   __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2129 }
2130
2131 extern __inline __m128i
2132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2133 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2134 {
2135   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2136                                                   (__v8hi) __O, __M);
2137 }
2138
2139 extern __inline __m128i
2140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2141 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2142 {
2143   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2144                                                   (__v8hi)
2145                                                   _mm_setzero_si128 (),
2146                                                   __M);
2147 }
2148
2149 extern __inline __m128i
2150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2151 _mm_cvtsepi64_epi16 (__m128i __A)
2152 {
2153   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2154                                                    (__v8hi)_mm_undefined_si128(),
2155                                                    (__mmask8) -1);
2156 }
2157
2158 extern __inline void
2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2161 {
2162   __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2163 }
2164
2165 extern __inline __m128i
2166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2167 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2168 {
2169   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2170                                                    (__v8hi) __O, __M);
2171 }
2172
2173 extern __inline __m128i
2174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2175 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2176 {
2177   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2178                                                    (__v8hi)
2179                                                    _mm_setzero_si128 (),
2180                                                    __M);
2181 }
2182
2183 extern __inline __m128i
2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185 _mm256_cvtsepi64_epi16 (__m256i __A)
2186 {
2187   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2188                                                    (__v8hi)_mm_undefined_si128(),
2189                                                    (__mmask8) -1);
2190 }
2191
2192 extern __inline void
2193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2195 {
2196   __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2197 }
2198
2199 extern __inline __m128i
2200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2201 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2202 {
2203   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2204                                                    (__v8hi) __O, __M);
2205 }
2206
2207 extern __inline __m128i
2208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2209 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2210 {
2211   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2212                                                    (__v8hi)
2213                                                    _mm_setzero_si128 (),
2214                                                    __M);
2215 }
2216
2217 extern __inline __m128i
2218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2219 _mm_cvtusepi64_epi16 (__m128i __A)
2220 {
2221   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2222                                                     (__v8hi)_mm_undefined_si128(),
2223                                                     (__mmask8) -1);
2224 }
2225
2226 extern __inline void
2227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2228 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2229 {
2230   __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2231 }
2232
2233 extern __inline __m128i
2234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2236 {
2237   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2238                                                     (__v8hi) __O, __M);
2239 }
2240
2241 extern __inline __m128i
2242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2243 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2244 {
2245   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2246                                                     (__v8hi)
2247                                                     _mm_setzero_si128 (),
2248                                                     __M);
2249 }
2250
2251 extern __inline __m128i
2252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2253 _mm256_cvtusepi64_epi16 (__m256i __A)
2254 {
2255   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2256                                                     (__v8hi)_mm_undefined_si128(),
2257                                                     (__mmask8) -1);
2258 }
2259
2260 extern __inline void
2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2263 {
2264   __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2265 }
2266
2267 extern __inline __m128i
2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2270 {
2271   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2272                                                     (__v8hi) __O, __M);
2273 }
2274
2275 extern __inline __m128i
2276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2278 {
2279   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2280                                                     (__v8hi)
2281                                                     _mm_setzero_si128 (),
2282                                                     __M);
2283 }
2284
2285 extern __inline __m128i
2286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287 _mm_cvtepi64_epi32 (__m128i __A)
2288 {
2289   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2290                                                   (__v4si)_mm_undefined_si128(),
2291                                                   (__mmask8) -1);
2292 }
2293
2294 extern __inline void
2295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2297 {
2298   __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2299 }
2300
2301 extern __inline __m128i
2302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2303 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2304 {
2305   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2306                                                   (__v4si) __O, __M);
2307 }
2308
2309 extern __inline __m128i
2310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2312 {
2313   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314                                                   (__v4si)
2315                                                   _mm_setzero_si128 (),
2316                                                   __M);
2317 }
2318
2319 extern __inline __m128i
2320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2321 _mm256_cvtepi64_epi32 (__m256i __A)
2322 {
2323   return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2324                                                   (__v4si)_mm_undefined_si128(),
2325                                                   (__mmask8) -1);
2326 }
2327
2328 extern __inline void
2329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2330 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2331 {
2332   __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2333 }
2334
2335 extern __inline __m128i
2336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2337 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2338 {
2339   return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2340                                                   (__v4si) __O, __M);
2341 }
2342
2343 extern __inline __m128i
2344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2346 {
2347   return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2348                                                   (__v4si)
2349                                                   _mm_setzero_si128 (),
2350                                                   __M);
2351 }
2352
2353 extern __inline __m128i
2354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2355 _mm_cvtsepi64_epi32 (__m128i __A)
2356 {
2357   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2358                                                    (__v4si)_mm_undefined_si128(),
2359                                                    (__mmask8) -1);
2360 }
2361
2362 extern __inline void
2363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2365 {
2366   __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2367 }
2368
2369 extern __inline __m128i
2370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2371 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2372 {
2373   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2374                                                    (__v4si) __O, __M);
2375 }
2376
2377 extern __inline __m128i
2378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2380 {
2381   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2382                                                    (__v4si)
2383                                                    _mm_setzero_si128 (),
2384                                                    __M);
2385 }
2386
2387 extern __inline __m128i
2388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389 _mm256_cvtsepi64_epi32 (__m256i __A)
2390 {
2391   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2392                                                    (__v4si)_mm_undefined_si128(),
2393                                                    (__mmask8) -1);
2394 }
2395
2396 extern __inline void
2397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2398 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2399 {
2400   __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2401 }
2402
2403 extern __inline __m128i
2404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2406 {
2407   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2408                                                    (__v4si)__O,
2409                                                    __M);
2410 }
2411
2412 extern __inline __m128i
2413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2414 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2415 {
2416   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2417                                                    (__v4si)
2418                                                    _mm_setzero_si128 (),
2419                                                    __M);
2420 }
2421
2422 extern __inline __m128i
2423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2424 _mm_cvtusepi64_epi32 (__m128i __A)
2425 {
2426   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2427                                                     (__v4si)_mm_undefined_si128(),
2428                                                     (__mmask8) -1);
2429 }
2430
2431 extern __inline void
2432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2433 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2434 {
2435   __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2436 }
2437
2438 extern __inline __m128i
2439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2440 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2441 {
2442   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2443                                                     (__v4si) __O, __M);
2444 }
2445
2446 extern __inline __m128i
2447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2448 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2449 {
2450   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2451                                                     (__v4si)
2452                                                     _mm_setzero_si128 (),
2453                                                     __M);
2454 }
2455
2456 extern __inline __m128i
2457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2458 _mm256_cvtusepi64_epi32 (__m256i __A)
2459 {
2460   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2461                                                     (__v4si)_mm_undefined_si128(),
2462                                                     (__mmask8) -1);
2463 }
2464
2465 extern __inline void
2466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2467 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2468 {
2469   __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2470 }
2471
2472 extern __inline __m128i
2473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2475 {
2476   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2477                                                     (__v4si) __O, __M);
2478 }
2479
2480 extern __inline __m128i
2481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2482 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2483 {
2484   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2485                                                     (__v4si)
2486                                                     _mm_setzero_si128 (),
2487                                                     __M);
2488 }
2489
2490 extern __inline __m256
2491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2493 {
2494   return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2495                                                       (__v8sf) __O,
2496                                                       __M);
2497 }
2498
2499 extern __inline __m256
2500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2501 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2502 {
2503   return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2504                                                       (__v8sf)
2505                                                       _mm256_setzero_ps (),
2506                                                       __M);
2507 }
2508
2509 extern __inline __m128
2510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2511 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2512 {
2513   return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2514                                                       (__v4sf) __O,
2515                                                       __M);
2516 }
2517
2518 extern __inline __m128
2519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2521 {
2522   return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2523                                                       (__v4sf)
2524                                                       _mm_setzero_ps (),
2525                                                       __M);
2526 }
2527
2528 extern __inline __m256d
2529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2530 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2531 {
2532   return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2533                                                        (__v4df) __O,
2534                                                        __M);
2535 }
2536
2537 extern __inline __m256d
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2540 {
2541   return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2542                                                        (__v4df)
2543                                                        _mm256_setzero_pd (),
2544                                                        __M);
2545 }
2546
2547 extern __inline __m256i
2548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2549 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2550 {
2551   return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2552                                                        (__v8si) __O,
2553                                                        __M);
2554 }
2555
2556 extern __inline __m256i
2557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2558 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2559 {
2560   return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2561                                                        (__v8si)
2562                                                        _mm256_setzero_si256 (),
2563                                                        __M);
2564 }
2565
2566 extern __inline __m256i
2567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2568 _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2569 {
2570   return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2571                                                            __M);
2572 }
2573
2574 extern __inline __m256i
2575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2576 _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2577 {
2578   return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2579                                                            (__v8si)
2580                                                            _mm256_setzero_si256 (),
2581                                                            __M);
2582 }
2583
2584 extern __inline __m128i
2585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2587 {
2588   return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2589                                                        (__v4si) __O,
2590                                                        __M);
2591 }
2592
2593 extern __inline __m128i
2594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2595 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2596 {
2597   return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2598                                                        (__v4si)
2599                                                        _mm_setzero_si128 (),
2600                                                        __M);
2601 }
2602
2603 extern __inline __m128i
2604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605 _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2606 {
2607   return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2608                                                            __M);
2609 }
2610
2611 extern __inline __m128i
2612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2613 _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2614 {
2615   return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2616                                                            (__v4si)
2617                                                            _mm_setzero_si128 (),
2618                                                            __M);
2619 }
2620
2621 extern __inline __m256i
2622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2623 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2624 {
2625   return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2626                                                        (__v4di) __O,
2627                                                        __M);
2628 }
2629
2630 extern __inline __m256i
2631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2632 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2633 {
2634   return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2635                                                        (__v4di)
2636                                                        _mm256_setzero_si256 (),
2637                                                        __M);
2638 }
2639
2640 extern __inline __m256i
2641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2643 {
2644   return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2645                                                            __M);
2646 }
2647
2648 extern __inline __m256i
2649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2651 {
2652   return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2653                                                            (__v4di)
2654                                                            _mm256_setzero_si256 (),
2655                                                            __M);
2656 }
2657
2658 extern __inline __m128i
2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2661 {
2662   return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2663                                                        (__v2di) __O,
2664                                                        __M);
2665 }
2666
2667 extern __inline __m128i
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2670 {
2671   return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2672                                                        (__v2di)
2673                                                        _mm_setzero_si128 (),
2674                                                        __M);
2675 }
2676
2677 extern __inline __m128i
2678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2679 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2680 {
2681   return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2682                                                            __M);
2683 }
2684
2685 extern __inline __m128i
2686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2687 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2688 {
2689   return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2690                                                            (__v2di)
2691                                                            _mm_setzero_si128 (),
2692                                                            __M);
2693 }
2694
2695 extern __inline __m256
2696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2697 _mm256_broadcast_f32x4 (__m128 __A)
2698 {
2699   return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2700                                                           (__v8sf)_mm256_undefined_pd (),
2701                                                           (__mmask8) -1);
2702 }
2703
2704 extern __inline __m256
2705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2706 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2707 {
2708   return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2709                                                           (__v8sf) __O,
2710                                                           __M);
2711 }
2712
2713 extern __inline __m256
2714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2715 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2716 {
2717   return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2718                                                           (__v8sf)
2719                                                           _mm256_setzero_ps (),
2720                                                           __M);
2721 }
2722
2723 extern __inline __m256i
2724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2725 _mm256_broadcast_i32x4 (__m128i __A)
2726 {
2727   return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2728                                                            __A,
2729                                                            (__v8si)_mm256_undefined_si256 (),
2730                                                            (__mmask8) -1);
2731 }
2732
2733 extern __inline __m256i
2734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2735 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2736 {
2737   return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2738                                                            __A,
2739                                                            (__v8si)
2740                                                            __O, __M);
2741 }
2742
2743 extern __inline __m256i
2744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2745 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2746 {
2747   return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2748                                                            __A,
2749                                                            (__v8si)
2750                                                            _mm256_setzero_si256 (),
2751                                                            __M);
2752 }
2753
2754 extern __inline __m256i
2755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2756 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2757 {
2758   return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2759                                                     (__v8si) __W,
2760                                                     (__mmask8) __U);
2761 }
2762
2763 extern __inline __m256i
2764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2765 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2766 {
2767   return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2768                                                     (__v8si)
2769                                                     _mm256_setzero_si256 (),
2770                                                     (__mmask8) __U);
2771 }
2772
2773 extern __inline __m128i
2774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2775 _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2776 {
2777   return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2778                                                     (__v4si) __W,
2779                                                     (__mmask8) __U);
2780 }
2781
2782 extern __inline __m128i
2783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784 _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2785 {
2786   return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2787                                                     (__v4si)
2788                                                     _mm_setzero_si128 (),
2789                                                     (__mmask8) __U);
2790 }
2791
2792 extern __inline __m256i
2793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2794 _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2795 {
2796   return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2797                                                     (__v4di) __W,
2798                                                     (__mmask8) __U);
2799 }
2800
2801 extern __inline __m256i
2802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2803 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2804 {
2805   return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2806                                                     (__v4di)
2807                                                     _mm256_setzero_si256 (),
2808                                                     (__mmask8) __U);
2809 }
2810
2811 extern __inline __m128i
2812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2813 _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2814 {
2815   return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2816                                                     (__v2di) __W,
2817                                                     (__mmask8) __U);
2818 }
2819
2820 extern __inline __m128i
2821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822 _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2823 {
2824   return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2825                                                     (__v2di)
2826                                                     _mm_setzero_si128 (),
2827                                                     (__mmask8) __U);
2828 }
2829
2830 extern __inline __m256i
2831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2832 _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2833 {
2834   return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2835                                                     (__v8si) __W,
2836                                                     (__mmask8) __U);
2837 }
2838
2839 extern __inline __m256i
2840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2841 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2842 {
2843   return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2844                                                     (__v8si)
2845                                                     _mm256_setzero_si256 (),
2846                                                     (__mmask8) __U);
2847 }
2848
2849 extern __inline __m128i
2850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2851 _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2852 {
2853   return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2854                                                     (__v4si) __W,
2855                                                     (__mmask8) __U);
2856 }
2857
2858 extern __inline __m128i
2859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2860 _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2861 {
2862   return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2863                                                     (__v4si)
2864                                                     _mm_setzero_si128 (),
2865                                                     (__mmask8) __U);
2866 }
2867
2868 extern __inline __m256i
2869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2870 _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2871 {
2872   return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2873                                                     (__v4di) __W,
2874                                                     (__mmask8) __U);
2875 }
2876
2877 extern __inline __m256i
2878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2879 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2880 {
2881   return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2882                                                     (__v4di)
2883                                                     _mm256_setzero_si256 (),
2884                                                     (__mmask8) __U);
2885 }
2886
2887 extern __inline __m128i
2888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2889 _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2890 {
2891   return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2892                                                     (__v2di) __W,
2893                                                     (__mmask8) __U);
2894 }
2895
2896 extern __inline __m128i
2897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2898 _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2899 {
2900   return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2901                                                     (__v2di)
2902                                                     _mm_setzero_si128 (),
2903                                                     (__mmask8) __U);
2904 }
2905
2906 extern __inline __m256i
2907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2908 _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2909 {
2910   return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2911                                                     (__v4di) __W,
2912                                                     (__mmask8) __U);
2913 }
2914
2915 extern __inline __m256i
2916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2917 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2918 {
2919   return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2920                                                     (__v4di)
2921                                                     _mm256_setzero_si256 (),
2922                                                     (__mmask8) __U);
2923 }
2924
2925 extern __inline __m128i
2926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927 _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2928 {
2929   return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2930                                                     (__v2di) __W,
2931                                                     (__mmask8) __U);
2932 }
2933
2934 extern __inline __m128i
2935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2936 _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2937 {
2938   return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2939                                                     (__v2di)
2940                                                     _mm_setzero_si128 (),
2941                                                     (__mmask8) __U);
2942 }
2943
2944 extern __inline __m256i
2945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2946 _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2947 {
2948   return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2949                                                     (__v8si) __W,
2950                                                     (__mmask8) __U);
2951 }
2952
2953 extern __inline __m256i
2954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2955 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2956 {
2957   return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2958                                                     (__v8si)
2959                                                     _mm256_setzero_si256 (),
2960                                                     (__mmask8) __U);
2961 }
2962
2963 extern __inline __m128i
2964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2965 _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2966 {
2967   return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2968                                                     (__v4si) __W,
2969                                                     (__mmask8) __U);
2970 }
2971
2972 extern __inline __m128i
2973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2974 _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2975 {
2976   return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2977                                                     (__v4si)
2978                                                     _mm_setzero_si128 (),
2979                                                     (__mmask8) __U);
2980 }
2981
2982 extern __inline __m256i
2983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2984 _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2985 {
2986   return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2987                                                     (__v4di) __W,
2988                                                     (__mmask8) __U);
2989 }
2990
2991 extern __inline __m256i
2992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2993 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2994 {
2995   return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2996                                                     (__v4di)
2997                                                     _mm256_setzero_si256 (),
2998                                                     (__mmask8) __U);
2999 }
3000
3001 extern __inline __m128i
3002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003 _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3004 {
3005   return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3006                                                     (__v2di) __W,
3007                                                     (__mmask8) __U);
3008 }
3009
3010 extern __inline __m128i
3011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3012 _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3013 {
3014   return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3015                                                     (__v2di)
3016                                                     _mm_setzero_si128 (),
3017                                                     (__mmask8) __U);
3018 }
3019
3020 extern __inline __m256i
3021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3022 _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3023 {
3024   return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3025                                                     (__v8si) __W,
3026                                                     (__mmask8) __U);
3027 }
3028
3029 extern __inline __m256i
3030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3031 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3032 {
3033   return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3034                                                     (__v8si)
3035                                                     _mm256_setzero_si256 (),
3036                                                     (__mmask8) __U);
3037 }
3038
3039 extern __inline __m128i
3040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3041 _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3042 {
3043   return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3044                                                     (__v4si) __W,
3045                                                     (__mmask8) __U);
3046 }
3047
3048 extern __inline __m128i
3049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3050 _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3051 {
3052   return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3053                                                     (__v4si)
3054                                                     _mm_setzero_si128 (),
3055                                                     (__mmask8) __U);
3056 }
3057
3058 extern __inline __m256i
3059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3060 _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3061 {
3062   return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3063                                                     (__v4di) __W,
3064                                                     (__mmask8) __U);
3065 }
3066
3067 extern __inline __m256i
3068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3069 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3070 {
3071   return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3072                                                     (__v4di)
3073                                                     _mm256_setzero_si256 (),
3074                                                     (__mmask8) __U);
3075 }
3076
3077 extern __inline __m128i
3078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3079 _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3080 {
3081   return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3082                                                     (__v2di) __W,
3083                                                     (__mmask8) __U);
3084 }
3085
3086 extern __inline __m128i
3087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3088 _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3089 {
3090   return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3091                                                     (__v2di)
3092                                                     _mm_setzero_si128 (),
3093                                                     (__mmask8) __U);
3094 }
3095
3096 extern __inline __m256i
3097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3098 _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3099 {
3100   return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3101                                                     (__v4di) __W,
3102                                                     (__mmask8) __U);
3103 }
3104
3105 extern __inline __m256i
3106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3107 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3108 {
3109   return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3110                                                     (__v4di)
3111                                                     _mm256_setzero_si256 (),
3112                                                     (__mmask8) __U);
3113 }
3114
3115 extern __inline __m128i
3116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3117 _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3118 {
3119   return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3120                                                     (__v2di) __W,
3121                                                     (__mmask8) __U);
3122 }
3123
3124 extern __inline __m128i
3125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3126 _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3127 {
3128   return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3129                                                     (__v2di)
3130                                                     _mm_setzero_si128 (),
3131                                                     (__mmask8) __U);
3132 }
3133
3134 extern __inline __m256d
3135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3136 _mm256_rcp14_pd (__m256d __A)
3137 {
3138   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3139                                               (__v4df)
3140                                               _mm256_setzero_pd (),
3141                                               (__mmask8) -1);
3142 }
3143
3144 extern __inline __m256d
3145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3146 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3147 {
3148   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3149                                               (__v4df) __W,
3150                                               (__mmask8) __U);
3151 }
3152
3153 extern __inline __m256d
3154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3155 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3156 {
3157   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3158                                               (__v4df)
3159                                               _mm256_setzero_pd (),
3160                                               (__mmask8) __U);
3161 }
3162
3163 extern __inline __m128d
3164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3165 _mm_rcp14_pd (__m128d __A)
3166 {
3167   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3168                                               (__v2df)
3169                                               _mm_setzero_pd (),
3170                                               (__mmask8) -1);
3171 }
3172
3173 extern __inline __m128d
3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3176 {
3177   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3178                                               (__v2df) __W,
3179                                               (__mmask8) __U);
3180 }
3181
3182 extern __inline __m128d
3183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3184 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3185 {
3186   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3187                                               (__v2df)
3188                                               _mm_setzero_pd (),
3189                                               (__mmask8) __U);
3190 }
3191
3192 extern __inline __m256
3193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3194 _mm256_rcp14_ps (__m256 __A)
3195 {
3196   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3197                                              (__v8sf)
3198                                              _mm256_setzero_ps (),
3199                                              (__mmask8) -1);
3200 }
3201
3202 extern __inline __m256
3203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3204 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3205 {
3206   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3207                                              (__v8sf) __W,
3208                                              (__mmask8) __U);
3209 }
3210
3211 extern __inline __m256
3212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3213 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3214 {
3215   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3216                                              (__v8sf)
3217                                              _mm256_setzero_ps (),
3218                                              (__mmask8) __U);
3219 }
3220
3221 extern __inline __m128
3222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223 _mm_rcp14_ps (__m128 __A)
3224 {
3225   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3226                                              (__v4sf)
3227                                              _mm_setzero_ps (),
3228                                              (__mmask8) -1);
3229 }
3230
3231 extern __inline __m128
3232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3233 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3234 {
3235   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3236                                              (__v4sf) __W,
3237                                              (__mmask8) __U);
3238 }
3239
3240 extern __inline __m128
3241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3242 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3243 {
3244   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3245                                              (__v4sf)
3246                                              _mm_setzero_ps (),
3247                                              (__mmask8) __U);
3248 }
3249
3250 extern __inline __m256d
3251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3252 _mm256_rsqrt14_pd (__m256d __A)
3253 {
3254   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3255                                                      (__v4df)
3256                                                      _mm256_setzero_pd (),
3257                                                      (__mmask8) -1);
3258 }
3259
3260 extern __inline __m256d
3261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3262 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3263 {
3264   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3265                                                      (__v4df) __W,
3266                                                      (__mmask8) __U);
3267 }
3268
3269 extern __inline __m256d
3270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3271 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3272 {
3273   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3274                                                      (__v4df)
3275                                                      _mm256_setzero_pd (),
3276                                                      (__mmask8) __U);
3277 }
3278
3279 extern __inline __m128d
3280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3281 _mm_rsqrt14_pd (__m128d __A)
3282 {
3283   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3284                                                      (__v2df)
3285                                                      _mm_setzero_pd (),
3286                                                      (__mmask8) -1);
3287 }
3288
3289 extern __inline __m128d
3290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3291 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3292 {
3293   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3294                                                      (__v2df) __W,
3295                                                      (__mmask8) __U);
3296 }
3297
3298 extern __inline __m128d
3299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3300 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3301 {
3302   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3303                                                      (__v2df)
3304                                                      _mm_setzero_pd (),
3305                                                      (__mmask8) __U);
3306 }
3307
3308 extern __inline __m256
3309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3310 _mm256_rsqrt14_ps (__m256 __A)
3311 {
3312   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3313                                                     (__v8sf)
3314                                                     _mm256_setzero_ps (),
3315                                                     (__mmask8) -1);
3316 }
3317
3318 extern __inline __m256
3319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3320 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3321 {
3322   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3323                                                     (__v8sf) __W,
3324                                                     (__mmask8) __U);
3325 }
3326
3327 extern __inline __m256
3328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3329 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3330 {
3331   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3332                                                     (__v8sf)
3333                                                     _mm256_setzero_ps (),
3334                                                     (__mmask8) __U);
3335 }
3336
3337 extern __inline __m128
3338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3339 _mm_rsqrt14_ps (__m128 __A)
3340 {
3341   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3342                                                     (__v4sf)
3343                                                     _mm_setzero_ps (),
3344                                                     (__mmask8) -1);
3345 }
3346
3347 extern __inline __m128
3348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3349 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3350 {
3351   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3352                                                     (__v4sf) __W,
3353                                                     (__mmask8) __U);
3354 }
3355
3356 extern __inline __m128
3357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3358 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3359 {
3360   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3361                                                     (__v4sf)
3362                                                     _mm_setzero_ps (),
3363                                                     (__mmask8) __U);
3364 }
3365
3366 extern __inline __m256d
3367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3368 _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3369 {
3370   return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3371                                                   (__v4df) __W,
3372                                                   (__mmask8) __U);
3373 }
3374
3375 extern __inline __m256d
3376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3377 _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3378 {
3379   return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3380                                                   (__v4df)
3381                                                   _mm256_setzero_pd (),
3382                                                   (__mmask8) __U);
3383 }
3384
3385 extern __inline __m128d
3386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3387 _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3388 {
3389   return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3390                                                   (__v2df) __W,
3391                                                   (__mmask8) __U);
3392 }
3393
3394 extern __inline __m128d
3395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3396 _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3397 {
3398   return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3399                                                   (__v2df)
3400                                                   _mm_setzero_pd (),
3401                                                   (__mmask8) __U);
3402 }
3403
3404 extern __inline __m256
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3407 {
3408   return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3409                                                  (__v8sf) __W,
3410                                                  (__mmask8) __U);
3411 }
3412
3413 extern __inline __m256
3414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3415 _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3416 {
3417   return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3418                                                  (__v8sf)
3419                                                  _mm256_setzero_ps (),
3420                                                  (__mmask8) __U);
3421 }
3422
3423 extern __inline __m128
3424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425 _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3426 {
3427   return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3428                                                  (__v4sf) __W,
3429                                                  (__mmask8) __U);
3430 }
3431
3432 extern __inline __m128
3433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3434 _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3435 {
3436   return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3437                                                  (__v4sf)
3438                                                  _mm_setzero_ps (),
3439                                                  (__mmask8) __U);
3440 }
3441
3442 extern __inline __m256i
3443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3444 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3445                        __m256i __B)
3446 {
3447   return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3448                                                  (__v8si) __B,
3449                                                  (__v8si) __W,
3450                                                  (__mmask8) __U);
3451 }
3452
3453 extern __inline __m256i
3454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3455 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3456 {
3457   return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3458                                                  (__v8si) __B,
3459                                                  (__v8si)
3460                                                  _mm256_setzero_si256 (),
3461                                                  (__mmask8) __U);
3462 }
3463
3464 extern __inline __m256i
3465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3466 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3467                        __m256i __B)
3468 {
3469   return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3470                                                  (__v4di) __B,
3471                                                  (__v4di) __W,
3472                                                  (__mmask8) __U);
3473 }
3474
3475 extern __inline __m256i
3476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3477 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3478 {
3479   return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3480                                                  (__v4di) __B,
3481                                                  (__v4di)
3482                                                  _mm256_setzero_si256 (),
3483                                                  (__mmask8) __U);
3484 }
3485
3486 extern __inline __m256i
3487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3488 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3489                        __m256i __B)
3490 {
3491   return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3492                                                  (__v8si) __B,
3493                                                  (__v8si) __W,
3494                                                  (__mmask8) __U);
3495 }
3496
3497 extern __inline __m256i
3498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3499 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3500 {
3501   return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3502                                                  (__v8si) __B,
3503                                                  (__v8si)
3504                                                  _mm256_setzero_si256 (),
3505                                                  (__mmask8) __U);
3506 }
3507
3508 extern __inline __m256i
3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3511                        __m256i __B)
3512 {
3513   return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3514                                                  (__v4di) __B,
3515                                                  (__v4di) __W,
3516                                                  (__mmask8) __U);
3517 }
3518
3519 extern __inline __m256i
3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3522 {
3523   return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3524                                                  (__v4di) __B,
3525                                                  (__v4di)
3526                                                  _mm256_setzero_si256 (),
3527                                                  (__mmask8) __U);
3528 }
3529
3530 extern __inline __m128i
3531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3532 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3533                     __m128i __B)
3534 {
3535   return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3536                                                  (__v4si) __B,
3537                                                  (__v4si) __W,
3538                                                  (__mmask8) __U);
3539 }
3540
3541 extern __inline __m128i
3542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3543 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3544 {
3545   return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3546                                                  (__v4si) __B,
3547                                                  (__v4si)
3548                                                  _mm_setzero_si128 (),
3549                                                  (__mmask8) __U);
3550 }
3551
3552 extern __inline __m128i
3553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3554 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3555                     __m128i __B)
3556 {
3557   return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3558                                                  (__v2di) __B,
3559                                                  (__v2di) __W,
3560                                                  (__mmask8) __U);
3561 }
3562
3563 extern __inline __m128i
3564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3565 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3566 {
3567   return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3568                                                  (__v2di) __B,
3569                                                  (__v2di)
3570                                                  _mm_setzero_si128 (),
3571                                                  (__mmask8) __U);
3572 }
3573
3574 extern __inline __m128i
3575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3577                     __m128i __B)
3578 {
3579   return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3580                                                  (__v4si) __B,
3581                                                  (__v4si) __W,
3582                                                  (__mmask8) __U);
3583 }
3584
3585 extern __inline __m128i
3586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3587 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3588 {
3589   return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3590                                                  (__v4si) __B,
3591                                                  (__v4si)
3592                                                  _mm_setzero_si128 (),
3593                                                  (__mmask8) __U);
3594 }
3595
3596 extern __inline __m128i
3597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3598 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3599                     __m128i __B)
3600 {
3601   return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3602                                                  (__v2di) __B,
3603                                                  (__v2di) __W,
3604                                                  (__mmask8) __U);
3605 }
3606
3607 extern __inline __m128i
3608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3609 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3610 {
3611   return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3612                                                  (__v2di) __B,
3613                                                  (__v2di)
3614                                                  _mm_setzero_si128 (),
3615                                                  (__mmask8) __U);
3616 }
3617
3618 extern __inline __m256
3619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3620 _mm256_getexp_ps (__m256 __A)
3621 {
3622   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3623                                                    (__v8sf)
3624                                                    _mm256_setzero_ps (),
3625                                                    (__mmask8) -1);
3626 }
3627
3628 extern __inline __m256
3629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3630 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3631 {
3632   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3633                                                    (__v8sf) __W,
3634                                                    (__mmask8) __U);
3635 }
3636
3637 extern __inline __m256
3638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3639 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3640 {
3641   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3642                                                    (__v8sf)
3643                                                    _mm256_setzero_ps (),
3644                                                    (__mmask8) __U);
3645 }
3646
3647 extern __inline __m256d
3648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3649 _mm256_getexp_pd (__m256d __A)
3650 {
3651   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3652                                                     (__v4df)
3653                                                     _mm256_setzero_pd (),
3654                                                     (__mmask8) -1);
3655 }
3656
3657 extern __inline __m256d
3658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3659 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3660 {
3661   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3662                                                     (__v4df) __W,
3663                                                     (__mmask8) __U);
3664 }
3665
3666 extern __inline __m256d
3667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3668 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3669 {
3670   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3671                                                     (__v4df)
3672                                                     _mm256_setzero_pd (),
3673                                                     (__mmask8) __U);
3674 }
3675
3676 extern __inline __m128
3677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3678 _mm_getexp_ps (__m128 __A)
3679 {
3680   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3681                                                    (__v4sf)
3682                                                    _mm_setzero_ps (),
3683                                                    (__mmask8) -1);
3684 }
3685
3686 extern __inline __m128
3687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3688 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3689 {
3690   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3691                                                    (__v4sf) __W,
3692                                                    (__mmask8) __U);
3693 }
3694
3695 extern __inline __m128
3696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3697 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3698 {
3699   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3700                                                    (__v4sf)
3701                                                    _mm_setzero_ps (),
3702                                                    (__mmask8) __U);
3703 }
3704
3705 extern __inline __m128d
3706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3707 _mm_getexp_pd (__m128d __A)
3708 {
3709   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3710                                                     (__v2df)
3711                                                     _mm_setzero_pd (),
3712                                                     (__mmask8) -1);
3713 }
3714
3715 extern __inline __m128d
3716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3717 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3718 {
3719   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3720                                                     (__v2df) __W,
3721                                                     (__mmask8) __U);
3722 }
3723
3724 extern __inline __m128d
3725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3726 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3727 {
3728   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3729                                                     (__v2df)
3730                                                     _mm_setzero_pd (),
3731                                                     (__mmask8) __U);
3732 }
3733
3734 extern __inline __m256i
3735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3736 _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3737                        __m128i __B)
3738 {
3739   return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3740                                                  (__v4si) __B,
3741                                                  (__v8si) __W,
3742                                                  (__mmask8) __U);
3743 }
3744
3745 extern __inline __m256i
3746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3747 _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3748 {
3749   return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3750                                                  (__v4si) __B,
3751                                                  (__v8si)
3752                                                  _mm256_setzero_si256 (),
3753                                                  (__mmask8) __U);
3754 }
3755
3756 extern __inline __m128i
3757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3758 _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3759                     __m128i __B)
3760 {
3761   return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3762                                                  (__v4si) __B,
3763                                                  (__v4si) __W,
3764                                                  (__mmask8) __U);
3765 }
3766
3767 extern __inline __m128i
3768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3769 _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3770 {
3771   return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3772                                                  (__v4si) __B,
3773                                                  (__v4si)
3774                                                  _mm_setzero_si128 (),
3775                                                  (__mmask8) __U);
3776 }
3777
3778 extern __inline __m256i
3779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3780 _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3781                        __m128i __B)
3782 {
3783   return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3784                                                  (__v2di) __B,
3785                                                  (__v4di) __W,
3786                                                  (__mmask8) __U);
3787 }
3788
3789 extern __inline __m256i
3790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3791 _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3792 {
3793   return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3794                                                  (__v2di) __B,
3795                                                  (__v4di)
3796                                                  _mm256_setzero_si256 (),
3797                                                  (__mmask8) __U);
3798 }
3799
3800 extern __inline __m128i
3801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3802 _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3803                     __m128i __B)
3804 {
3805   return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3806                                                  (__v2di) __B,
3807                                                  (__v2di) __W,
3808                                                  (__mmask8) __U);
3809 }
3810
3811 extern __inline __m128i
3812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3813 _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3814 {
3815   return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3816                                                  (__v2di) __B,
3817                                                  (__v2di)
3818                                                  _mm_setzero_di (),
3819                                                  (__mmask8) __U);
3820 }
3821
3822 extern __inline __m256i
3823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3824 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3825                        __m256i __B)
3826 {
3827   return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3828                                                  (__v8si) __B,
3829                                                  (__v8si) __W,
3830                                                  (__mmask8) __U);
3831 }
3832
3833 extern __inline __m256i
3834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3835 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3836 {
3837   return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3838                                                  (__v8si) __B,
3839                                                  (__v8si)
3840                                                  _mm256_setzero_si256 (),
3841                                                  (__mmask8) __U);
3842 }
3843
3844 extern __inline __m256d
3845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3846 _mm256_scalef_pd (__m256d __A, __m256d __B)
3847 {
3848   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3849                                                     (__v4df) __B,
3850                                                     (__v4df)
3851                                                     _mm256_setzero_pd (),
3852                                                     (__mmask8) -1);
3853 }
3854
3855 extern __inline __m256d
3856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3857 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3858                        __m256d __B)
3859 {
3860   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3861                                                     (__v4df) __B,
3862                                                     (__v4df) __W,
3863                                                     (__mmask8) __U);
3864 }
3865
3866 extern __inline __m256d
3867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3868 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3869 {
3870   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3871                                                     (__v4df) __B,
3872                                                     (__v4df)
3873                                                     _mm256_setzero_pd (),
3874                                                     (__mmask8) __U);
3875 }
3876
3877 extern __inline __m256
3878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3879 _mm256_scalef_ps (__m256 __A, __m256 __B)
3880 {
3881   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3882                                                    (__v8sf) __B,
3883                                                    (__v8sf)
3884                                                    _mm256_setzero_ps (),
3885                                                    (__mmask8) -1);
3886 }
3887
3888 extern __inline __m256
3889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3890 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3891                        __m256 __B)
3892 {
3893   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3894                                                    (__v8sf) __B,
3895                                                    (__v8sf) __W,
3896                                                    (__mmask8) __U);
3897 }
3898
3899 extern __inline __m256
3900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3901 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3902 {
3903   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3904                                                    (__v8sf) __B,
3905                                                    (__v8sf)
3906                                                    _mm256_setzero_ps (),
3907                                                    (__mmask8) __U);
3908 }
3909
3910 extern __inline __m128d
3911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3912 _mm_scalef_pd (__m128d __A, __m128d __B)
3913 {
3914   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3915                                                     (__v2df) __B,
3916                                                     (__v2df)
3917                                                     _mm_setzero_pd (),
3918                                                     (__mmask8) -1);
3919 }
3920
3921 extern __inline __m128d
3922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3923 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3924                     __m128d __B)
3925 {
3926   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3927                                                     (__v2df) __B,
3928                                                     (__v2df) __W,
3929                                                     (__mmask8) __U);
3930 }
3931
3932 extern __inline __m128d
3933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3934 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3935 {
3936   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3937                                                     (__v2df) __B,
3938                                                     (__v2df)
3939                                                     _mm_setzero_pd (),
3940                                                     (__mmask8) __U);
3941 }
3942
3943 extern __inline __m128
3944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3945 _mm_scalef_ps (__m128 __A, __m128 __B)
3946 {
3947   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3948                                                    (__v4sf) __B,
3949                                                    (__v4sf)
3950                                                    _mm_setzero_ps (),
3951                                                    (__mmask8) -1);
3952 }
3953
3954 extern __inline __m128
3955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3956 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3957 {
3958   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3959                                                    (__v4sf) __B,
3960                                                    (__v4sf) __W,
3961                                                    (__mmask8) __U);
3962 }
3963
3964 extern __inline __m128
3965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3966 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3967 {
3968   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3969                                                    (__v4sf) __B,
3970                                                    (__v4sf)
3971                                                    _mm_setzero_ps (),
3972                                                    (__mmask8) __U);
3973 }
3974
3975 extern __inline __m256d
3976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3977 _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3978                       __m256d __C)
3979 {
3980   return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3981                                                     (__v4df) __B,
3982                                                     (__v4df) __C,
3983                                                     (__mmask8) __U);
3984 }
3985
3986 extern __inline __m256d
3987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3988 _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
3989                        __mmask8 __U)
3990 {
3991   return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
3992                                                      (__v4df) __B,
3993                                                      (__v4df) __C,
3994                                                      (__mmask8) __U);
3995 }
3996
3997 extern __inline __m256d
3998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999 _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4000                        __m256d __C)
4001 {
4002   return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4003                                                      (__v4df) __B,
4004                                                      (__v4df) __C,
4005                                                      (__mmask8) __U);
4006 }
4007
4008 extern __inline __m128d
4009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4010 _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4011 {
4012   return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4013                                                     (__v2df) __B,
4014                                                     (__v2df) __C,
4015                                                     (__mmask8) __U);
4016 }
4017
4018 extern __inline __m128d
4019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4020 _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4021                     __mmask8 __U)
4022 {
4023   return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4024                                                      (__v2df) __B,
4025                                                      (__v2df) __C,
4026                                                      (__mmask8) __U);
4027 }
4028
4029 extern __inline __m128d
4030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4031 _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4032                     __m128d __C)
4033 {
4034   return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4035                                                      (__v2df) __B,
4036                                                      (__v2df) __C,
4037                                                      (__mmask8) __U);
4038 }
4039
4040 extern __inline __m256
4041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4042 _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4043 {
4044   return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4045                                                    (__v8sf) __B,
4046                                                    (__v8sf) __C,
4047                                                    (__mmask8) __U);
4048 }
4049
4050 extern __inline __m256
4051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4052 _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4053                        __mmask8 __U)
4054 {
4055   return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4056                                                     (__v8sf) __B,
4057                                                     (__v8sf) __C,
4058                                                     (__mmask8) __U);
4059 }
4060
4061 extern __inline __m256
4062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4063 _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4064                        __m256 __C)
4065 {
4066   return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4067                                                     (__v8sf) __B,
4068                                                     (__v8sf) __C,
4069                                                     (__mmask8) __U);
4070 }
4071
4072 extern __inline __m128
4073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4074 _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4075 {
4076   return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4077                                                    (__v4sf) __B,
4078                                                    (__v4sf) __C,
4079                                                    (__mmask8) __U);
4080 }
4081
4082 extern __inline __m128
4083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4084 _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4085 {
4086   return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4087                                                     (__v4sf) __B,
4088                                                     (__v4sf) __C,
4089                                                     (__mmask8) __U);
4090 }
4091
4092 extern __inline __m128
4093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4094 _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4095 {
4096   return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4097                                                     (__v4sf) __B,
4098                                                     (__v4sf) __C,
4099                                                     (__mmask8) __U);
4100 }
4101
4102 extern __inline __m256d
4103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4104 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4105                       __m256d __C)
4106 {
4107   return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4108                                                     (__v4df) __B,
4109                                                     -(__v4df) __C,
4110                                                     (__mmask8) __U);
4111 }
4112
4113 extern __inline __m256d
4114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4115 _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4116                        __mmask8 __U)
4117 {
4118   return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4119                                                      (__v4df) __B,
4120                                                      (__v4df) __C,
4121                                                      (__mmask8) __U);
4122 }
4123
4124 extern __inline __m256d
4125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4126 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4127                        __m256d __C)
4128 {
4129   return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4130                                                      (__v4df) __B,
4131                                                      -(__v4df) __C,
4132                                                      (__mmask8) __U);
4133 }
4134
4135 extern __inline __m128d
4136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4137 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4138 {
4139   return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4140                                                     (__v2df) __B,
4141                                                     -(__v2df) __C,
4142                                                     (__mmask8) __U);
4143 }
4144
4145 extern __inline __m128d
4146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4147 _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4148                     __mmask8 __U)
4149 {
4150   return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4151                                                      (__v2df) __B,
4152                                                      (__v2df) __C,
4153                                                      (__mmask8) __U);
4154 }
4155
4156 extern __inline __m128d
4157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4158 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4159                     __m128d __C)
4160 {
4161   return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4162                                                      (__v2df) __B,
4163                                                      -(__v2df) __C,
4164                                                      (__mmask8) __U);
4165 }
4166
4167 extern __inline __m256
4168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4169 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4170 {
4171   return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4172                                                    (__v8sf) __B,
4173                                                    -(__v8sf) __C,
4174                                                    (__mmask8) __U);
4175 }
4176
4177 extern __inline __m256
4178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4179 _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4180                        __mmask8 __U)
4181 {
4182   return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4183                                                     (__v8sf) __B,
4184                                                     (__v8sf) __C,
4185                                                     (__mmask8) __U);
4186 }
4187
4188 extern __inline __m256
4189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4190 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4191                        __m256 __C)
4192 {
4193   return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4194                                                     (__v8sf) __B,
4195                                                     -(__v8sf) __C,
4196                                                     (__mmask8) __U);
4197 }
4198
4199 extern __inline __m128
4200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4201 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4202 {
4203   return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4204                                                    (__v4sf) __B,
4205                                                    -(__v4sf) __C,
4206                                                    (__mmask8) __U);
4207 }
4208
4209 extern __inline __m128
4210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4211 _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4212 {
4213   return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4214                                                     (__v4sf) __B,
4215                                                     (__v4sf) __C,
4216                                                     (__mmask8) __U);
4217 }
4218
4219 extern __inline __m128
4220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4221 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4222 {
4223   return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4224                                                     (__v4sf) __B,
4225                                                     -(__v4sf) __C,
4226                                                     (__mmask8) __U);
4227 }
4228
4229 extern __inline __m256d
4230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4231 _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4232                          __m256d __C)
4233 {
4234   return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4235                                                        (__v4df) __B,
4236                                                        (__v4df) __C,
4237                                                        (__mmask8) __U);
4238 }
4239
4240 extern __inline __m256d
4241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4242 _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4243                           __mmask8 __U)
4244 {
4245   return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4246                                                         (__v4df) __B,
4247                                                         (__v4df) __C,
4248                                                         (__mmask8)
4249                                                         __U);
4250 }
4251
4252 extern __inline __m256d
4253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254 _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4255                           __m256d __C)
4256 {
4257   return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4258                                                         (__v4df) __B,
4259                                                         (__v4df) __C,
4260                                                         (__mmask8)
4261                                                         __U);
4262 }
4263
4264 extern __inline __m128d
4265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4266 _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4267                       __m128d __C)
4268 {
4269   return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4270                                                        (__v2df) __B,
4271                                                        (__v2df) __C,
4272                                                        (__mmask8) __U);
4273 }
4274
4275 extern __inline __m128d
4276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4277 _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4278                        __mmask8 __U)
4279 {
4280   return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4281                                                         (__v2df) __B,
4282                                                         (__v2df) __C,
4283                                                         (__mmask8)
4284                                                         __U);
4285 }
4286
4287 extern __inline __m128d
4288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4289 _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4290                        __m128d __C)
4291 {
4292   return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4293                                                         (__v2df) __B,
4294                                                         (__v2df) __C,
4295                                                         (__mmask8)
4296                                                         __U);
4297 }
4298
4299 extern __inline __m256
4300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4301 _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4302                          __m256 __C)
4303 {
4304   return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4305                                                       (__v8sf) __B,
4306                                                       (__v8sf) __C,
4307                                                       (__mmask8) __U);
4308 }
4309
4310 extern __inline __m256
4311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4312 _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4313                           __mmask8 __U)
4314 {
4315   return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4316                                                        (__v8sf) __B,
4317                                                        (__v8sf) __C,
4318                                                        (__mmask8) __U);
4319 }
4320
4321 extern __inline __m256
4322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4323 _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4324                           __m256 __C)
4325 {
4326   return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4327                                                        (__v8sf) __B,
4328                                                        (__v8sf) __C,
4329                                                        (__mmask8) __U);
4330 }
4331
4332 extern __inline __m128
4333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4334 _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4335 {
4336   return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4337                                                       (__v4sf) __B,
4338                                                       (__v4sf) __C,
4339                                                       (__mmask8) __U);
4340 }
4341
4342 extern __inline __m128
4343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4344 _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4345                        __mmask8 __U)
4346 {
4347   return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4348                                                        (__v4sf) __B,
4349                                                        (__v4sf) __C,
4350                                                        (__mmask8) __U);
4351 }
4352
4353 extern __inline __m128
4354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4355 _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4356                        __m128 __C)
4357 {
4358   return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4359                                                        (__v4sf) __B,
4360                                                        (__v4sf) __C,
4361                                                        (__mmask8) __U);
4362 }
4363
4364 extern __inline __m256d
4365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4366 _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4367                          __m256d __C)
4368 {
4369   return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4370                                                        (__v4df) __B,
4371                                                        -(__v4df) __C,
4372                                                        (__mmask8) __U);
4373 }
4374
4375 extern __inline __m256d
4376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4377 _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4378                           __mmask8 __U)
4379 {
4380   return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4381                                                         (__v4df) __B,
4382                                                         (__v4df) __C,
4383                                                         (__mmask8)
4384                                                         __U);
4385 }
4386
4387 extern __inline __m256d
4388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4389 _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4390                           __m256d __C)
4391 {
4392   return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4393                                                         (__v4df) __B,
4394                                                         -(__v4df) __C,
4395                                                         (__mmask8)
4396                                                         __U);
4397 }
4398
4399 extern __inline __m128d
4400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4401 _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4402                       __m128d __C)
4403 {
4404   return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4405                                                        (__v2df) __B,
4406                                                        -(__v2df) __C,
4407                                                        (__mmask8) __U);
4408 }
4409
4410 extern __inline __m128d
4411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4412 _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4413                        __mmask8 __U)
4414 {
4415   return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4416                                                         (__v2df) __B,
4417                                                         (__v2df) __C,
4418                                                         (__mmask8)
4419                                                         __U);
4420 }
4421
4422 extern __inline __m128d
4423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4424 _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4425                        __m128d __C)
4426 {
4427   return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4428                                                         (__v2df) __B,
4429                                                         -(__v2df) __C,
4430                                                         (__mmask8)
4431                                                         __U);
4432 }
4433
4434 extern __inline __m256
4435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4436 _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4437                          __m256 __C)
4438 {
4439   return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4440                                                       (__v8sf) __B,
4441                                                       -(__v8sf) __C,
4442                                                       (__mmask8) __U);
4443 }
4444
4445 extern __inline __m256
4446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4447 _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4448                           __mmask8 __U)
4449 {
4450   return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4451                                                        (__v8sf) __B,
4452                                                        (__v8sf) __C,
4453                                                        (__mmask8) __U);
4454 }
4455
4456 extern __inline __m256
4457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4458 _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4459                           __m256 __C)
4460 {
4461   return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4462                                                        (__v8sf) __B,
4463                                                        -(__v8sf) __C,
4464                                                        (__mmask8) __U);
4465 }
4466
4467 extern __inline __m128
4468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4469 _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4470 {
4471   return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4472                                                       (__v4sf) __B,
4473                                                       -(__v4sf) __C,
4474                                                       (__mmask8) __U);
4475 }
4476
4477 extern __inline __m128
4478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4479 _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4480                        __mmask8 __U)
4481 {
4482   return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4483                                                        (__v4sf) __B,
4484                                                        (__v4sf) __C,
4485                                                        (__mmask8) __U);
4486 }
4487
4488 extern __inline __m128
4489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4490 _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4491                        __m128 __C)
4492 {
4493   return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4494                                                        (__v4sf) __B,
4495                                                        -(__v4sf) __C,
4496                                                        (__mmask8) __U);
4497 }
4498
4499 extern __inline __m256d
4500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4501 _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4502                        __m256d __C)
4503 {
4504   return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4505                                                      (__v4df) __B,
4506                                                      (__v4df) __C,
4507                                                      (__mmask8) __U);
4508 }
4509
4510 extern __inline __m256d
4511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4512 _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4513                         __mmask8 __U)
4514 {
4515   return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4516                                                      (__v4df) __B,
4517                                                      (__v4df) __C,
4518                                                      (__mmask8) __U);
4519 }
4520
4521 extern __inline __m256d
4522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4523 _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4524                         __m256d __C)
4525 {
4526   return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4527                                                      (__v4df) __B,
4528                                                      (__v4df) __C,
4529                                                      (__mmask8) __U);
4530 }
4531
4532 extern __inline __m128d
4533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4534 _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4535                     __m128d __C)
4536 {
4537   return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4538                                                      (__v2df) __B,
4539                                                      (__v2df) __C,
4540                                                      (__mmask8) __U);
4541 }
4542
4543 extern __inline __m128d
4544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4545 _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4546                      __mmask8 __U)
4547 {
4548   return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4549                                                      (__v2df) __B,
4550                                                      (__v2df) __C,
4551                                                      (__mmask8) __U);
4552 }
4553
4554 extern __inline __m128d
4555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4556 _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4557                      __m128d __C)
4558 {
4559   return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4560                                                      (__v2df) __B,
4561                                                      (__v2df) __C,
4562                                                      (__mmask8) __U);
4563 }
4564
4565 extern __inline __m256
4566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4567 _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4568                        __m256 __C)
4569 {
4570   return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4571                                                     (__v8sf) __B,
4572                                                     (__v8sf) __C,
4573                                                     (__mmask8) __U);
4574 }
4575
4576 extern __inline __m256
4577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578 _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4579                         __mmask8 __U)
4580 {
4581   return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4582                                                     (__v8sf) __B,
4583                                                     (__v8sf) __C,
4584                                                     (__mmask8) __U);
4585 }
4586
4587 extern __inline __m256
4588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4589 _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4590                         __m256 __C)
4591 {
4592   return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4593                                                     (__v8sf) __B,
4594                                                     (__v8sf) __C,
4595                                                     (__mmask8) __U);
4596 }
4597
4598 extern __inline __m128
4599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4600 _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4601 {
4602   return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4603                                                     (__v4sf) __B,
4604                                                     (__v4sf) __C,
4605                                                     (__mmask8) __U);
4606 }
4607
4608 extern __inline __m128
4609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4610 _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4611 {
4612   return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4613                                                     (__v4sf) __B,
4614                                                     (__v4sf) __C,
4615                                                     (__mmask8) __U);
4616 }
4617
4618 extern __inline __m128
4619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4620 _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4621 {
4622   return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4623                                                     (__v4sf) __B,
4624                                                     (__v4sf) __C,
4625                                                     (__mmask8) __U);
4626 }
4627
4628 extern __inline __m256d
4629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4630 _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4631                        __m256d __C)
4632 {
4633   return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4634                                                      (__v4df) __B,
4635                                                      (__v4df) __C,
4636                                                      (__mmask8) __U);
4637 }
4638
4639 extern __inline __m256d
4640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4641 _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4642                         __mmask8 __U)
4643 {
4644   return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4645                                                       (__v4df) __B,
4646                                                       (__v4df) __C,
4647                                                       (__mmask8) __U);
4648 }
4649
4650 extern __inline __m256d
4651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4652 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4653                         __m256d __C)
4654 {
4655   return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4656                                                      (__v4df) __B,
4657                                                      -(__v4df) __C,
4658                                                      (__mmask8) __U);
4659 }
4660
4661 extern __inline __m128d
4662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4663 _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4664                     __m128d __C)
4665 {
4666   return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4667                                                      (__v2df) __B,
4668                                                      (__v2df) __C,
4669                                                      (__mmask8) __U);
4670 }
4671
4672 extern __inline __m128d
4673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4674 _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4675                      __mmask8 __U)
4676 {
4677   return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4678                                                       (__v2df) __B,
4679                                                       (__v2df) __C,
4680                                                       (__mmask8) __U);
4681 }
4682
4683 extern __inline __m128d
4684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4685 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4686                      __m128d __C)
4687 {
4688   return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4689                                                      (__v2df) __B,
4690                                                      -(__v2df) __C,
4691                                                      (__mmask8) __U);
4692 }
4693
4694 extern __inline __m256
4695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4696 _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4697                        __m256 __C)
4698 {
4699   return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4700                                                     (__v8sf) __B,
4701                                                     (__v8sf) __C,
4702                                                     (__mmask8) __U);
4703 }
4704
4705 extern __inline __m256
4706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4707 _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4708                         __mmask8 __U)
4709 {
4710   return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4711                                                      (__v8sf) __B,
4712                                                      (__v8sf) __C,
4713                                                      (__mmask8) __U);
4714 }
4715
4716 extern __inline __m256
4717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4718 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4719                         __m256 __C)
4720 {
4721   return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4722                                                     (__v8sf) __B,
4723                                                     -(__v8sf) __C,
4724                                                     (__mmask8) __U);
4725 }
4726
4727 extern __inline __m128
4728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4729 _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4730 {
4731   return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4732                                                     (__v4sf) __B,
4733                                                     (__v4sf) __C,
4734                                                     (__mmask8) __U);
4735 }
4736
4737 extern __inline __m128
4738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4739 _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4740 {
4741   return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4742                                                      (__v4sf) __B,
4743                                                      (__v4sf) __C,
4744                                                      (__mmask8) __U);
4745 }
4746
4747 extern __inline __m128
4748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4749 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4750 {
4751   return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4752                                                     (__v4sf) __B,
4753                                                     -(__v4sf) __C,
4754                                                     (__mmask8) __U);
4755 }
4756
4757 extern __inline __m128i
4758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4759 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4760                     __m128i __B)
4761 {
4762   return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4763                                                  (__v4si) __B,
4764                                                  (__v4si) __W,
4765                                                  (__mmask8) __U);
4766 }
4767
4768 extern __inline __m128i
4769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4770 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4771 {
4772   return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4773                                                  (__v4si) __B,
4774                                                  (__v4si)
4775                                                  _mm_setzero_si128 (),
4776                                                  (__mmask8) __U);
4777 }
4778
4779 extern __inline __m256i
4780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4781 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4782                           __m256i __B)
4783 {
4784   return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4785                                                   (__v8si) __B,
4786                                                   (__v8si) __W,
4787                                                   (__mmask8) __U);
4788 }
4789
4790 extern __inline __m256i
4791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4792 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4793 {
4794   return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4795                                                   (__v8si) __B,
4796                                                   (__v8si)
4797                                                   _mm256_setzero_si256 (),
4798                                                   (__mmask8) __U);
4799 }
4800
4801 extern __inline __m128i
4802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4803 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4804                        __m128i __B)
4805 {
4806   return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4807                                                   (__v4si) __B,
4808                                                   (__v4si) __W,
4809                                                   (__mmask8) __U);
4810 }
4811
4812 extern __inline __m128i
4813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4814 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4815 {
4816   return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4817                                                   (__v4si) __B,
4818                                                   (__v4si)
4819                                                   _mm_setzero_si128 (),
4820                                                   (__mmask8) __U);
4821 }
4822
4823 extern __inline __m256i
4824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4825 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4826                       __m256i __B)
4827 {
4828   return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4829                                                 (__v8si) __B,
4830                                                 (__v8si) __W,
4831                                                 (__mmask8) __U);
4832 }
4833
4834 extern __inline __m256i
4835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4836 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4837 {
4838   return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4839                                                 (__v8si) __B,
4840                                                 (__v8si)
4841                                                 _mm256_setzero_si256 (),
4842                                                 (__mmask8) __U);
4843 }
4844
4845 extern __inline __m128i
4846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4847 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4848 {
4849   return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4850                                                 (__v4si) __B,
4851                                                 (__v4si) __W,
4852                                                 (__mmask8) __U);
4853 }
4854
4855 extern __inline __m128i
4856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4857 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4858 {
4859   return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4860                                                 (__v4si) __B,
4861                                                 (__v4si)
4862                                                 _mm_setzero_si128 (),
4863                                                 (__mmask8) __U);
4864 }
4865
4866 extern __inline __m256i
4867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4868 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4869                        __m256i __B)
4870 {
4871   return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4872                                                  (__v8si) __B,
4873                                                  (__v8si) __W,
4874                                                  (__mmask8) __U);
4875 }
4876
4877 extern __inline __m256i
4878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4879 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4880 {
4881   return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4882                                                  (__v8si) __B,
4883                                                  (__v8si)
4884                                                  _mm256_setzero_si256 (),
4885                                                  (__mmask8) __U);
4886 }
4887
4888 extern __inline __m128i
4889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4890 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4891                     __m128i __B)
4892 {
4893   return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4894                                                  (__v4si) __B,
4895                                                  (__v4si) __W,
4896                                                  (__mmask8) __U);
4897 }
4898
4899 extern __inline __m128i
4900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4901 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4902 {
4903   return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4904                                                  (__v4si) __B,
4905                                                  (__v4si)
4906                                                  _mm_setzero_si128 (),
4907                                                  (__mmask8) __U);
4908 }
4909
4910 extern __inline __m128
4911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4912 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4913 {
4914   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4915                                                 (__v4sf) __W,
4916                                                 (__mmask8) __U);
4917 }
4918
4919 extern __inline __m128
4920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4922 {
4923   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4924                                                 (__v4sf)
4925                                                 _mm_setzero_ps (),
4926                                                 (__mmask8) __U);
4927 }
4928
4929 extern __inline __m128
4930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4931 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4932 {
4933   return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4934                                                    (__v4sf) __W,
4935                                                    (__mmask8) __U);
4936 }
4937
4938 extern __inline __m128
4939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4940 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4941 {
4942   return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4943                                                    (__v4sf)
4944                                                    _mm_setzero_ps (),
4945                                                    (__mmask8) __U);
4946 }
4947
4948 extern __inline __m256i
4949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4950 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4951 {
4952   return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4953                                                     (__v8si) __W,
4954                                                     (__mmask8) __U);
4955 }
4956
4957 extern __inline __m256i
4958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4959 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4960 {
4961   return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4962                                                     (__v8si)
4963                                                     _mm256_setzero_si256 (),
4964                                                     (__mmask8) __U);
4965 }
4966
4967 extern __inline __m128i
4968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4969 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
4970 {
4971   return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4972                                                     (__v4si) __W,
4973                                                     (__mmask8) __U);
4974 }
4975
4976 extern __inline __m128i
4977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4978 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
4979 {
4980   return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4981                                                     (__v4si)
4982                                                     _mm_setzero_si128 (),
4983                                                     (__mmask8) __U);
4984 }
4985
4986 extern __inline __m256i
4987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4988 _mm256_cvtps_epu32 (__m256 __A)
4989 {
4990   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
4991                                                      (__v8si)
4992                                                      _mm256_setzero_si256 (),
4993                                                      (__mmask8) -1);
4994 }
4995
4996 extern __inline __m256i
4997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4998 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
4999 {
5000   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5001                                                      (__v8si) __W,
5002                                                      (__mmask8) __U);
5003 }
5004
5005 extern __inline __m256i
5006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5007 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5008 {
5009   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5010                                                      (__v8si)
5011                                                      _mm256_setzero_si256 (),
5012                                                      (__mmask8) __U);
5013 }
5014
5015 extern __inline __m128i
5016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5017 _mm_cvtps_epu32 (__m128 __A)
5018 {
5019   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5020                                                      (__v4si)
5021                                                      _mm_setzero_si128 (),
5022                                                      (__mmask8) -1);
5023 }
5024
5025 extern __inline __m128i
5026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5027 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5028 {
5029   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5030                                                      (__v4si) __W,
5031                                                      (__mmask8) __U);
5032 }
5033
5034 extern __inline __m128i
5035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5036 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5037 {
5038   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5039                                                      (__v4si)
5040                                                      _mm_setzero_si128 (),
5041                                                      (__mmask8) __U);
5042 }
5043
5044 extern __inline __m256d
5045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5046 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5047 {
5048   return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5049                                                    (__v4df) __W,
5050                                                    (__mmask8) __U);
5051 }
5052
5053 extern __inline __m256d
5054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5055 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5056 {
5057   return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5058                                                    (__v4df)
5059                                                    _mm256_setzero_pd (),
5060                                                    (__mmask8) __U);
5061 }
5062
5063 extern __inline __m128d
5064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5065 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5066 {
5067   return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5068                                                    (__v2df) __W,
5069                                                    (__mmask8) __U);
5070 }
5071
5072 extern __inline __m128d
5073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5074 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5075 {
5076   return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5077                                                    (__v2df)
5078                                                    _mm_setzero_pd (),
5079                                                    (__mmask8) __U);
5080 }
5081
5082 extern __inline __m256
5083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5084 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5085 {
5086   return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5087                                                    (__v8sf) __W,
5088                                                    (__mmask8) __U);
5089 }
5090
5091 extern __inline __m256
5092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5093 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5094 {
5095   return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5096                                                    (__v8sf)
5097                                                    _mm256_setzero_ps (),
5098                                                    (__mmask8) __U);
5099 }
5100
5101 extern __inline __m128
5102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5103 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5104 {
5105   return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5106                                                    (__v4sf) __W,
5107                                                    (__mmask8) __U);
5108 }
5109
5110 extern __inline __m128
5111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5112 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5113 {
5114   return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5115                                                    (__v4sf)
5116                                                    _mm_setzero_ps (),
5117                                                    (__mmask8) __U);
5118 }
5119
5120 extern __inline __m256
5121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5122 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5123 {
5124   return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5125                                                    (__v8sf) __W,
5126                                                    (__mmask8) __U);
5127 }
5128
5129 extern __inline __m256
5130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5131 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5132 {
5133   return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5134                                                    (__v8sf)
5135                                                    _mm256_setzero_ps (),
5136                                                    (__mmask8) __U);
5137 }
5138
5139 extern __inline __m128
5140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5141 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5142 {
5143   return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5144                                                    (__v4sf) __W,
5145                                                    (__mmask8) __U);
5146 }
5147
5148 extern __inline __m128
5149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5150 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5151 {
5152   return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5153                                                    (__v4sf)
5154                                                    _mm_setzero_ps (),
5155                                                    (__mmask8) __U);
5156 }
5157
5158 extern __inline __m128i
5159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5160 _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5161                          __m128i __B)
5162 {
5163   return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5164                                                      (__v4si) __B,
5165                                                      (__v4si) __W,
5166                                                      (__mmask8) __U);
5167 }
5168
5169 extern __inline __m128i
5170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5171 _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5172 {
5173   return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5174                                                      (__v4si) __B,
5175                                                      (__v4si)
5176                                                      _mm_setzero_si128 (),
5177                                                      (__mmask8) __U);
5178 }
5179
5180 extern __inline __m256i
5181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5182 _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5183                             __m256i __B)
5184 {
5185   return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5186                                                      (__v8si) __B,
5187                                                      (__v8si) __W,
5188                                                      (__mmask8) __U);
5189 }
5190
5191 extern __inline __m256i
5192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5193 _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5194 {
5195   return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5196                                                      (__v8si) __B,
5197                                                      (__v8si)
5198                                                      _mm256_setzero_si256 (),
5199                                                      (__mmask8) __U);
5200 }
5201
5202 extern __inline __m128i
5203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5204 _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5205                          __m128i __B)
5206 {
5207   return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5208                                                       (__v2di) __B,
5209                                                       (__v2di) __W,
5210                                                       (__mmask8) __U);
5211 }
5212
5213 extern __inline __m128i
5214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215 _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5216 {
5217   return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5218                                                       (__v2di) __B,
5219                                                       (__v2di)
5220                                                       _mm_setzero_di (),
5221                                                       (__mmask8) __U);
5222 }
5223
5224 extern __inline __m256i
5225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5226 _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5227                             __m256i __B)
5228 {
5229   return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5230                                                       (__v4di) __B,
5231                                                       (__v4di) __W,
5232                                                       (__mmask8) __U);
5233 }
5234
5235 extern __inline __m256i
5236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5237 _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5238 {
5239   return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5240                                                       (__v4di) __B,
5241                                                       (__v4di)
5242                                                       _mm256_setzero_si256 (),
5243                                                       (__mmask8) __U);
5244 }
5245
5246 extern __inline __m128i
5247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5248 _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5249                          __m128i __B)
5250 {
5251   return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5252                                                      (__v4si) __B,
5253                                                      (__v4si) __W,
5254                                                      (__mmask8) __U);
5255 }
5256
5257 extern __inline __m128i
5258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5259 _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5260 {
5261   return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5262                                                      (__v4si) __B,
5263                                                      (__v4si)
5264                                                      _mm_setzero_si128 (),
5265                                                      (__mmask8) __U);
5266 }
5267
5268 extern __inline __m256i
5269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5270 _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5271                             __m256i __B)
5272 {
5273   return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5274                                                      (__v8si) __B,
5275                                                      (__v8si) __W,
5276                                                      (__mmask8) __U);
5277 }
5278
5279 extern __inline __m256i
5280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5281 _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5282 {
5283   return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5284                                                      (__v8si) __B,
5285                                                      (__v8si)
5286                                                      _mm256_setzero_si256 (),
5287                                                      (__mmask8) __U);
5288 }
5289
5290 extern __inline __m128i
5291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5292 _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5293                          __m128i __B)
5294 {
5295   return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5296                                                       (__v2di) __B,
5297                                                       (__v2di) __W,
5298                                                       (__mmask8) __U);
5299 }
5300
5301 extern __inline __m128i
5302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5303 _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5304 {
5305   return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5306                                                       (__v2di) __B,
5307                                                       (__v2di)
5308                                                       _mm_setzero_di (),
5309                                                       (__mmask8) __U);
5310 }
5311
5312 extern __inline __m256i
5313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5314 _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5315                             __m256i __B)
5316 {
5317   return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5318                                                       (__v4di) __B,
5319                                                       (__v4di) __W,
5320                                                       (__mmask8) __U);
5321 }
5322
5323 extern __inline __m256i
5324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5325 _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5326 {
5327   return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5328                                                       (__v4di) __B,
5329                                                       (__v4di)
5330                                                       _mm256_setzero_si256 (),
5331                                                       (__mmask8) __U);
5332 }
5333
5334 extern __inline __mmask8
5335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5336 _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5337 {
5338   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5339                                                    (__v4si) __B, 0,
5340                                                    (__mmask8) -1);
5341 }
5342
5343 extern __inline __mmask8
5344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5345 _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5346 {
5347   return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5348                                                     (__v4si) __B,
5349                                                     (__mmask8) -1);
5350 }
5351
5352 extern __inline __mmask8
5353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5354 _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5355 {
5356   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5357                                                    (__v4si) __B, 0, __U);
5358 }
5359
5360 extern __inline __mmask8
5361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5362 _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5363 {
5364   return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5365                                                     (__v4si) __B, __U);
5366 }
5367
5368 extern __inline __mmask8
5369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5370 _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5371 {
5372   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5373                                                    (__v8si) __B, 0,
5374                                                    (__mmask8) -1);
5375 }
5376
5377 extern __inline __mmask8
5378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5379 _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5380 {
5381   return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5382                                                     (__v8si) __B,
5383                                                     (__mmask8) -1);
5384 }
5385
5386 extern __inline __mmask8
5387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5388 _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5389 {
5390   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5391                                                    (__v8si) __B, 0, __U);
5392 }
5393
5394 extern __inline __mmask8
5395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5396 _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5397 {
5398   return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5399                                                     (__v8si) __B, __U);
5400 }
5401
5402 extern __inline __mmask8
5403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5404 _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5405 {
5406   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5407                                                    (__v2di) __B, 0,
5408                                                    (__mmask8) -1);
5409 }
5410
5411 extern __inline __mmask8
5412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5413 _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5414 {
5415   return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5416                                                     (__v2di) __B,
5417                                                     (__mmask8) -1);
5418 }
5419
5420 extern __inline __mmask8
5421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5422 _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5423 {
5424   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5425                                                    (__v2di) __B, 0, __U);
5426 }
5427
5428 extern __inline __mmask8
5429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5430 _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5431 {
5432   return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5433                                                     (__v2di) __B, __U);
5434 }
5435
5436 extern __inline __mmask8
5437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5438 _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5439 {
5440   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5441                                                    (__v4di) __B, 0,
5442                                                    (__mmask8) -1);
5443 }
5444
5445 extern __inline __mmask8
5446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5447 _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5448 {
5449   return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5450                                                     (__v4di) __B,
5451                                                     (__mmask8) -1);
5452 }
5453
5454 extern __inline __mmask8
5455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5456 _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5457 {
5458   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5459                                                    (__v4di) __B, 0, __U);
5460 }
5461
5462 extern __inline __mmask8
5463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5464 _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5465 {
5466   return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5467                                                     (__v4di) __B, __U);
5468 }
5469
5470 extern __inline __mmask8
5471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5472 _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5473 {
5474   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5475                                                    (__v4si) __B, 6,
5476                                                    (__mmask8) -1);
5477 }
5478
5479 extern __inline __mmask8
5480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5481 _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5482 {
5483   return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5484                                                     (__v4si) __B,
5485                                                     (__mmask8) -1);
5486 }
5487
5488 extern __inline __mmask8
5489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5490 _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5491 {
5492   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5493                                                    (__v4si) __B, 6, __U);
5494 }
5495
5496 extern __inline __mmask8
5497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5498 _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5499 {
5500   return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5501                                                     (__v4si) __B, __U);
5502 }
5503
5504 extern __inline __mmask8
5505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5506 _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5507 {
5508   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5509                                                    (__v8si) __B, 6,
5510                                                    (__mmask8) -1);
5511 }
5512
5513 extern __inline __mmask8
5514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5515 _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5516 {
5517   return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5518                                                     (__v8si) __B,
5519                                                     (__mmask8) -1);
5520 }
5521
5522 extern __inline __mmask8
5523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5524 _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5525 {
5526   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5527                                                    (__v8si) __B, 6, __U);
5528 }
5529
5530 extern __inline __mmask8
5531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5532 _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5533 {
5534   return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5535                                                     (__v8si) __B, __U);
5536 }
5537
5538 extern __inline __mmask8
5539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5540 _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5541 {
5542   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5543                                                    (__v2di) __B, 6,
5544                                                    (__mmask8) -1);
5545 }
5546
5547 extern __inline __mmask8
5548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5549 _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5550 {
5551   return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5552                                                     (__v2di) __B,
5553                                                     (__mmask8) -1);
5554 }
5555
5556 extern __inline __mmask8
5557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5558 _mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5559 {
5560   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5561                                                    (__v2di) __B, 6, __U);
5562 }
5563
5564 extern __inline __mmask8
5565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5566 _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5567 {
5568   return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5569                                                     (__v2di) __B, __U);
5570 }
5571
5572 extern __inline __mmask8
5573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5574 _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5575 {
5576   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5577                                                    (__v4di) __B, 6,
5578                                                    (__mmask8) -1);
5579 }
5580
5581 extern __inline __mmask8
5582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5583 _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5584 {
5585   return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5586                                                     (__v4di) __B,
5587                                                     (__mmask8) -1);
5588 }
5589
5590 extern __inline __mmask8
5591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5592 _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5593 {
5594   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5595                                                    (__v4di) __B, 6, __U);
5596 }
5597
5598 extern __inline __mmask8
5599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5600 _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5601 {
5602   return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5603                                                     (__v4di) __B, __U);
5604 }
5605
5606 extern __inline __mmask8
5607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5608 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5609 {
5610   return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5611                                                (__v4si) __B,
5612                                                (__mmask8) -1);
5613 }
5614
5615 extern __inline __mmask8
5616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5617 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5618 {
5619   return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5620                                                (__v4si) __B, __U);
5621 }
5622
5623 extern __inline __mmask8
5624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5625 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5626 {
5627   return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5628                                                (__v8si) __B,
5629                                                (__mmask8) -1);
5630 }
5631
5632 extern __inline __mmask8
5633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5634 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5635 {
5636   return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5637                                                (__v8si) __B, __U);
5638 }
5639
5640 extern __inline __mmask8
5641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5642 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5643 {
5644   return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5645                                                (__v2di) __B,
5646                                                (__mmask8) -1);
5647 }
5648
5649 extern __inline __mmask8
5650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5651 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5652 {
5653   return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5654                                                (__v2di) __B, __U);
5655 }
5656
5657 extern __inline __mmask8
5658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5659 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5660 {
5661   return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5662                                                (__v4di) __B,
5663                                                (__mmask8) -1);
5664 }
5665
5666 extern __inline __mmask8
5667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5668 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5669 {
5670   return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5671                                                (__v4di) __B, __U);
5672 }
5673
5674 extern __inline __mmask8
5675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5676 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5677 {
5678   return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5679                                                 (__v4si) __B,
5680                                                 (__mmask8) -1);
5681 }
5682
5683 extern __inline __mmask8
5684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5685 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5686 {
5687   return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5688                                                 (__v4si) __B, __U);
5689 }
5690
5691 extern __inline __mmask8
5692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5693 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5694 {
5695   return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5696                                                 (__v8si) __B,
5697                                                 (__mmask8) -1);
5698 }
5699
5700 extern __inline __mmask8
5701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5702 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5703 {
5704   return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5705                                                 (__v8si) __B, __U);
5706 }
5707
5708 extern __inline __mmask8
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
5711 {
5712   return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5713                                                 (__v2di) __B,
5714                                                 (__mmask8) -1);
5715 }
5716
5717 extern __inline __mmask8
5718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5719 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5720 {
5721   return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5722                                                 (__v2di) __B, __U);
5723 }
5724
5725 extern __inline __mmask8
5726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5727 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5728 {
5729   return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5730                                                 (__v4di) __B,
5731                                                 (__mmask8) -1);
5732 }
5733
5734 extern __inline __mmask8
5735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5737 {
5738   return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5739                                                 (__v4di) __B, __U);
5740 }
5741
5742 extern __inline __m256d
5743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5744 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5745 {
5746   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5747                                                       (__v4df) __W,
5748                                                       (__mmask8) __U);
5749 }
5750
5751 extern __inline __m256d
5752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5753 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5754 {
5755   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5756                                                       (__v4df)
5757                                                       _mm256_setzero_pd (),
5758                                                       (__mmask8) __U);
5759 }
5760
5761 extern __inline void
5762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5764 {
5765   __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5766                                           (__v4df) __A,
5767                                           (__mmask8) __U);
5768 }
5769
5770 extern __inline __m128d
5771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5772 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5773 {
5774   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5775                                                       (__v2df) __W,
5776                                                       (__mmask8) __U);
5777 }
5778
5779 extern __inline __m128d
5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5782 {
5783   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5784                                                       (__v2df)
5785                                                       _mm_setzero_pd (),
5786                                                       (__mmask8) __U);
5787 }
5788
5789 extern __inline void
5790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5791 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5792 {
5793   __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5794                                           (__v2df) __A,
5795                                           (__mmask8) __U);
5796 }
5797
5798 extern __inline __m256
5799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5800 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5801 {
5802   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5803                                                      (__v8sf) __W,
5804                                                      (__mmask8) __U);
5805 }
5806
5807 extern __inline __m256
5808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5810 {
5811   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5812                                                      (__v8sf)
5813                                                      _mm256_setzero_ps (),
5814                                                      (__mmask8) __U);
5815 }
5816
5817 extern __inline void
5818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5819 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5820 {
5821   __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5822                                           (__v8sf) __A,
5823                                           (__mmask8) __U);
5824 }
5825
5826 extern __inline __m128
5827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5828 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5829 {
5830   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5831                                                      (__v4sf) __W,
5832                                                      (__mmask8) __U);
5833 }
5834
5835 extern __inline __m128
5836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5838 {
5839   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5840                                                      (__v4sf)
5841                                                      _mm_setzero_ps (),
5842                                                      (__mmask8) __U);
5843 }
5844
5845 extern __inline void
5846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5847 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5848 {
5849   __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5850                                           (__v4sf) __A,
5851                                           (__mmask8) __U);
5852 }
5853
5854 extern __inline __m256i
5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5857 {
5858   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5859                                                       (__v4di) __W,
5860                                                       (__mmask8) __U);
5861 }
5862
5863 extern __inline __m256i
5864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5866 {
5867   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5868                                                       (__v4di)
5869                                                       _mm256_setzero_si256 (),
5870                                                       (__mmask8) __U);
5871 }
5872
5873 extern __inline void
5874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5875 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5876 {
5877   __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5878                                           (__v4di) __A,
5879                                           (__mmask8) __U);
5880 }
5881
5882 extern __inline __m128i
5883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5884 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5885 {
5886   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5887                                                       (__v2di) __W,
5888                                                       (__mmask8) __U);
5889 }
5890
5891 extern __inline __m128i
5892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5893 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5894 {
5895   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5896                                                       (__v2di)
5897                                                       _mm_setzero_di (),
5898                                                       (__mmask8) __U);
5899 }
5900
5901 extern __inline void
5902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5903 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5904 {
5905   __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5906                                           (__v2di) __A,
5907                                           (__mmask8) __U);
5908 }
5909
5910 extern __inline __m256i
5911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5913 {
5914   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5915                                                       (__v8si) __W,
5916                                                       (__mmask8) __U);
5917 }
5918
5919 extern __inline __m256i
5920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5922 {
5923   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5924                                                       (__v8si)
5925                                                       _mm256_setzero_si256 (),
5926                                                       (__mmask8) __U);
5927 }
5928
5929 extern __inline void
5930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5931 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5932 {
5933   __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5934                                           (__v8si) __A,
5935                                           (__mmask8) __U);
5936 }
5937
5938 extern __inline __m128i
5939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5941 {
5942   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5943                                                       (__v4si) __W,
5944                                                       (__mmask8) __U);
5945 }
5946
5947 extern __inline __m128i
5948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5950 {
5951   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5952                                                       (__v4si)
5953                                                       _mm_setzero_si128 (),
5954                                                       (__mmask8) __U);
5955 }
5956
5957 extern __inline void
5958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5959 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5960 {
5961   __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5962                                           (__v4si) __A,
5963                                           (__mmask8) __U);
5964 }
5965
5966 extern __inline __m256d
5967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
5969 {
5970   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
5971                                                     (__v4df) __W,
5972                                                     (__mmask8) __U);
5973 }
5974
5975 extern __inline __m256d
5976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5977 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
5978 {
5979   return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
5980                                                      (__v4df)
5981                                                      _mm256_setzero_pd (),
5982                                                      (__mmask8) __U);
5983 }
5984
5985 extern __inline __m256d
5986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5987 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5988 {
5989   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
5990                                                         (__v4df) __W,
5991                                                         (__mmask8)
5992                                                         __U);
5993 }
5994
5995 extern __inline __m256d
5996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5997 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
5998 {
5999   return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6000                                                          (__v4df)
6001                                                          _mm256_setzero_pd (),
6002                                                          (__mmask8)
6003                                                          __U);
6004 }
6005
6006 extern __inline __m128d
6007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6008 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6009 {
6010   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6011                                                     (__v2df) __W,
6012                                                     (__mmask8) __U);
6013 }
6014
6015 extern __inline __m128d
6016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6017 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6018 {
6019   return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6020                                                      (__v2df)
6021                                                      _mm_setzero_pd (),
6022                                                      (__mmask8) __U);
6023 }
6024
6025 extern __inline __m128d
6026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6028 {
6029   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6030                                                         (__v2df) __W,
6031                                                         (__mmask8)
6032                                                         __U);
6033 }
6034
6035 extern __inline __m128d
6036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6037 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6038 {
6039   return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6040                                                          (__v2df)
6041                                                          _mm_setzero_pd (),
6042                                                          (__mmask8)
6043                                                          __U);
6044 }
6045
6046 extern __inline __m256
6047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6048 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6049 {
6050   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6051                                                    (__v8sf) __W,
6052                                                    (__mmask8) __U);
6053 }
6054
6055 extern __inline __m256
6056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6057 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6058 {
6059   return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6060                                                     (__v8sf)
6061                                                     _mm256_setzero_ps (),
6062                                                     (__mmask8) __U);
6063 }
6064
6065 extern __inline __m256
6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6068 {
6069   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6070                                                        (__v8sf) __W,
6071                                                        (__mmask8) __U);
6072 }
6073
6074 extern __inline __m256
6075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6076 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6077 {
6078   return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6079                                                         (__v8sf)
6080                                                         _mm256_setzero_ps (),
6081                                                         (__mmask8)
6082                                                         __U);
6083 }
6084
6085 extern __inline __m128
6086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6087 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6088 {
6089   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6090                                                    (__v4sf) __W,
6091                                                    (__mmask8) __U);
6092 }
6093
6094 extern __inline __m128
6095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6096 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6097 {
6098   return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6099                                                     (__v4sf)
6100                                                     _mm_setzero_ps (),
6101                                                     (__mmask8) __U);
6102 }
6103
6104 extern __inline __m128
6105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6106 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6107 {
6108   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6109                                                        (__v4sf) __W,
6110                                                        (__mmask8) __U);
6111 }
6112
6113 extern __inline __m128
6114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6115 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6116 {
6117   return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6118                                                         (__v4sf)
6119                                                         _mm_setzero_ps (),
6120                                                         (__mmask8)
6121                                                         __U);
6122 }
6123
6124 extern __inline __m256i
6125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6127 {
6128   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6129                                                     (__v4di) __W,
6130                                                     (__mmask8) __U);
6131 }
6132
6133 extern __inline __m256i
6134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6135 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6136 {
6137   return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6138                                                      (__v4di)
6139                                                      _mm256_setzero_si256 (),
6140                                                      (__mmask8) __U);
6141 }
6142
6143 extern __inline __m256i
6144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6145 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6146                                void const *__P)
6147 {
6148   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6149                                                         (__v4di) __W,
6150                                                         (__mmask8)
6151                                                         __U);
6152 }
6153
6154 extern __inline __m256i
6155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6156 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6157 {
6158   return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6159                                                          (__v4di)
6160                                                          _mm256_setzero_si256 (),
6161                                                          (__mmask8)
6162                                                          __U);
6163 }
6164
6165 extern __inline __m128i
6166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6167 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6168 {
6169   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6170                                                     (__v2di) __W,
6171                                                     (__mmask8) __U);
6172 }
6173
6174 extern __inline __m128i
6175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6176 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6177 {
6178   return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6179                                                      (__v2di)
6180                                                      _mm_setzero_si128 (),
6181                                                      (__mmask8) __U);
6182 }
6183
6184 extern __inline __m128i
6185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6186 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6187 {
6188   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6189                                                         (__v2di) __W,
6190                                                         (__mmask8)
6191                                                         __U);
6192 }
6193
6194 extern __inline __m128i
6195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6197 {
6198   return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6199                                                          (__v2di)
6200                                                          _mm_setzero_si128 (),
6201                                                          (__mmask8)
6202                                                          __U);
6203 }
6204
6205 extern __inline __m256i
6206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6207 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6208 {
6209   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6210                                                     (__v8si) __W,
6211                                                     (__mmask8) __U);
6212 }
6213
6214 extern __inline __m256i
6215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6217 {
6218   return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6219                                                      (__v8si)
6220                                                      _mm256_setzero_si256 (),
6221                                                      (__mmask8) __U);
6222 }
6223
6224 extern __inline __m256i
6225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6226 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6227                                void const *__P)
6228 {
6229   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6230                                                         (__v8si) __W,
6231                                                         (__mmask8)
6232                                                         __U);
6233 }
6234
6235 extern __inline __m256i
6236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6237 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6238 {
6239   return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6240                                                          (__v8si)
6241                                                          _mm256_setzero_si256 (),
6242                                                          (__mmask8)
6243                                                          __U);
6244 }
6245
6246 extern __inline __m128i
6247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6248 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6249 {
6250   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6251                                                     (__v4si) __W,
6252                                                     (__mmask8) __U);
6253 }
6254
6255 extern __inline __m128i
6256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6257 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6258 {
6259   return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6260                                                      (__v4si)
6261                                                      _mm_setzero_si128 (),
6262                                                      (__mmask8) __U);
6263 }
6264
6265 extern __inline __m128i
6266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6267 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6268 {
6269   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6270                                                         (__v4si) __W,
6271                                                         (__mmask8)
6272                                                         __U);
6273 }
6274
6275 extern __inline __m128i
6276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6277 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6278 {
6279   return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6280                                                          (__v4si)
6281                                                          _mm_setzero_si128 (),
6282                                                          (__mmask8)
6283                                                          __U);
6284 }
6285
6286 extern __inline __m256d
6287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6288 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6289 {
6290   return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6291                                                         /* idx */ ,
6292                                                         (__v4df) __A,
6293                                                         (__v4df) __B,
6294                                                         (__mmask8) -1);
6295 }
6296
6297 extern __inline __m256d
6298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6299 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6300                              __m256d __B)
6301 {
6302   return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6303                                                         /* idx */ ,
6304                                                         (__v4df) __A,
6305                                                         (__v4df) __B,
6306                                                         (__mmask8)
6307                                                         __U);
6308 }
6309
6310 extern __inline __m256d
6311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6312 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6313                               __m256d __B)
6314 {
6315   return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6316                                                         (__v4di) __I
6317                                                         /* idx */ ,
6318                                                         (__v4df) __B,
6319                                                         (__mmask8)
6320                                                         __U);
6321 }
6322
6323 extern __inline __m256d
6324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6326                               __m256d __B)
6327 {
6328   return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6329                                                          /* idx */ ,
6330                                                          (__v4df) __A,
6331                                                          (__v4df) __B,
6332                                                          (__mmask8)
6333                                                          __U);
6334 }
6335
6336 extern __inline __m256
6337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6338 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6339 {
6340   return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6341                                                        /* idx */ ,
6342                                                        (__v8sf) __A,
6343                                                        (__v8sf) __B,
6344                                                        (__mmask8) -1);
6345 }
6346
6347 extern __inline __m256
6348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6349 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6350                              __m256 __B)
6351 {
6352   return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6353                                                        /* idx */ ,
6354                                                        (__v8sf) __A,
6355                                                        (__v8sf) __B,
6356                                                        (__mmask8) __U);
6357 }
6358
6359 extern __inline __m256
6360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6361 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6362                               __m256 __B)
6363 {
6364   return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6365                                                        (__v8si) __I
6366                                                        /* idx */ ,
6367                                                        (__v8sf) __B,
6368                                                        (__mmask8) __U);
6369 }
6370
6371 extern __inline __m256
6372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6373 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6374                               __m256 __B)
6375 {
6376   return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6377                                                         /* idx */ ,
6378                                                         (__v8sf) __A,
6379                                                         (__v8sf) __B,
6380                                                         (__mmask8)
6381                                                         __U);
6382 }
6383
6384 extern __inline __m128i
6385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6387 {
6388   return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6389                                                        /* idx */ ,
6390                                                        (__v2di) __A,
6391                                                        (__v2di) __B,
6392                                                        (__mmask8) -1);
6393 }
6394
6395 extern __inline __m128i
6396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6397 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6398                              __m128i __B)
6399 {
6400   return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6401                                                        /* idx */ ,
6402                                                        (__v2di) __A,
6403                                                        (__v2di) __B,
6404                                                        (__mmask8) __U);
6405 }
6406
6407 extern __inline __m128i
6408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6409 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6410                               __m128i __B)
6411 {
6412   return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6413                                                        (__v2di) __I
6414                                                        /* idx */ ,
6415                                                        (__v2di) __B,
6416                                                        (__mmask8) __U);
6417 }
6418
6419 extern __inline __m128i
6420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6421 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6422                               __m128i __B)
6423 {
6424   return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6425                                                         /* idx */ ,
6426                                                         (__v2di) __A,
6427                                                         (__v2di) __B,
6428                                                         (__mmask8)
6429                                                         __U);
6430 }
6431
6432 extern __inline __m128i
6433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6435 {
6436   return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6437                                                        /* idx */ ,
6438                                                        (__v4si) __A,
6439                                                        (__v4si) __B,
6440                                                        (__mmask8) -1);
6441 }
6442
6443 extern __inline __m128i
6444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6446                              __m128i __B)
6447 {
6448   return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6449                                                        /* idx */ ,
6450                                                        (__v4si) __A,
6451                                                        (__v4si) __B,
6452                                                        (__mmask8) __U);
6453 }
6454
6455 extern __inline __m128i
6456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6458                               __m128i __B)
6459 {
6460   return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6461                                                        (__v4si) __I
6462                                                        /* idx */ ,
6463                                                        (__v4si) __B,
6464                                                        (__mmask8) __U);
6465 }
6466
6467 extern __inline __m128i
6468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6469 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6470                               __m128i __B)
6471 {
6472   return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6473                                                         /* idx */ ,
6474                                                         (__v4si) __A,
6475                                                         (__v4si) __B,
6476                                                         (__mmask8)
6477                                                         __U);
6478 }
6479
6480 extern __inline __m256i
6481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6482 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6483 {
6484   return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6485                                                        /* idx */ ,
6486                                                        (__v4di) __A,
6487                                                        (__v4di) __B,
6488                                                        (__mmask8) -1);
6489 }
6490
6491 extern __inline __m256i
6492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6493 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6494                                 __m256i __B)
6495 {
6496   return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6497                                                        /* idx */ ,
6498                                                        (__v4di) __A,
6499                                                        (__v4di) __B,
6500                                                        (__mmask8) __U);
6501 }
6502
6503 extern __inline __m256i
6504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6505 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6506                                  __mmask8 __U, __m256i __B)
6507 {
6508   return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6509                                                        (__v4di) __I
6510                                                        /* idx */ ,
6511                                                        (__v4di) __B,
6512                                                        (__mmask8) __U);
6513 }
6514
6515 extern __inline __m256i
6516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6517 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6518                                  __m256i __I, __m256i __B)
6519 {
6520   return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6521                                                         /* idx */ ,
6522                                                         (__v4di) __A,
6523                                                         (__v4di) __B,
6524                                                         (__mmask8)
6525                                                         __U);
6526 }
6527
6528 extern __inline __m256i
6529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6531 {
6532   return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6533                                                        /* idx */ ,
6534                                                        (__v8si) __A,
6535                                                        (__v8si) __B,
6536                                                        (__mmask8) -1);
6537 }
6538
6539 extern __inline __m256i
6540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6541 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6542                                 __m256i __B)
6543 {
6544   return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6545                                                        /* idx */ ,
6546                                                        (__v8si) __A,
6547                                                        (__v8si) __B,
6548                                                        (__mmask8) __U);
6549 }
6550
6551 extern __inline __m256i
6552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6553 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6554                                  __mmask8 __U, __m256i __B)
6555 {
6556   return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6557                                                        (__v8si) __I
6558                                                        /* idx */ ,
6559                                                        (__v8si) __B,
6560                                                        (__mmask8) __U);
6561 }
6562
6563 extern __inline __m256i
6564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6565 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6566                                  __m256i __I, __m256i __B)
6567 {
6568   return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6569                                                         /* idx */ ,
6570                                                         (__v8si) __A,
6571                                                         (__v8si) __B,
6572                                                         (__mmask8)
6573                                                         __U);
6574 }
6575
6576 extern __inline __m128d
6577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6578 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6579 {
6580   return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6581                                                         /* idx */ ,
6582                                                         (__v2df) __A,
6583                                                         (__v2df) __B,
6584                                                         (__mmask8) -1);
6585 }
6586
6587 extern __inline __m128d
6588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6589 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6590                           __m128d __B)
6591 {
6592   return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6593                                                         /* idx */ ,
6594                                                         (__v2df) __A,
6595                                                         (__v2df) __B,
6596                                                         (__mmask8)
6597                                                         __U);
6598 }
6599
6600 extern __inline __m128d
6601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6602 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6603                            __m128d __B)
6604 {
6605   return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6606                                                         (__v2di) __I
6607                                                         /* idx */ ,
6608                                                         (__v2df) __B,
6609                                                         (__mmask8)
6610                                                         __U);
6611 }
6612
6613 extern __inline __m128d
6614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6616                            __m128d __B)
6617 {
6618   return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6619                                                          /* idx */ ,
6620                                                          (__v2df) __A,
6621                                                          (__v2df) __B,
6622                                                          (__mmask8)
6623                                                          __U);
6624 }
6625
6626 extern __inline __m128
6627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6628 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6629 {
6630   return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6631                                                        /* idx */ ,
6632                                                        (__v4sf) __A,
6633                                                        (__v4sf) __B,
6634                                                        (__mmask8) -1);
6635 }
6636
6637 extern __inline __m128
6638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6639 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6640                           __m128 __B)
6641 {
6642   return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6643                                                        /* idx */ ,
6644                                                        (__v4sf) __A,
6645                                                        (__v4sf) __B,
6646                                                        (__mmask8) __U);
6647 }
6648
6649 extern __inline __m128
6650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6651 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6652                            __m128 __B)
6653 {
6654   return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6655                                                        (__v4si) __I
6656                                                        /* idx */ ,
6657                                                        (__v4sf) __B,
6658                                                        (__mmask8) __U);
6659 }
6660
6661 extern __inline __m128
6662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6663 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6664                            __m128 __B)
6665 {
6666   return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6667                                                         /* idx */ ,
6668                                                         (__v4sf) __A,
6669                                                         (__v4sf) __B,
6670                                                         (__mmask8)
6671                                                         __U);
6672 }
6673
6674 extern __inline __m128i
6675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6676 _mm_srav_epi64 (__m128i __X, __m128i __Y)
6677 {
6678   return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6679                                                   (__v2di) __Y,
6680                                                   (__v2di)
6681                                                   _mm_setzero_di (),
6682                                                   (__mmask8) -1);
6683 }
6684
6685 extern __inline __m128i
6686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6687 _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6688                      __m128i __Y)
6689 {
6690   return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6691                                                   (__v2di) __Y,
6692                                                   (__v2di) __W,
6693                                                   (__mmask8) __U);
6694 }
6695
6696 extern __inline __m128i
6697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6698 _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6699 {
6700   return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6701                                                   (__v2di) __Y,
6702                                                   (__v2di)
6703                                                   _mm_setzero_di (),
6704                                                   (__mmask8) __U);
6705 }
6706
6707 extern __inline __m256i
6708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6709 _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6710                         __m256i __Y)
6711 {
6712   return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6713                                                  (__v8si) __Y,
6714                                                  (__v8si) __W,
6715                                                  (__mmask8) __U);
6716 }
6717
6718 extern __inline __m256i
6719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6720 _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6721 {
6722   return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6723                                                  (__v8si) __Y,
6724                                                  (__v8si)
6725                                                  _mm256_setzero_si256 (),
6726                                                  (__mmask8) __U);
6727 }
6728
6729 extern __inline __m128i
6730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6731 _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6732                      __m128i __Y)
6733 {
6734   return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6735                                                  (__v4si) __Y,
6736                                                  (__v4si) __W,
6737                                                  (__mmask8) __U);
6738 }
6739
6740 extern __inline __m128i
6741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6742 _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6743 {
6744   return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6745                                                  (__v4si) __Y,
6746                                                  (__v4si)
6747                                                  _mm_setzero_si128 (),
6748                                                  (__mmask8) __U);
6749 }
6750
6751 extern __inline __m256i
6752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6753 _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6754                         __m256i __Y)
6755 {
6756   return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6757                                                  (__v4di) __Y,
6758                                                  (__v4di) __W,
6759                                                  (__mmask8) __U);
6760 }
6761
6762 extern __inline __m256i
6763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6764 _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6765 {
6766   return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6767                                                  (__v4di) __Y,
6768                                                  (__v4di)
6769                                                  _mm256_setzero_si256 (),
6770                                                  (__mmask8) __U);
6771 }
6772
6773 extern __inline __m128i
6774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6775 _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6776                      __m128i __Y)
6777 {
6778   return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6779                                                  (__v2di) __Y,
6780                                                  (__v2di) __W,
6781                                                  (__mmask8) __U);
6782 }
6783
6784 extern __inline __m128i
6785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6786 _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6787 {
6788   return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6789                                                  (__v2di) __Y,
6790                                                  (__v2di)
6791                                                  _mm_setzero_di (),
6792                                                  (__mmask8) __U);
6793 }
6794
6795 extern __inline __m256i
6796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6797 _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6798                         __m256i __Y)
6799 {
6800   return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6801                                                  (__v8si) __Y,
6802                                                  (__v8si) __W,
6803                                                  (__mmask8) __U);
6804 }
6805
6806 extern __inline __m256i
6807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6808 _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6809 {
6810   return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6811                                                  (__v8si) __Y,
6812                                                  (__v8si)
6813                                                  _mm256_setzero_si256 (),
6814                                                  (__mmask8) __U);
6815 }
6816
6817 extern __inline __m128i
6818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6819 _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6820                      __m128i __Y)
6821 {
6822   return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6823                                                  (__v4si) __Y,
6824                                                  (__v4si) __W,
6825                                                  (__mmask8) __U);
6826 }
6827
6828 extern __inline __m128i
6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6830 _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6831 {
6832   return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6833                                                  (__v4si) __Y,
6834                                                  (__v4si)
6835                                                  _mm_setzero_si128 (),
6836                                                  (__mmask8) __U);
6837 }
6838
6839 extern __inline __m256i
6840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6841 _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6842                         __m256i __Y)
6843 {
6844   return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6845                                                  (__v8si) __Y,
6846                                                  (__v8si) __W,
6847                                                  (__mmask8) __U);
6848 }
6849
6850 extern __inline __m256i
6851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6852 _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6853 {
6854   return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6855                                                  (__v8si) __Y,
6856                                                  (__v8si)
6857                                                  _mm256_setzero_si256 (),
6858                                                  (__mmask8) __U);
6859 }
6860
6861 extern __inline __m128i
6862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6863 _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6864                      __m128i __Y)
6865 {
6866   return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6867                                                  (__v4si) __Y,
6868                                                  (__v4si) __W,
6869                                                  (__mmask8) __U);
6870 }
6871
6872 extern __inline __m128i
6873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6874 _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6875 {
6876   return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6877                                                  (__v4si) __Y,
6878                                                  (__v4si)
6879                                                  _mm_setzero_si128 (),
6880                                                  (__mmask8) __U);
6881 }
6882
6883 extern __inline __m256i
6884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6885 _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6886                         __m256i __Y)
6887 {
6888   return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6889                                                  (__v4di) __Y,
6890                                                  (__v4di) __W,
6891                                                  (__mmask8) __U);
6892 }
6893
6894 extern __inline __m256i
6895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6896 _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6897 {
6898   return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6899                                                  (__v4di) __Y,
6900                                                  (__v4di)
6901                                                  _mm256_setzero_si256 (),
6902                                                  (__mmask8) __U);
6903 }
6904
6905 extern __inline __m128i
6906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6907 _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6908                      __m128i __Y)
6909 {
6910   return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6911                                                  (__v2di) __Y,
6912                                                  (__v2di) __W,
6913                                                  (__mmask8) __U);
6914 }
6915
6916 extern __inline __m128i
6917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6918 _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6919 {
6920   return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6921                                                  (__v2di) __Y,
6922                                                  (__v2di)
6923                                                  _mm_setzero_di (),
6924                                                  (__mmask8) __U);
6925 }
6926
6927 extern __inline __m256i
6928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6929 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
6930 {
6931   return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6932                                                   (__v8si) __B,
6933                                                   (__v8si)
6934                                                   _mm256_setzero_si256 (),
6935                                                   (__mmask8) -1);
6936 }
6937
6938 extern __inline __m256i
6939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6940 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6941                         __m256i __B)
6942 {
6943   return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6944                                                   (__v8si) __B,
6945                                                   (__v8si) __W,
6946                                                   (__mmask8) __U);
6947 }
6948
6949 extern __inline __m256i
6950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6951 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6952 {
6953   return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6954                                                   (__v8si) __B,
6955                                                   (__v8si)
6956                                                   _mm256_setzero_si256 (),
6957                                                   (__mmask8) __U);
6958 }
6959
6960 extern __inline __m128i
6961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6962 _mm_rolv_epi32 (__m128i __A, __m128i __B)
6963 {
6964   return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6965                                                   (__v4si) __B,
6966                                                   (__v4si)
6967                                                   _mm_setzero_si128 (),
6968                                                   (__mmask8) -1);
6969 }
6970
6971 extern __inline __m128i
6972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6973 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6974                      __m128i __B)
6975 {
6976   return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6977                                                   (__v4si) __B,
6978                                                   (__v4si) __W,
6979                                                   (__mmask8) __U);
6980 }
6981
6982 extern __inline __m128i
6983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6984 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6985 {
6986   return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6987                                                   (__v4si) __B,
6988                                                   (__v4si)
6989                                                   _mm_setzero_si128 (),
6990                                                   (__mmask8) __U);
6991 }
6992
6993 extern __inline __m256i
6994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6995 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
6996 {
6997   return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
6998                                                   (__v8si) __B,
6999                                                   (__v8si)
7000                                                   _mm256_setzero_si256 (),
7001                                                   (__mmask8) -1);
7002 }
7003
7004 extern __inline __m256i
7005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7006 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7007                         __m256i __B)
7008 {
7009   return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7010                                                   (__v8si) __B,
7011                                                   (__v8si) __W,
7012                                                   (__mmask8) __U);
7013 }
7014
7015 extern __inline __m256i
7016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7017 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7018 {
7019   return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7020                                                   (__v8si) __B,
7021                                                   (__v8si)
7022                                                   _mm256_setzero_si256 (),
7023                                                   (__mmask8) __U);
7024 }
7025
7026 extern __inline __m128i
7027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7028 _mm_rorv_epi32 (__m128i __A, __m128i __B)
7029 {
7030   return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7031                                                   (__v4si) __B,
7032                                                   (__v4si)
7033                                                   _mm_setzero_si128 (),
7034                                                   (__mmask8) -1);
7035 }
7036
7037 extern __inline __m128i
7038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7039 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7040                      __m128i __B)
7041 {
7042   return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7043                                                   (__v4si) __B,
7044                                                   (__v4si) __W,
7045                                                   (__mmask8) __U);
7046 }
7047
7048 extern __inline __m128i
7049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7050 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7051 {
7052   return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7053                                                   (__v4si) __B,
7054                                                   (__v4si)
7055                                                   _mm_setzero_si128 (),
7056                                                   (__mmask8) __U);
7057 }
7058
7059 extern __inline __m256i
7060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7061 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
7062 {
7063   return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7064                                                   (__v4di) __B,
7065                                                   (__v4di)
7066                                                   _mm256_setzero_si256 (),
7067                                                   (__mmask8) -1);
7068 }
7069
7070 extern __inline __m256i
7071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7072 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7073                         __m256i __B)
7074 {
7075   return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7076                                                   (__v4di) __B,
7077                                                   (__v4di) __W,
7078                                                   (__mmask8) __U);
7079 }
7080
7081 extern __inline __m256i
7082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7083 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7084 {
7085   return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7086                                                   (__v4di) __B,
7087                                                   (__v4di)
7088                                                   _mm256_setzero_si256 (),
7089                                                   (__mmask8) __U);
7090 }
7091
7092 extern __inline __m128i
7093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7094 _mm_rolv_epi64 (__m128i __A, __m128i __B)
7095 {
7096   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7097                                                   (__v2di) __B,
7098                                                   (__v2di)
7099                                                   _mm_setzero_di (),
7100                                                   (__mmask8) -1);
7101 }
7102
7103 extern __inline __m128i
7104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7105 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7106                      __m128i __B)
7107 {
7108   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7109                                                   (__v2di) __B,
7110                                                   (__v2di) __W,
7111                                                   (__mmask8) __U);
7112 }
7113
7114 extern __inline __m128i
7115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7116 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7117 {
7118   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7119                                                   (__v2di) __B,
7120                                                   (__v2di)
7121                                                   _mm_setzero_di (),
7122                                                   (__mmask8) __U);
7123 }
7124
7125 extern __inline __m256i
7126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7127 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
7128 {
7129   return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7130                                                   (__v4di) __B,
7131                                                   (__v4di)
7132                                                   _mm256_setzero_si256 (),
7133                                                   (__mmask8) -1);
7134 }
7135
7136 extern __inline __m256i
7137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7138 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7139                         __m256i __B)
7140 {
7141   return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7142                                                   (__v4di) __B,
7143                                                   (__v4di) __W,
7144                                                   (__mmask8) __U);
7145 }
7146
7147 extern __inline __m256i
7148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7149 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7150 {
7151   return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7152                                                   (__v4di) __B,
7153                                                   (__v4di)
7154                                                   _mm256_setzero_si256 (),
7155                                                   (__mmask8) __U);
7156 }
7157
7158 extern __inline __m128i
7159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7160 _mm_rorv_epi64 (__m128i __A, __m128i __B)
7161 {
7162   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7163                                                   (__v2di) __B,
7164                                                   (__v2di)
7165                                                   _mm_setzero_di (),
7166                                                   (__mmask8) -1);
7167 }
7168
7169 extern __inline __m128i
7170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7171 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7172                      __m128i __B)
7173 {
7174   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7175                                                   (__v2di) __B,
7176                                                   (__v2di) __W,
7177                                                   (__mmask8) __U);
7178 }
7179
7180 extern __inline __m128i
7181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7182 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7183 {
7184   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7185                                                   (__v2di) __B,
7186                                                   (__v2di)
7187                                                   _mm_setzero_di (),
7188                                                   (__mmask8) __U);
7189 }
7190
7191 extern __inline __m256i
7192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7193 _mm256_srav_epi64 (__m256i __X, __m256i __Y)
7194 {
7195   return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7196                                                   (__v4di) __Y,
7197                                                   (__v4di)
7198                                                   _mm256_setzero_si256 (),
7199                                                   (__mmask8) -1);
7200 }
7201
7202 extern __inline __m256i
7203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7204 _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7205                         __m256i __Y)
7206 {
7207   return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7208                                                   (__v4di) __Y,
7209                                                   (__v4di) __W,
7210                                                   (__mmask8) __U);
7211 }
7212
7213 extern __inline __m256i
7214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7215 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7216 {
7217   return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7218                                                   (__v4di) __Y,
7219                                                   (__v4di)
7220                                                   _mm256_setzero_si256 (),
7221                                                   (__mmask8) __U);
7222 }
7223
7224 extern __inline __m256i
7225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7226 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7227                        __m256i __B)
7228 {
7229   return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7230                                                  (__v4di) __B,
7231                                                  (__v4di) __W, __U);
7232 }
7233
7234 extern __inline __m256i
7235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7236 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7237 {
7238   return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7239                                                  (__v4di) __B,
7240                                                  (__v4di)
7241                                                  _mm256_setzero_pd (),
7242                                                  __U);
7243 }
7244
7245 extern __inline __m128i
7246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7247 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7248                     __m128i __B)
7249 {
7250   return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7251                                                  (__v2di) __B,
7252                                                  (__v2di) __W, __U);
7253 }
7254
7255 extern __inline __m128i
7256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7257 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7258 {
7259   return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7260                                                  (__v2di) __B,
7261                                                  (__v2di)
7262                                                  _mm_setzero_pd (),
7263                                                  __U);
7264 }
7265
7266 extern __inline __m256i
7267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7268 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7269                           __m256i __B)
7270 {
7271   return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7272                                                   (__v4di) __B,
7273                                                   (__v4di) __W, __U);
7274 }
7275
7276 extern __inline __m256i
7277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7278 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7279 {
7280   return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7281                                                   (__v4di) __B,
7282                                                   (__v4di)
7283                                                   _mm256_setzero_pd (),
7284                                                   __U);
7285 }
7286
7287 extern __inline __m128i
7288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7289 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7290                        __m128i __B)
7291 {
7292   return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7293                                                   (__v2di) __B,
7294                                                   (__v2di) __W, __U);
7295 }
7296
7297 extern __inline __m128i
7298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7299 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7300 {
7301   return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7302                                                   (__v2di) __B,
7303                                                   (__v2di)
7304                                                   _mm_setzero_pd (),
7305                                                   __U);
7306 }
7307
7308 extern __inline __m256i
7309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7310 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7311                       __m256i __B)
7312 {
7313   return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7314                                                 (__v4di) __B,
7315                                                 (__v4di) __W,
7316                                                 (__mmask8) __U);
7317 }
7318
7319 extern __inline __m256i
7320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7321 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7322 {
7323   return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7324                                                 (__v4di) __B,
7325                                                 (__v4di)
7326                                                 _mm256_setzero_si256 (),
7327                                                 (__mmask8) __U);
7328 }
7329
7330 extern __inline __m128i
7331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7332 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7333 {
7334   return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7335                                                 (__v2di) __B,
7336                                                 (__v2di) __W,
7337                                                 (__mmask8) __U);
7338 }
7339
7340 extern __inline __m128i
7341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7342 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7343 {
7344   return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7345                                                 (__v2di) __B,
7346                                                 (__v2di)
7347                                                 _mm_setzero_si128 (),
7348                                                 (__mmask8) __U);
7349 }
7350
7351 extern __inline __m256i
7352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7353 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7354                        __m256i __B)
7355 {
7356   return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7357                                                  (__v4di) __B,
7358                                                  (__v4di) __W,
7359                                                  (__mmask8) __U);
7360 }
7361
7362 extern __inline __m256i
7363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7364 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7365 {
7366   return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7367                                                  (__v4di) __B,
7368                                                  (__v4di)
7369                                                  _mm256_setzero_si256 (),
7370                                                  (__mmask8) __U);
7371 }
7372
7373 extern __inline __m128i
7374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7375 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7376                     __m128i __B)
7377 {
7378   return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7379                                                  (__v2di) __B,
7380                                                  (__v2di) __W,
7381                                                  (__mmask8) __U);
7382 }
7383
7384 extern __inline __m128i
7385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7386 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7387 {
7388   return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7389                                                  (__v2di) __B,
7390                                                  (__v2di)
7391                                                  _mm_setzero_si128 (),
7392                                                  (__mmask8) __U);
7393 }
7394
7395 extern __inline __m256d
7396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7397 _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7398                     __m256d __B)
7399 {
7400   return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7401                                                  (__v4df) __B,
7402                                                  (__v4df) __W,
7403                                                  (__mmask8) __U);
7404 }
7405
7406 extern __inline __m256d
7407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7408 _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7409 {
7410   return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7411                                                  (__v4df) __B,
7412                                                  (__v4df)
7413                                                  _mm256_setzero_pd (),
7414                                                  (__mmask8) __U);
7415 }
7416
7417 extern __inline __m256
7418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7419 _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7420 {
7421   return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7422                                                 (__v8sf) __B,
7423                                                 (__v8sf) __W,
7424                                                 (__mmask8) __U);
7425 }
7426
7427 extern __inline __m256
7428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7429 _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7430 {
7431   return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7432                                                 (__v8sf) __B,
7433                                                 (__v8sf)
7434                                                 _mm256_setzero_ps (),
7435                                                 (__mmask8) __U);
7436 }
7437
7438 extern __inline __m128
7439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7440 _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7441 {
7442   return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7443                                              (__v4sf) __B,
7444                                              (__v4sf) __W,
7445                                              (__mmask8) __U);
7446 }
7447
7448 extern __inline __m128
7449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7450 _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7451 {
7452   return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7453                                              (__v4sf) __B,
7454                                              (__v4sf)
7455                                              _mm_setzero_ps (),
7456                                              (__mmask8) __U);
7457 }
7458
7459 extern __inline __m128d
7460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7461 _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7462 {
7463   return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7464                                               (__v2df) __B,
7465                                               (__v2df) __W,
7466                                               (__mmask8) __U);
7467 }
7468
7469 extern __inline __m128d
7470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7471 _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7472 {
7473   return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7474                                               (__v2df) __B,
7475                                               (__v2df)
7476                                               _mm_setzero_pd (),
7477                                               (__mmask8) __U);
7478 }
7479
7480 extern __inline __m256d
7481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7482 _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7483                     __m256d __B)
7484 {
7485   return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7486                                                  (__v4df) __B,
7487                                                  (__v4df) __W,
7488                                                  (__mmask8) __U);
7489 }
7490
7491 extern __inline __m256d
7492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7493 _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7494                     __m256d __B)
7495 {
7496   return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7497                                                  (__v4df) __B,
7498                                                  (__v4df) __W,
7499                                                  (__mmask8) __U);
7500 }
7501
7502 extern __inline __m256d
7503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7504 _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7505 {
7506   return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7507                                                  (__v4df) __B,
7508                                                  (__v4df)
7509                                                  _mm256_setzero_pd (),
7510                                                  (__mmask8) __U);
7511 }
7512
7513 extern __inline __m256
7514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7515 _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7516 {
7517   return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7518                                                 (__v8sf) __B,
7519                                                 (__v8sf) __W,
7520                                                 (__mmask8) __U);
7521 }
7522
7523 extern __inline __m256d
7524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7525 _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7526 {
7527   return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7528                                                  (__v4df) __B,
7529                                                  (__v4df)
7530                                                  _mm256_setzero_pd (),
7531                                                  (__mmask8) __U);
7532 }
7533
7534 extern __inline __m256
7535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7536 _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7537 {
7538   return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7539                                                 (__v8sf) __B,
7540                                                 (__v8sf) __W,
7541                                                 (__mmask8) __U);
7542 }
7543
7544 extern __inline __m256
7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7546 _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7547 {
7548   return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7549                                                 (__v8sf) __B,
7550                                                 (__v8sf)
7551                                                 _mm256_setzero_ps (),
7552                                                 (__mmask8) __U);
7553 }
7554
7555 extern __inline __m256
7556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7557 _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7558 {
7559   return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7560                                                 (__v8sf) __B,
7561                                                 (__v8sf)
7562                                                 _mm256_setzero_ps (),
7563                                                 (__mmask8) __U);
7564 }
7565
7566 extern __inline __m128
7567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7568 _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7569 {
7570   return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7571                                              (__v4sf) __B,
7572                                              (__v4sf) __W,
7573                                              (__mmask8) __U);
7574 }
7575
7576 extern __inline __m128
7577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7578 _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7579 {
7580   return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7581                                              (__v4sf) __B,
7582                                              (__v4sf) __W,
7583                                              (__mmask8) __U);
7584 }
7585
7586 extern __inline __m128
7587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7588 _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7589 {
7590   return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7591                                              (__v4sf) __B,
7592                                              (__v4sf)
7593                                              _mm_setzero_ps (),
7594                                              (__mmask8) __U);
7595 }
7596
7597 extern __inline __m128
7598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7599 _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7600 {
7601   return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7602                                              (__v4sf) __B,
7603                                              (__v4sf)
7604                                              _mm_setzero_ps (),
7605                                              (__mmask8) __U);
7606 }
7607
7608 extern __inline __m128
7609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7610 _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7611 {
7612   return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7613                                              (__v4sf) __B,
7614                                              (__v4sf) __W,
7615                                              (__mmask8) __U);
7616 }
7617
7618 extern __inline __m128
7619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7620 _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7621 {
7622   return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7623                                              (__v4sf) __B,
7624                                              (__v4sf)
7625                                              _mm_setzero_ps (),
7626                                              (__mmask8) __U);
7627 }
7628
7629 extern __inline __m128d
7630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7631 _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7632 {
7633   return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7634                                               (__v2df) __B,
7635                                               (__v2df) __W,
7636                                               (__mmask8) __U);
7637 }
7638
7639 extern __inline __m128d
7640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7641 _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7642 {
7643   return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7644                                               (__v2df) __B,
7645                                               (__v2df)
7646                                               _mm_setzero_pd (),
7647                                               (__mmask8) __U);
7648 }
7649
7650 extern __inline __m128d
7651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7652 _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7653 {
7654   return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7655                                               (__v2df) __B,
7656                                               (__v2df) __W,
7657                                               (__mmask8) __U);
7658 }
7659
7660 extern __inline __m128d
7661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7662 _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7663 {
7664   return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7665                                               (__v2df) __B,
7666                                               (__v2df)
7667                                               _mm_setzero_pd (),
7668                                               (__mmask8) __U);
7669 }
7670
7671 extern __inline __m128d
7672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7673 _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7674 {
7675   return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7676                                               (__v2df) __B,
7677                                               (__v2df) __W,
7678                                               (__mmask8) __U);
7679 }
7680
7681 extern __inline __m128d
7682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7683 _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7684 {
7685   return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7686                                               (__v2df) __B,
7687                                               (__v2df)
7688                                               _mm_setzero_pd (),
7689                                               (__mmask8) __U);
7690 }
7691
7692 extern __inline __m256
7693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7694 _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7695 {
7696   return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7697                                                 (__v8sf) __B,
7698                                                 (__v8sf) __W,
7699                                                 (__mmask8) __U);
7700 }
7701
7702 extern __inline __m256
7703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7704 _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7705 {
7706   return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7707                                                 (__v8sf) __B,
7708                                                 (__v8sf)
7709                                                 _mm256_setzero_ps (),
7710                                                 (__mmask8) __U);
7711 }
7712
7713 extern __inline __m256d
7714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7715 _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7716                     __m256d __B)
7717 {
7718   return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7719                                                  (__v4df) __B,
7720                                                  (__v4df) __W,
7721                                                  (__mmask8) __U);
7722 }
7723
7724 extern __inline __m256d
7725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7726 _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7727 {
7728   return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7729                                                  (__v4df) __B,
7730                                                  (__v4df)
7731                                                  _mm256_setzero_pd (),
7732                                                  (__mmask8) __U);
7733 }
7734
7735 extern __inline __m256i
7736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7737 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7738 {
7739   return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7740                                                   (__v4di) __B,
7741                                                   (__v4di)
7742                                                   _mm256_setzero_si256 (),
7743                                                   __M);
7744 }
7745
7746 extern __inline __m256i
7747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7748 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7749                        __m256i __B)
7750 {
7751   return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7752                                                   (__v4di) __B,
7753                                                   (__v4di) __W, __M);
7754 }
7755
7756 extern __inline __m256i
7757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7758 _mm256_min_epi64 (__m256i __A, __m256i __B)
7759 {
7760   return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7761                                                   (__v4di) __B,
7762                                                   (__v4di)
7763                                                   _mm256_setzero_si256 (),
7764                                                   (__mmask8) -1);
7765 }
7766
7767 extern __inline __m256i
7768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7770                        __m256i __B)
7771 {
7772   return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7773                                                   (__v4di) __B,
7774                                                   (__v4di) __W, __M);
7775 }
7776
7777 extern __inline __m256i
7778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7779 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7780 {
7781   return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7782                                                   (__v4di) __B,
7783                                                   (__v4di)
7784                                                   _mm256_setzero_si256 (),
7785                                                   __M);
7786 }
7787
7788 extern __inline __m256i
7789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7790 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7791 {
7792   return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7793                                                   (__v4di) __B,
7794                                                   (__v4di)
7795                                                   _mm256_setzero_si256 (),
7796                                                   __M);
7797 }
7798
7799 extern __inline __m256i
7800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7801 _mm256_max_epi64 (__m256i __A, __m256i __B)
7802 {
7803   return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7804                                                   (__v4di) __B,
7805                                                   (__v4di)
7806                                                   _mm256_setzero_si256 (),
7807                                                   (__mmask8) -1);
7808 }
7809
7810 extern __inline __m256i
7811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7812 _mm256_max_epu64 (__m256i __A, __m256i __B)
7813 {
7814   return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7815                                                   (__v4di) __B,
7816                                                   (__v4di)
7817                                                   _mm256_setzero_si256 (),
7818                                                   (__mmask8) -1);
7819 }
7820
7821 extern __inline __m256i
7822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7823 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7824                        __m256i __B)
7825 {
7826   return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7827                                                   (__v4di) __B,
7828                                                   (__v4di) __W, __M);
7829 }
7830
7831 extern __inline __m256i
7832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7833 _mm256_min_epu64 (__m256i __A, __m256i __B)
7834 {
7835   return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7836                                                   (__v4di) __B,
7837                                                   (__v4di)
7838                                                   _mm256_setzero_si256 (),
7839                                                   (__mmask8) -1);
7840 }
7841
7842 extern __inline __m256i
7843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7844 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7845                        __m256i __B)
7846 {
7847   return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7848                                                   (__v4di) __B,
7849                                                   (__v4di) __W, __M);
7850 }
7851
7852 extern __inline __m256i
7853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7854 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7855 {
7856   return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7857                                                   (__v4di) __B,
7858                                                   (__v4di)
7859                                                   _mm256_setzero_si256 (),
7860                                                   __M);
7861 }
7862
7863 extern __inline __m256i
7864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7865 _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7866 {
7867   return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7868                                                   (__v8si) __B,
7869                                                   (__v8si)
7870                                                   _mm256_setzero_si256 (),
7871                                                   __M);
7872 }
7873
7874 extern __inline __m256i
7875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7876 _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7877                        __m256i __B)
7878 {
7879   return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7880                                                   (__v8si) __B,
7881                                                   (__v8si) __W, __M);
7882 }
7883
7884 extern __inline __m256i
7885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7886 _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7887 {
7888   return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7889                                                   (__v8si) __B,
7890                                                   (__v8si)
7891                                                   _mm256_setzero_si256 (),
7892                                                   __M);
7893 }
7894
7895 extern __inline __m256i
7896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7897 _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7898                        __m256i __B)
7899 {
7900   return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7901                                                   (__v8si) __B,
7902                                                   (__v8si) __W, __M);
7903 }
7904
7905 extern __inline __m256i
7906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7907 _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7908 {
7909   return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7910                                                   (__v8si) __B,
7911                                                   (__v8si)
7912                                                   _mm256_setzero_si256 (),
7913                                                   __M);
7914 }
7915
7916 extern __inline __m256i
7917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7918 _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7919                        __m256i __B)
7920 {
7921   return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7922                                                   (__v8si) __B,
7923                                                   (__v8si) __W, __M);
7924 }
7925
7926 extern __inline __m256i
7927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7928 _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7929 {
7930   return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7931                                                   (__v8si) __B,
7932                                                   (__v8si)
7933                                                   _mm256_setzero_si256 (),
7934                                                   __M);
7935 }
7936
7937 extern __inline __m256i
7938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7939 _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7940                        __m256i __B)
7941 {
7942   return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7943                                                   (__v8si) __B,
7944                                                   (__v8si) __W, __M);
7945 }
7946
7947 extern __inline __m128i
7948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7949 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7950 {
7951   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7952                                                   (__v2di) __B,
7953                                                   (__v2di)
7954                                                   _mm_setzero_si128 (),
7955                                                   __M);
7956 }
7957
7958 extern __inline __m128i
7959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7960 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7961                     __m128i __B)
7962 {
7963   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7964                                                   (__v2di) __B,
7965                                                   (__v2di) __W, __M);
7966 }
7967
7968 extern __inline __m128i
7969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7970 _mm_min_epi64 (__m128i __A, __m128i __B)
7971 {
7972   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7973                                                   (__v2di) __B,
7974                                                   (__v2di)
7975                                                   _mm_setzero_di (),
7976                                                   (__mmask8) -1);
7977 }
7978
7979 extern __inline __m128i
7980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7981 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7982                     __m128i __B)
7983 {
7984   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7985                                                   (__v2di) __B,
7986                                                   (__v2di) __W, __M);
7987 }
7988
7989 extern __inline __m128i
7990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7991 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7992 {
7993   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7994                                                   (__v2di) __B,
7995                                                   (__v2di)
7996                                                   _mm_setzero_si128 (),
7997                                                   __M);
7998 }
7999
8000 extern __inline __m128i
8001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8002 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8003 {
8004   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8005                                                   (__v2di) __B,
8006                                                   (__v2di)
8007                                                   _mm_setzero_si128 (),
8008                                                   __M);
8009 }
8010
8011 extern __inline __m128i
8012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8013 _mm_max_epi64 (__m128i __A, __m128i __B)
8014 {
8015   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8016                                                   (__v2di) __B,
8017                                                   (__v2di)
8018                                                   _mm_setzero_di (),
8019                                                   (__mmask8) -1);
8020 }
8021
8022 extern __inline __m128i
8023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8024 _mm_max_epu64 (__m128i __A, __m128i __B)
8025 {
8026   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8027                                                   (__v2di) __B,
8028                                                   (__v2di)
8029                                                   _mm_setzero_di (),
8030                                                   (__mmask8) -1);
8031 }
8032
8033 extern __inline __m128i
8034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8035 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8036                     __m128i __B)
8037 {
8038   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8039                                                   (__v2di) __B,
8040                                                   (__v2di) __W, __M);
8041 }
8042
8043 extern __inline __m128i
8044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8045 _mm_min_epu64 (__m128i __A, __m128i __B)
8046 {
8047   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8048                                                   (__v2di) __B,
8049                                                   (__v2di)
8050                                                   _mm_setzero_di (),
8051                                                   (__mmask8) -1);
8052 }
8053
8054 extern __inline __m128i
8055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8056 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8057                     __m128i __B)
8058 {
8059   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8060                                                   (__v2di) __B,
8061                                                   (__v2di) __W, __M);
8062 }
8063
8064 extern __inline __m128i
8065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8066 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8067 {
8068   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8069                                                   (__v2di) __B,
8070                                                   (__v2di)
8071                                                   _mm_setzero_si128 (),
8072                                                   __M);
8073 }
8074
8075 extern __inline __m128i
8076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8077 _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8078 {
8079   return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8080                                                   (__v4si) __B,
8081                                                   (__v4si)
8082                                                   _mm_setzero_si128 (),
8083                                                   __M);
8084 }
8085
8086 extern __inline __m128i
8087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8088 _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8089                     __m128i __B)
8090 {
8091   return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8092                                                   (__v4si) __B,
8093                                                   (__v4si) __W, __M);
8094 }
8095
8096 extern __inline __m128i
8097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8098 _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8099 {
8100   return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8101                                                   (__v4si) __B,
8102                                                   (__v4si)
8103                                                   _mm_setzero_si128 (),
8104                                                   __M);
8105 }
8106
8107 extern __inline __m128i
8108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8109 _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8110                     __m128i __B)
8111 {
8112   return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8113                                                   (__v4si) __B,
8114                                                   (__v4si) __W, __M);
8115 }
8116
8117 extern __inline __m128i
8118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8119 _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8120 {
8121   return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8122                                                   (__v4si) __B,
8123                                                   (__v4si)
8124                                                   _mm_setzero_si128 (),
8125                                                   __M);
8126 }
8127
8128 extern __inline __m128i
8129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8130 _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8131                     __m128i __B)
8132 {
8133   return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8134                                                   (__v4si) __B,
8135                                                   (__v4si) __W, __M);
8136 }
8137
8138 extern __inline __m128i
8139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8140 _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8141 {
8142   return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8143                                                   (__v4si) __B,
8144                                                   (__v4si)
8145                                                   _mm_setzero_si128 (),
8146                                                   __M);
8147 }
8148
8149 extern __inline __m128i
8150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8151 _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8152                     __m128i __B)
8153 {
8154   return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8155                                                   (__v4si) __B,
8156                                                   (__v4si) __W, __M);
8157 }
8158
/* When __AVX512CD__ is not already defined, save the current compiler
   options, switch the target to "avx512vl,avx512cd" for the code that
   follows, and define __DISABLE_AVX512VLCD__.  The marker is presumably
   tested later to pop the saved options; the matching pop is not
   visible in this part of the file -- confirm.  */
8159 #ifndef __AVX512CD__
8160 #pragma GCC push_options
8161 #pragma GCC target("avx512vl,avx512cd")
8162 #define __DISABLE_AVX512VLCD__
8163 #endif
8164
8165 extern __inline __m128i
8166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8167 _mm_broadcastmb_epi64 (__mmask8 __A)
8168 {
8169   return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8170 }
8171
8172 extern __inline __m256i
8173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8174 _mm256_broadcastmb_epi64 (__mmask8 __A)
8175 {
8176   return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8177 }
8178
8179 extern __inline __m128i
8180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8181 _mm_broadcastmw_epi32 (__mmask16 __A)
8182 {
8183   return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8184 }
8185
8186 extern __inline __m256i
8187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8188 _mm256_broadcastmw_epi32 (__mmask16 __A)
8189 {
8190   return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8191 }
8192
8193 extern __inline __m256i
8194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8195 _mm256_lzcnt_epi32 (__m256i __A)
8196 {
8197   return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8198                                                      (__v8si)
8199                                                      _mm256_setzero_si256 (),
8200                                                      (__mmask8) -1);
8201 }
8202
8203 extern __inline __m256i
8204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8205 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8206 {
8207   return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8208                                                      (__v8si) __W,
8209                                                      (__mmask8) __U);
8210 }
8211
8212 extern __inline __m256i
8213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8214 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8215 {
8216   return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8217                                                      (__v8si)
8218                                                      _mm256_setzero_si256 (),
8219                                                      (__mmask8) __U);
8220 }
8221
8222 extern __inline __m256i
8223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8224 _mm256_lzcnt_epi64 (__m256i __A)
8225 {
8226   return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8227                                                      (__v4di)
8228                                                      _mm256_setzero_si256 (),
8229                                                      (__mmask8) -1);
8230 }
8231
8232 extern __inline __m256i
8233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8234 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8235 {
8236   return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8237                                                      (__v4di) __W,
8238                                                      (__mmask8) __U);
8239 }
8240
8241 extern __inline __m256i
8242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8243 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8244 {
8245   return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8246                                                      (__v4di)
8247                                                      _mm256_setzero_si256 (),
8248                                                      (__mmask8) __U);
8249 }
8250
8251 extern __inline __m256i
8252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8253 _mm256_conflict_epi64 (__m256i __A)
8254 {
8255   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8256                                                          (__v4di)
8257                                                          _mm256_setzero_si256 (),
8258                                                          (__mmask8) -1);
8259 }
8260
8261 extern __inline __m256i
8262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8263 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8264 {
8265   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8266                                                          (__v4di) __W,
8267                                                          (__mmask8)
8268                                                          __U);
8269 }
8270
8271 extern __inline __m256i
8272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8273 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8274 {
8275   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8276                                                          (__v4di)
8277                                                          _mm256_setzero_si256 (),
8278                                                          (__mmask8)
8279                                                          __U);
8280 }
8281
8282 extern __inline __m256i
8283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8284 _mm256_conflict_epi32 (__m256i __A)
8285 {
8286   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8287                                                          (__v8si)
8288                                                          _mm256_setzero_si256 (),
8289                                                          (__mmask8) -1);
8290 }
8291
8292 extern __inline __m256i
8293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8294 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8295 {
8296   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8297                                                          (__v8si) __W,
8298                                                          (__mmask8)
8299                                                          __U);
8300 }
8301
8302 extern __inline __m256i
8303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8305 {
8306   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8307                                                          (__v8si)
8308                                                          _mm256_setzero_si256 (),
8309                                                          (__mmask8)
8310                                                          __U);
8311 }
8312
8313 extern __inline __m128i
8314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8315 _mm_lzcnt_epi32 (__m128i __A)
8316 {
8317   return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8318                                                      (__v4si)
8319                                                      _mm_setzero_si128 (),
8320                                                      (__mmask8) -1);
8321 }
8322
8323 extern __inline __m128i
8324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8325 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8326 {
8327   return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8328                                                      (__v4si) __W,
8329                                                      (__mmask8) __U);
8330 }
8331
8332 extern __inline __m128i
8333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8335 {
8336   return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8337                                                      (__v4si)
8338                                                      _mm_setzero_si128 (),
8339                                                      (__mmask8) __U);
8340 }
8341
8342 extern __inline __m128i
8343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8344 _mm_lzcnt_epi64 (__m128i __A)
8345 {
8346   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8347                                                      (__v2di)
8348                                                      _mm_setzero_di (),
8349                                                      (__mmask8) -1);
8350 }
8351
8352 extern __inline __m128i
8353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8354 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8355 {
8356   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8357                                                      (__v2di) __W,
8358                                                      (__mmask8) __U);
8359 }
8360
8361 extern __inline __m128i
8362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8363 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8364 {
8365   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8366                                                      (__v2di)
8367                                                      _mm_setzero_di (),
8368                                                      (__mmask8) __U);
8369 }
8370
8371 extern __inline __m128i
8372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8373 _mm_conflict_epi64 (__m128i __A)
8374 {
8375   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8376                                                          (__v2di)
8377                                                          _mm_setzero_di (),
8378                                                          (__mmask8) -1);
8379 }
8380
8381 extern __inline __m128i
8382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8383 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8384 {
8385   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8386                                                          (__v2di) __W,
8387                                                          (__mmask8)
8388                                                          __U);
8389 }
8390
8391 extern __inline __m128i
8392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8393 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8394 {
8395   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8396                                                          (__v2di)
8397                                                          _mm_setzero_di (),
8398                                                          (__mmask8)
8399                                                          __U);
8400 }
8401
8402 extern __inline __m128i
8403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8404 _mm_conflict_epi32 (__m128i __A)
8405 {
8406   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8407                                                          (__v4si)
8408                                                          _mm_setzero_si128 (),
8409                                                          (__mmask8) -1);
8410 }
8411
8412 extern __inline __m128i
8413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8414 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8415 {
8416   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8417                                                          (__v4si) __W,
8418                                                          (__mmask8)
8419                                                          __U);
8420 }
8421
8422 extern __inline __m128i
8423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8424 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8425 {
8426   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8427                                                          (__v4si)
8428                                                          _mm_setzero_si128 (),
8429                                                          (__mmask8)
8430                                                          __U);
8431 }
8432
/* Restore the saved target options if __DISABLE_AVX512VLCD__ is set.
   NOTE(review): the matching push_options / #define is presumably earlier
   in this file (not visible here) -- confirm.  The marker macro is also
   undefined so that it does not stay defined after this header.  */
#ifdef __DISABLE_AVX512VLCD__
#undef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8436
8437 extern __inline __m256d
8438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8439 _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8440                          __m256d __B)
8441 {
8442   return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8443                                                     (__v4df) __B,
8444                                                     (__v4df) __W,
8445                                                     (__mmask8) __U);
8446 }
8447
8448 extern __inline __m256d
8449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8450 _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8451 {
8452   return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8453                                                     (__v4df) __B,
8454                                                     (__v4df)
8455                                                     _mm256_setzero_pd (),
8456                                                     (__mmask8) __U);
8457 }
8458
8459 extern __inline __m128d
8460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8461 _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8462                       __m128d __B)
8463 {
8464   return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8465                                                     (__v2df) __B,
8466                                                     (__v2df) __W,
8467                                                     (__mmask8) __U);
8468 }
8469
8470 extern __inline __m128d
8471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8472 _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8473 {
8474   return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8475                                                     (__v2df) __B,
8476                                                     (__v2df)
8477                                                     _mm_setzero_pd (),
8478                                                     (__mmask8) __U);
8479 }
8480
8481 extern __inline __m256
8482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8483 _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8484                          __m256 __B)
8485 {
8486   return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8487                                                    (__v8sf) __B,
8488                                                    (__v8sf) __W,
8489                                                    (__mmask8) __U);
8490 }
8491
8492 extern __inline __m256d
8493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8494 _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8495                          __m256d __B)
8496 {
8497   return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8498                                                     (__v4df) __B,
8499                                                     (__v4df) __W,
8500                                                     (__mmask8) __U);
8501 }
8502
8503 extern __inline __m256d
8504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8505 _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8506 {
8507   return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8508                                                     (__v4df) __B,
8509                                                     (__v4df)
8510                                                     _mm256_setzero_pd (),
8511                                                     (__mmask8) __U);
8512 }
8513
8514 extern __inline __m128d
8515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8516 _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8517                       __m128d __B)
8518 {
8519   return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8520                                                     (__v2df) __B,
8521                                                     (__v2df) __W,
8522                                                     (__mmask8) __U);
8523 }
8524
8525 extern __inline __m128d
8526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8527 _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8528 {
8529   return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8530                                                     (__v2df) __B,
8531                                                     (__v2df)
8532                                                     _mm_setzero_pd (),
8533                                                     (__mmask8) __U);
8534 }
8535
8536 extern __inline __m256
8537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8538 _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8539                          __m256 __B)
8540 {
8541   return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8542                                                    (__v8sf) __B,
8543                                                    (__v8sf) __W,
8544                                                    (__mmask8) __U);
8545 }
8546
8547 extern __inline __m256
8548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8549 _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8550 {
8551   return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8552                                                    (__v8sf) __B,
8553                                                    (__v8sf)
8554                                                    _mm256_setzero_ps (),
8555                                                    (__mmask8) __U);
8556 }
8557
8558 extern __inline __m128
8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8560 _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8561 {
8562   return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8563                                                    (__v4sf) __B,
8564                                                    (__v4sf) __W,
8565                                                    (__mmask8) __U);
8566 }
8567
8568 extern __inline __m128
8569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8570 _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8571 {
8572   return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8573                                                    (__v4sf) __B,
8574                                                    (__v4sf)
8575                                                    _mm_setzero_ps (),
8576                                                    (__mmask8) __U);
8577 }
8578
8579 extern __inline __m128
8580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8581 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8582 {
8583   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8584                                                  (__v4sf) __W,
8585                                                  (__mmask8) __U);
8586 }
8587
8588 extern __inline __m128
8589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8590 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8591 {
8592   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8593                                                  (__v4sf)
8594                                                  _mm_setzero_ps (),
8595                                                  (__mmask8) __U);
8596 }
8597
8598 extern __inline __m256
8599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8600 _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8601 {
8602   return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8603                                                    (__v8sf) __B,
8604                                                    (__v8sf)
8605                                                    _mm256_setzero_ps (),
8606                                                    (__mmask8) __U);
8607 }
8608
8609 extern __inline __m256
8610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8611 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8612 {
8613   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8614                                                     (__v8sf) __W,
8615                                                     (__mmask8) __U);
8616 }
8617
8618 extern __inline __m256
8619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8620 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8621 {
8622   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8623                                                     (__v8sf)
8624                                                     _mm256_setzero_ps (),
8625                                                     (__mmask8) __U);
8626 }
8627
8628 extern __inline __m128
8629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8630 _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8631 {
8632   return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8633                                                    (__v4sf) __B,
8634                                                    (__v4sf) __W,
8635                                                    (__mmask8) __U);
8636 }
8637
8638 extern __inline __m128
8639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8640 _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8641 {
8642   return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8643                                                    (__v4sf) __B,
8644                                                    (__v4sf)
8645                                                    _mm_setzero_ps (),
8646                                                    (__mmask8) __U);
8647 }
8648
8649 extern __inline __m256i
8650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651 _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8652                        __m128i __B)
8653 {
8654   return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8655                                                  (__v4si) __B,
8656                                                  (__v8si) __W,
8657                                                  (__mmask8) __U);
8658 }
8659
8660 extern __inline __m256i
8661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8662 _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8663 {
8664   return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8665                                                  (__v4si) __B,
8666                                                  (__v8si)
8667                                                  _mm256_setzero_si256 (),
8668                                                  (__mmask8) __U);
8669 }
8670
8671 extern __inline __m128i
8672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8673 _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8674                     __m128i __B)
8675 {
8676   return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8677                                                  (__v4si) __B,
8678                                                  (__v4si) __W,
8679                                                  (__mmask8) __U);
8680 }
8681
8682 extern __inline __m128i
8683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8684 _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8685 {
8686   return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8687                                                  (__v4si) __B,
8688                                                  (__v4si)
8689                                                  _mm_setzero_si128 (),
8690                                                  (__mmask8) __U);
8691 }
8692
8693 extern __inline __m256i
8694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8695 _mm256_sra_epi64 (__m256i __A, __m128i __B)
8696 {
8697   return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8698                                                  (__v2di) __B,
8699                                                  (__v4di)
8700                                                  _mm256_setzero_si256 (),
8701                                                  (__mmask8) -1);
8702 }
8703
8704 extern __inline __m256i
8705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8706 _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8707                        __m128i __B)
8708 {
8709   return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8710                                                  (__v2di) __B,
8711                                                  (__v4di) __W,
8712                                                  (__mmask8) __U);
8713 }
8714
8715 extern __inline __m256i
8716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8717 _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8718 {
8719   return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8720                                                  (__v2di) __B,
8721                                                  (__v4di)
8722                                                  _mm256_setzero_si256 (),
8723                                                  (__mmask8) __U);
8724 }
8725
8726 extern __inline __m128i
8727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8728 _mm_sra_epi64 (__m128i __A, __m128i __B)
8729 {
8730   return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8731                                                  (__v2di) __B,
8732                                                  (__v2di)
8733                                                  _mm_setzero_di (),
8734                                                  (__mmask8) -1);
8735 }
8736
8737 extern __inline __m128i
8738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8739 _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8740                     __m128i __B)
8741 {
8742   return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8743                                                  (__v2di) __B,
8744                                                  (__v2di) __W,
8745                                                  (__mmask8) __U);
8746 }
8747
8748 extern __inline __m128i
8749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8750 _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8751 {
8752   return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8753                                                  (__v2di) __B,
8754                                                  (__v2di)
8755                                                  _mm_setzero_di (),
8756                                                  (__mmask8) __U);
8757 }
8758
8759 extern __inline __m128i
8760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8761 _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8762                     __m128i __B)
8763 {
8764   return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8765                                                  (__v4si) __B,
8766                                                  (__v4si) __W,
8767                                                  (__mmask8) __U);
8768 }
8769
8770 extern __inline __m128i
8771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8772 _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8773 {
8774   return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8775                                                  (__v4si) __B,
8776                                                  (__v4si)
8777                                                  _mm_setzero_si128 (),
8778                                                  (__mmask8) __U);
8779 }
8780
8781 extern __inline __m128i
8782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8783 _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8784                     __m128i __B)
8785 {
8786   return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8787                                                  (__v2di) __B,
8788                                                  (__v2di) __W,
8789                                                  (__mmask8) __U);
8790 }
8791
8792 extern __inline __m128i
8793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8794 _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8795 {
8796   return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8797                                                  (__v2di) __B,
8798                                                  (__v2di)
8799                                                  _mm_setzero_di (),
8800                                                  (__mmask8) __U);
8801 }
8802
8803 extern __inline __m256i
8804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8805 _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8806                        __m128i __B)
8807 {
8808   return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8809                                                  (__v4si) __B,
8810                                                  (__v8si) __W,
8811                                                  (__mmask8) __U);
8812 }
8813
8814 extern __inline __m256i
8815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8816 _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8817 {
8818   return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8819                                                  (__v4si) __B,
8820                                                  (__v8si)
8821                                                  _mm256_setzero_si256 (),
8822                                                  (__mmask8) __U);
8823 }
8824
8825 extern __inline __m256i
8826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8827 _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8828                        __m128i __B)
8829 {
8830   return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8831                                                  (__v2di) __B,
8832                                                  (__v4di) __W,
8833                                                  (__mmask8) __U);
8834 }
8835
8836 extern __inline __m256i
8837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838 _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8839 {
8840   return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8841                                                  (__v2di) __B,
8842                                                  (__v4di)
8843                                                  _mm256_setzero_si256 (),
8844                                                  (__mmask8) __U);
8845 }
8846
8847 extern __inline __m256
8848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8849 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8850                             __m256 __Y)
8851 {
8852   return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8853                                                     (__v8si) __X,
8854                                                     (__v8sf) __W,
8855                                                     (__mmask8) __U);
8856 }
8857
8858 extern __inline __m256
8859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8860 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8861 {
8862   return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8863                                                     (__v8si) __X,
8864                                                     (__v8sf)
8865                                                     _mm256_setzero_ps (),
8866                                                     (__mmask8) __U);
8867 }
8868
8869 extern __inline __m256d
8870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8871 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8872 {
8873   return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8874                                                      (__v4di) __X,
8875                                                      (__v4df)
8876                                                      _mm256_setzero_pd (),
8877                                                      (__mmask8) -1);
8878 }
8879
8880 extern __inline __m256d
8881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8882 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8883                             __m256d __Y)
8884 {
8885   return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8886                                                      (__v4di) __X,
8887                                                      (__v4df) __W,
8888                                                      (__mmask8) __U);
8889 }
8890
8891 extern __inline __m256d
8892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8893 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8894 {
8895   return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8896                                                      (__v4di) __X,
8897                                                      (__v4df)
8898                                                      _mm256_setzero_pd (),
8899                                                      (__mmask8) __U);
8900 }
8901
8902 extern __inline __m256d
8903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8904 _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8905                            __m256i __C)
8906 {
8907   return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8908                                                         (__v4di) __C,
8909                                                         (__v4df) __W,
8910                                                         (__mmask8)
8911                                                         __U);
8912 }
8913
8914 extern __inline __m256d
8915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8916 _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8917 {
8918   return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8919                                                         (__v4di) __C,
8920                                                         (__v4df)
8921                                                         _mm256_setzero_pd (),
8922                                                         (__mmask8)
8923                                                         __U);
8924 }
8925
8926 extern __inline __m256
8927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8928 _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8929                            __m256i __C)
8930 {
8931   return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8932                                                        (__v8si) __C,
8933                                                        (__v8sf) __W,
8934                                                        (__mmask8) __U);
8935 }
8936
8937 extern __inline __m256
8938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8939 _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
8940 {
8941   return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8942                                                        (__v8si) __C,
8943                                                        (__v8sf)
8944                                                        _mm256_setzero_ps (),
8945                                                        (__mmask8) __U);
8946 }
8947
8948 extern __inline __m128d
8949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8950 _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
8951                         __m128i __C)
8952 {
8953   return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8954                                                      (__v2di) __C,
8955                                                      (__v2df) __W,
8956                                                      (__mmask8) __U);
8957 }
8958
8959 extern __inline __m128d
8960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8961 _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
8962 {
8963   return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8964                                                      (__v2di) __C,
8965                                                      (__v2df)
8966                                                      _mm_setzero_pd (),
8967                                                      (__mmask8) __U);
8968 }
8969
8970 extern __inline __m128
8971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8972 _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
8973                         __m128i __C)
8974 {
8975   return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8976                                                     (__v4si) __C,
8977                                                     (__v4sf) __W,
8978                                                     (__mmask8) __U);
8979 }
8980
8981 extern __inline __m128
8982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8983 _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
8984 {
8985   return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8986                                                     (__v4si) __C,
8987                                                     (__v4sf)
8988                                                     _mm_setzero_ps (),
8989                                                     (__mmask8) __U);
8990 }
8991
8992 extern __inline __m256i
8993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8994 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
8995 {
8996   return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
8997                                                   (__v8si) __B,
8998                                                   (__v8si)
8999                                                   _mm256_setzero_si256 (),
9000                                                   __M);
9001 }
9002
9003 extern __inline __m256i
9004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9005 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9006 {
9007   return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9008                                                      (__v4di) __X,
9009                                                      (__v4di)
9010                                                      _mm256_setzero_si256 (),
9011                                                      __M);
9012 }
9013
9014 extern __inline __m256i
9015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9016 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9017                          __m256i __B)
9018 {
9019   return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9020                                                   (__v8si) __B,
9021                                                   (__v8si) __W, __M);
9022 }
9023
9024 extern __inline __m128i
9025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9026 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9027 {
9028   return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9029                                                   (__v4si) __B,
9030                                                   (__v4si)
9031                                                   _mm_setzero_si128 (),
9032                                                   __M);
9033 }
9034
9035 extern __inline __m128i
9036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9037 _mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
9038                       __m128i __B)
9039 {
9040   return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9041                                                   (__v4si) __B,
9042                                                   (__v4si) __W, __M);
9043 }
9044
9045 extern __inline __m256i
9046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9047 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9048                        __m256i __Y)
9049 {
9050   return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9051                                                   (__v8si) __Y,
9052                                                   (__v4di) __W, __M);
9053 }
9054
9055 extern __inline __m256i
9056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9057 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9058 {
9059   return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9060                                                   (__v8si) __Y,
9061                                                   (__v4di)
9062                                                   _mm256_setzero_si256 (),
9063                                                   __M);
9064 }
9065
9066 extern __inline __m128i
9067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9068 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9069                     __m128i __Y)
9070 {
9071   return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9072                                                   (__v4si) __Y,
9073                                                   (__v2di) __W, __M);
9074 }
9075
9076 extern __inline __m128i
9077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9078 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9079 {
9080   return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9081                                                   (__v4si) __Y,
9082                                                   (__v2di)
9083                                                   _mm_setzero_si128 (),
9084                                                   __M);
9085 }
9086
9087 extern __inline __m256i
9088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9089 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9090                                __m256i __Y)
9091 {
9092   return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9093                                                      (__v4di) __X,
9094                                                      (__v4di) __W,
9095                                                      __M);
9096 }
9097
9098 extern __inline __m256i
9099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9100 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9101                        __m256i __Y)
9102 {
9103   return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9104                                                    (__v8si) __Y,
9105                                                    (__v4di) __W, __M);
9106 }
9107
9108 extern __inline __m256i
9109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9110 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9111 {
9112   return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9113                                                      (__v8si) __X,
9114                                                      (__v8si)
9115                                                      _mm256_setzero_si256 (),
9116                                                      __M);
9117 }
9118
9119 extern __inline __m256i
9120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9121 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9122 {
9123   return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9124                                                    (__v8si) __Y,
9125                                                    (__v4di)
9126                                                    _mm256_setzero_si256 (),
9127                                                    __M);
9128 }
9129
9130 extern __inline __m128i
9131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9132 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9133                     __m128i __Y)
9134 {
9135   return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9136                                                    (__v4si) __Y,
9137                                                    (__v2di) __W, __M);
9138 }
9139
9140 extern __inline __m128i
9141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9142 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9143 {
9144   return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9145                                                    (__v4si) __Y,
9146                                                    (__v2di)
9147                                                    _mm_setzero_si128 (),
9148                                                    __M);
9149 }
9150
9151 extern __inline __m256i
9152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9153 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9154                                __m256i __Y)
9155 {
9156   return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9157                                                      (__v8si) __X,
9158                                                      (__v8si) __W,
9159                                                      __M);
9160 }
9161
9162 #ifdef __OPTIMIZE__
9163 extern __inline __m256i
9164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9165 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9166                             __m256i __X, const int __I)
9167 {
9168   return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9169                                                   __I,
9170                                                   (__v4di) __W,
9171                                                   (__mmask8) __M);
9172 }
9173
9174 extern __inline __m256i
9175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9176 _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9177 {
9178   return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9179                                                   __I,
9180                                                   (__v4di)
9181                                                   _mm256_setzero_si256 (),
9182                                                   (__mmask8) __M);
9183 }
9184
9185 extern __inline __m256d
9186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9187 _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9188                         __m256d __B, const int __imm)
9189 {
9190   return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9191                                                   (__v4df) __B, __imm,
9192                                                   (__v4df) __W,
9193                                                   (__mmask8) __U);
9194 }
9195
9196 extern __inline __m256d
9197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9198 _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9199                          const int __imm)
9200 {
9201   return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9202                                                   (__v4df) __B, __imm,
9203                                                   (__v4df)
9204                                                   _mm256_setzero_pd (),
9205                                                   (__mmask8) __U);
9206 }
9207
9208 extern __inline __m128d
9209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9210 _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9211                      __m128d __B, const int __imm)
9212 {
9213   return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9214                                                   (__v2df) __B, __imm,
9215                                                   (__v2df) __W,
9216                                                   (__mmask8) __U);
9217 }
9218
9219 extern __inline __m128d
9220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9221 _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9222                       const int __imm)
9223 {
9224   return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9225                                                   (__v2df) __B, __imm,
9226                                                   (__v2df)
9227                                                   _mm_setzero_pd (),
9228                                                   (__mmask8) __U);
9229 }
9230
9231 extern __inline __m256
9232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9233 _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9234                         __m256 __B, const int __imm)
9235 {
9236   return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9237                                                  (__v8sf) __B, __imm,
9238                                                  (__v8sf) __W,
9239                                                  (__mmask8) __U);
9240 }
9241
9242 extern __inline __m256
9243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9244 _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9245                          const int __imm)
9246 {
9247   return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9248                                                  (__v8sf) __B, __imm,
9249                                                  (__v8sf)
9250                                                  _mm256_setzero_ps (),
9251                                                  (__mmask8) __U);
9252 }
9253
9254 extern __inline __m128
9255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9256 _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9257                      const int __imm)
9258 {
9259   return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9260                                                  (__v4sf) __B, __imm,
9261                                                  (__v4sf) __W,
9262                                                  (__mmask8) __U);
9263 }
9264
9265 extern __inline __m128
9266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9267 _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9268                       const int __imm)
9269 {
9270   return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9271                                                  (__v4sf) __B, __imm,
9272                                                  (__v4sf)
9273                                                  _mm_setzero_ps (),
9274                                                  (__mmask8) __U);
9275 }
9276
9277 extern __inline __m256i
9278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9279 _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9280 {
9281   return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9282                                                         (__v4si) __B,
9283                                                         __imm,
9284                                                         (__v8si)
9285                                                         _mm256_setzero_si256 (),
9286                                                         (__mmask8) -1);
9287 }
9288
9289 extern __inline __m256i
9290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9291 _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9292                          __m128i __B, const int __imm)
9293 {
9294   return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9295                                                         (__v4si) __B,
9296                                                         __imm,
9297                                                         (__v8si) __W,
9298                                                         (__mmask8)
9299                                                         __U);
9300 }
9301
9302 extern __inline __m256i
9303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9304 _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9305                           const int __imm)
9306 {
9307   return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9308                                                         (__v4si) __B,
9309                                                         __imm,
9310                                                         (__v8si)
9311                                                         _mm256_setzero_si256 (),
9312                                                         (__mmask8)
9313                                                         __U);
9314 }
9315
9316 extern __inline __m256
9317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9318 _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9319 {
9320   return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9321                                                        (__v4sf) __B,
9322                                                        __imm,
9323                                                        (__v8sf)
9324                                                        _mm256_setzero_ps (),
9325                                                        (__mmask8) -1);
9326 }
9327
9328 extern __inline __m256
9329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9330 _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9331                          __m128 __B, const int __imm)
9332 {
9333   return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9334                                                        (__v4sf) __B,
9335                                                        __imm,
9336                                                        (__v8sf) __W,
9337                                                        (__mmask8) __U);
9338 }
9339
9340 extern __inline __m256
9341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9342 _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
9343                           const int __imm)
9344 {
9345   return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9346                                                        (__v4sf) __B,
9347                                                        __imm,
9348                                                        (__v8sf)
9349                                                        _mm256_setzero_ps (),
9350                                                        (__mmask8) __U);
9351 }
9352
9353 extern __inline __m128i
9354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9355 _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
9356 {
9357   return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9358                                                          __imm,
9359                                                          (__v4si)
9360                                                          _mm_setzero_si128 (),
9361                                                          (__mmask8) -1);
9362 }
9363
9364 extern __inline __m128i
9365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9366 _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
9367                                 const int __imm)
9368 {
9369   return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9370                                                          __imm,
9371                                                          (__v4si) __W,
9372                                                          (__mmask8)
9373                                                          __U);
9374 }
9375
9376 extern __inline __m128i
9377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9378 _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
9379                                  const int __imm)
9380 {
9381   return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9382                                                          __imm,
9383                                                          (__v4si)
9384                                                          _mm_setzero_si128 (),
9385                                                          (__mmask8)
9386                                                          __U);
9387 }
9388
9389 extern __inline __m128
9390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9391 _mm256_extractf32x4_ps (__m256 __A, const int __imm)
9392 {
9393   return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9394                                                         __imm,
9395                                                         (__v4sf)
9396                                                         _mm_setzero_ps (),
9397                                                         (__mmask8) -1);
9398 }
9399
9400 extern __inline __m128
9401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9402 _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
9403                              const int __imm)
9404 {
9405   return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9406                                                         __imm,
9407                                                         (__v4sf) __W,
9408                                                         (__mmask8)
9409                                                         __U);
9410 }
9411
9412 extern __inline __m128
9413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9414 _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
9415                               const int __imm)
9416 {
9417   return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9418                                                         __imm,
9419                                                         (__v4sf)
9420                                                         _mm_setzero_ps (),
9421                                                         (__mmask8)
9422                                                         __U);
9423 }
9424
9425 extern __inline __m256i
9426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9427 _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
9428 {
9429   return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9430                                                        (__v4di) __B,
9431                                                        __imm,
9432                                                        (__v4di)
9433                                                        _mm256_setzero_si256 (),
9434                                                        (__mmask8) -1);
9435 }
9436
9437 extern __inline __m256i
9438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9439 _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
9440                            __m256i __B, const int __imm)
9441 {
9442   return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9443                                                        (__v4di) __B,
9444                                                        __imm,
9445                                                        (__v4di) __W,
9446                                                        (__mmask8) __U);
9447 }
9448
9449 extern __inline __m256i
9450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9451 _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
9452                             const int __imm)
9453 {
9454   return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9455                                                        (__v4di) __B,
9456                                                        __imm,
9457                                                        (__v4di)
9458                                                        _mm256_setzero_si256 (),
9459                                                        (__mmask8) __U);
9460 }
9461
9462 extern __inline __m256i
9463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9464 _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
9465 {
9466   return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9467                                                        (__v8si) __B,
9468                                                        __imm,
9469                                                        (__v8si)
9470                                                        _mm256_setzero_si256 (),
9471                                                        (__mmask8) -1);
9472 }
9473
9474 extern __inline __m256i
9475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9476 _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9477                            __m256i __B, const int __imm)
9478 {
9479   return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9480                                                        (__v8si) __B,
9481                                                        __imm,
9482                                                        (__v8si) __W,
9483                                                        (__mmask8) __U);
9484 }
9485
9486 extern __inline __m256i
9487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9488 _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
9489                             const int __imm)
9490 {
9491   return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9492                                                        (__v8si) __B,
9493                                                        __imm,
9494                                                        (__v8si)
9495                                                        _mm256_setzero_si256 (),
9496                                                        (__mmask8) __U);
9497 }
9498
9499 extern __inline __m256d
9500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9501 _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
9502 {
9503   return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9504                                                        (__v4df) __B,
9505                                                        __imm,
9506                                                        (__v4df)
9507                                                        _mm256_setzero_pd (),
9508                                                        (__mmask8) -1);
9509 }
9510
9511 extern __inline __m256d
9512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9513 _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
9514                            __m256d __B, const int __imm)
9515 {
9516   return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9517                                                        (__v4df) __B,
9518                                                        __imm,
9519                                                        (__v4df) __W,
9520                                                        (__mmask8) __U);
9521 }
9522
9523 extern __inline __m256d
9524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9525 _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
9526                             const int __imm)
9527 {
9528   return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9529                                                        (__v4df) __B,
9530                                                        __imm,
9531                                                        (__v4df)
9532                                                        _mm256_setzero_pd (),
9533                                                        (__mmask8) __U);
9534 }
9535
9536 extern __inline __m256
9537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9538 _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
9539 {
9540   return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9541                                                       (__v8sf) __B,
9542                                                       __imm,
9543                                                       (__v8sf)
9544                                                       _mm256_setzero_ps (),
9545                                                       (__mmask8) -1);
9546 }
9547
9548 extern __inline __m256
9549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9550 _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9551                            __m256 __B, const int __imm)
9552 {
9553   return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9554                                                       (__v8sf) __B,
9555                                                       __imm,
9556                                                       (__v8sf) __W,
9557                                                       (__mmask8) __U);
9558 }
9559
9560 extern __inline __m256
9561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9562 _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
9563                             const int __imm)
9564 {
9565   return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9566                                                       (__v8sf) __B,
9567                                                       __imm,
9568                                                       (__v8sf)
9569                                                       _mm256_setzero_ps (),
9570                                                       (__mmask8) __U);
9571 }
9572
9573 extern __inline __m256d
9574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9575 _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
9576                     const int __imm)
9577 {
9578   return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9579                                                       (__v4df) __B,
9580                                                       (__v4di) __C,
9581                                                       __imm,
9582                                                       (__mmask8) -1);
9583 }
9584
9585 extern __inline __m256d
9586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9587 _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
9588                          __m256i __C, const int __imm)
9589 {
9590   return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9591                                                       (__v4df) __B,
9592                                                       (__v4di) __C,
9593                                                       __imm,
9594                                                       (__mmask8) __U);
9595 }
9596
9597 extern __inline __m256d
9598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9599 _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
9600                           __m256i __C, const int __imm)
9601 {
9602   return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
9603                                                        (__v4df) __B,
9604                                                        (__v4di) __C,
9605                                                        __imm,
9606                                                        (__mmask8) __U);
9607 }
9608
9609 extern __inline __m256
9610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9611 _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
9612                     const int __imm)
9613 {
9614   return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9615                                                      (__v8sf) __B,
9616                                                      (__v8si) __C,
9617                                                      __imm,
9618                                                      (__mmask8) -1);
9619 }
9620
9621 extern __inline __m256
9622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9623 _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
9624                          __m256i __C, const int __imm)
9625 {
9626   return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9627                                                      (__v8sf) __B,
9628                                                      (__v8si) __C,
9629                                                      __imm,
9630                                                      (__mmask8) __U);
9631 }
9632
9633 extern __inline __m256
9634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9635 _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
9636                           __m256i __C, const int __imm)
9637 {
9638   return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
9639                                                       (__v8sf) __B,
9640                                                       (__v8si) __C,
9641                                                       __imm,
9642                                                       (__mmask8) __U);
9643 }
9644
9645 extern __inline __m128d
9646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9647 _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
9648                  const int __imm)
9649 {
9650   return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9651                                                       (__v2df) __B,
9652                                                       (__v2di) __C,
9653                                                       __imm,
9654                                                       (__mmask8) -1);
9655 }
9656
9657 extern __inline __m128d
9658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9659 _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
9660                       __m128i __C, const int __imm)
9661 {
9662   return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9663                                                       (__v2df) __B,
9664                                                       (__v2di) __C,
9665                                                       __imm,
9666                                                       (__mmask8) __U);
9667 }
9668
9669 extern __inline __m128d
9670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9671 _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
9672                        __m128i __C, const int __imm)
9673 {
9674   return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
9675                                                        (__v2df) __B,
9676                                                        (__v2di) __C,
9677                                                        __imm,
9678                                                        (__mmask8) __U);
9679 }
9680
9681 extern __inline __m128
9682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9683 _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
9684 {
9685   return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9686                                                      (__v4sf) __B,
9687                                                      (__v4si) __C,
9688                                                      __imm,
9689                                                      (__mmask8) -1);
9690 }
9691
9692 extern __inline __m128
9693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9694 _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
9695                       __m128i __C, const int __imm)
9696 {
9697   return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9698                                                      (__v4sf) __B,
9699                                                      (__v4si) __C,
9700                                                      __imm,
9701                                                      (__mmask8) __U);
9702 }
9703
9704 extern __inline __m128
9705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9706 _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
9707                        __m128i __C, const int __imm)
9708 {
9709   return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
9710                                                       (__v4sf) __B,
9711                                                       (__v4si) __C,
9712                                                       __imm,
9713                                                       (__mmask8) __U);
9714 }
9715
9716 extern __inline __m256i
9717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9718 _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
9719                         const int __imm)
9720 {
9721   return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9722                                                   (__v8si) __W,
9723                                                   (__mmask8) __U);
9724 }
9725
9726 extern __inline __m256i
9727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9728 _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
9729 {
9730   return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9731                                                   (__v8si)
9732                                                   _mm256_setzero_si256 (),
9733                                                   (__mmask8) __U);
9734 }
9735
9736 extern __inline __m128i
9737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9738 _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
9739                      const int __imm)
9740 {
9741   return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9742                                                   (__v4si) __W,
9743                                                   (__mmask8) __U);
9744 }
9745
9746 extern __inline __m128i
9747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9748 _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
9749 {
9750   return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9751                                                   (__v4si)
9752                                                   _mm_setzero_si128 (),
9753                                                   (__mmask8) __U);
9754 }
9755
9756 extern __inline __m256i
9757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9758 _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
9759                         const int __imm)
9760 {
9761   return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9762                                                   (__v4di) __W,
9763                                                   (__mmask8) __U);
9764 }
9765
9766 extern __inline __m256i
9767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9768 _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
9769 {
9770   return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9771                                                   (__v4di)
9772                                                   _mm256_setzero_si256 (),
9773                                                   (__mmask8) __U);
9774 }
9775
9776 extern __inline __m128i
9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778 _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
9779                      const int __imm)
9780 {
9781   return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9782                                                   (__v2di) __W,
9783                                                   (__mmask8) __U);
9784 }
9785
9786 extern __inline __m128i
9787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9788 _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
9789 {
9790   return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9791                                                   (__v2di)
9792                                                   _mm_setzero_si128 (),
9793                                                   (__mmask8) __U);
9794 }
9795
9796 extern __inline __m256i
9797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9798 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
9799                            const int imm)
9800 {
9801   return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9802                                                      (__v4di) __B,
9803                                                      (__v4di) __C, imm,
9804                                                      (__mmask8) -1);
9805 }
9806
9807 extern __inline __m256i
9808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9809 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
9810                                 __m256i __B, __m256i __C,
9811                                 const int imm)
9812 {
9813   return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9814                                                      (__v4di) __B,
9815                                                      (__v4di) __C, imm,
9816                                                      (__mmask8) __U);
9817 }
9818
9819 extern __inline __m256i
9820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9821 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
9822                                  __m256i __B, __m256i __C,
9823                                  const int imm)
9824 {
9825   return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
9826                                                       (__v4di) __B,
9827                                                       (__v4di) __C,
9828                                                       imm,
9829                                                       (__mmask8) __U);
9830 }
9831
9832 extern __inline __m256i
9833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9834 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
9835                            const int imm)
9836 {
9837   return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9838                                                      (__v8si) __B,
9839                                                      (__v8si) __C, imm,
9840                                                      (__mmask8) -1);
9841 }
9842
9843 extern __inline __m256i
9844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9845 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
9846                                 __m256i __B, __m256i __C,
9847                                 const int imm)
9848 {
9849   return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9850                                                      (__v8si) __B,
9851                                                      (__v8si) __C, imm,
9852                                                      (__mmask8) __U);
9853 }
9854
9855 extern __inline __m256i
9856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9857 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
9858                                  __m256i __B, __m256i __C,
9859                                  const int imm)
9860 {
9861   return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
9862                                                       (__v8si) __B,
9863                                                       (__v8si) __C,
9864                                                       imm,
9865                                                       (__mmask8) __U);
9866 }
9867
9868 extern __inline __m128i
9869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9870 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
9871                         const int imm)
9872 {
9873   return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9874                                                      (__v2di) __B,
9875                                                      (__v2di) __C, imm,
9876                                                      (__mmask8) -1);
9877 }
9878
9879 extern __inline __m128i
9880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9881 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
9882                              __m128i __B, __m128i __C, const int imm)
9883 {
9884   return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9885                                                      (__v2di) __B,
9886                                                      (__v2di) __C, imm,
9887                                                      (__mmask8) __U);
9888 }
9889
9890 extern __inline __m128i
9891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9892 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
9893                               __m128i __B, __m128i __C, const int imm)
9894 {
9895   return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
9896                                                       (__v2di) __B,
9897                                                       (__v2di) __C,
9898                                                       imm,
9899                                                       (__mmask8) __U);
9900 }
9901
9902 extern __inline __m128i
9903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9904 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
9905                         const int imm)
9906 {
9907   return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9908                                                      (__v4si) __B,
9909                                                      (__v4si) __C, imm,
9910                                                      (__mmask8) -1);
9911 }
9912
9913 extern __inline __m128i
9914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9915 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
9916                              __m128i __B, __m128i __C, const int imm)
9917 {
9918   return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9919                                                      (__v4si) __B,
9920                                                      (__v4si) __C, imm,
9921                                                      (__mmask8) __U);
9922 }
9923
9924 extern __inline __m128i
9925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
9927                               __m128i __B, __m128i __C, const int imm)
9928 {
9929   return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
9930                                                       (__v4si) __B,
9931                                                       (__v4si) __C,
9932                                                       imm,
9933                                                       (__mmask8) __U);
9934 }
9935
9936 extern __inline __m256
9937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9938 _mm256_roundscale_ps (__m256 __A, const int __imm)
9939 {
9940   return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9941                                                       __imm,
9942                                                       (__v8sf)
9943                                                       _mm256_setzero_ps (),
9944                                                       (__mmask8) -1);
9945 }
9946
9947 extern __inline __m256
9948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9949 _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
9950                            const int __imm)
9951 {
9952   return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9953                                                       __imm,
9954                                                       (__v8sf) __W,
9955                                                       (__mmask8) __U);
9956 }
9957
9958 extern __inline __m256
9959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960 _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
9961 {
9962   return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9963                                                       __imm,
9964                                                       (__v8sf)
9965                                                       _mm256_setzero_ps (),
9966                                                       (__mmask8) __U);
9967 }
9968
9969 extern __inline __m256d
9970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971 _mm256_roundscale_pd (__m256d __A, const int __imm)
9972 {
9973   return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9974                                                        __imm,
9975                                                        (__v4df)
9976                                                        _mm256_setzero_pd (),
9977                                                        (__mmask8) -1);
9978 }
9979
9980 extern __inline __m256d
9981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9982 _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
9983                            const int __imm)
9984 {
9985   return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9986                                                        __imm,
9987                                                        (__v4df) __W,
9988                                                        (__mmask8) __U);
9989 }
9990
9991 extern __inline __m256d
9992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993 _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
9994 {
9995   return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9996                                                        __imm,
9997                                                        (__v4df)
9998                                                        _mm256_setzero_pd (),
9999                                                        (__mmask8) __U);
10000 }
10001
10002 extern __inline __m128
10003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10004 _mm_roundscale_ps (__m128 __A, const int __imm)
10005 {
10006   return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10007                                                       __imm,
10008                                                       (__v4sf)
10009                                                       _mm_setzero_ps (),
10010                                                       (__mmask8) -1);
10011 }
10012
10013 extern __inline __m128
10014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015 _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10016                         const int __imm)
10017 {
10018   return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10019                                                       __imm,
10020                                                       (__v4sf) __W,
10021                                                       (__mmask8) __U);
10022 }
10023
10024 extern __inline __m128
10025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10026 _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10027 {
10028   return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10029                                                       __imm,
10030                                                       (__v4sf)
10031                                                       _mm_setzero_ps (),
10032                                                       (__mmask8) __U);
10033 }
10034
10035 extern __inline __m128d
10036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037 _mm_roundscale_pd (__m128d __A, const int __imm)
10038 {
10039   return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10040                                                        __imm,
10041                                                        (__v2df)
10042                                                        _mm_setzero_pd (),
10043                                                        (__mmask8) -1);
10044 }
10045
10046 extern __inline __m128d
10047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048 _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10049                         const int __imm)
10050 {
10051   return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10052                                                        __imm,
10053                                                        (__v2df) __W,
10054                                                        (__mmask8) __U);
10055 }
10056
10057 extern __inline __m128d
10058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10059 _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10060 {
10061   return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10062                                                        __imm,
10063                                                        (__v2df)
10064                                                        _mm_setzero_pd (),
10065                                                        (__mmask8) __U);
10066 }
10067
10068 extern __inline __m256
10069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10070 _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10071                    _MM_MANTISSA_SIGN_ENUM __C)
10072 {
10073   return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10074                                                     (__C << 2) | __B,
10075                                                     (__v8sf)
10076                                                     _mm256_setzero_ps (),
10077                                                     (__mmask8) -1);
10078 }
10079
10080 extern __inline __m256
10081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10082 _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10083                         _MM_MANTISSA_NORM_ENUM __B,
10084                         _MM_MANTISSA_SIGN_ENUM __C)
10085 {
10086   return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10087                                                     (__C << 2) | __B,
10088                                                     (__v8sf) __W,
10089                                                     (__mmask8) __U);
10090 }
10091
10092 extern __inline __m256
10093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10094 _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10095                          _MM_MANTISSA_NORM_ENUM __B,
10096                          _MM_MANTISSA_SIGN_ENUM __C)
10097 {
10098   return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10099                                                     (__C << 2) | __B,
10100                                                     (__v8sf)
10101                                                     _mm256_setzero_ps (),
10102                                                     (__mmask8) __U);
10103 }
10104
10105 extern __inline __m128
10106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10107 _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10108                 _MM_MANTISSA_SIGN_ENUM __C)
10109 {
10110   return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10111                                                     (__C << 2) | __B,
10112                                                     (__v4sf)
10113                                                     _mm_setzero_ps (),
10114                                                     (__mmask8) -1);
10115 }
10116
10117 extern __inline __m128
10118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10119 _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10120                      _MM_MANTISSA_NORM_ENUM __B,
10121                      _MM_MANTISSA_SIGN_ENUM __C)
10122 {
10123   return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10124                                                     (__C << 2) | __B,
10125                                                     (__v4sf) __W,
10126                                                     (__mmask8) __U);
10127 }
10128
10129 extern __inline __m128
10130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10131 _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10132                       _MM_MANTISSA_NORM_ENUM __B,
10133                       _MM_MANTISSA_SIGN_ENUM __C)
10134 {
10135   return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10136                                                     (__C << 2) | __B,
10137                                                     (__v4sf)
10138                                                     _mm_setzero_ps (),
10139                                                     (__mmask8) __U);
10140 }
10141
10142 extern __inline __m256d
10143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10144 _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10145                    _MM_MANTISSA_SIGN_ENUM __C)
10146 {
10147   return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10148                                                      (__C << 2) | __B,
10149                                                      (__v4df)
10150                                                      _mm256_setzero_pd (),
10151                                                      (__mmask8) -1);
10152 }
10153
10154 extern __inline __m256d
10155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10156 _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10157                         _MM_MANTISSA_NORM_ENUM __B,
10158                         _MM_MANTISSA_SIGN_ENUM __C)
10159 {
10160   return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10161                                                      (__C << 2) | __B,
10162                                                      (__v4df) __W,
10163                                                      (__mmask8) __U);
10164 }
10165
10166 extern __inline __m256d
10167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10168 _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10169                          _MM_MANTISSA_NORM_ENUM __B,
10170                          _MM_MANTISSA_SIGN_ENUM __C)
10171 {
10172   return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10173                                                      (__C << 2) | __B,
10174                                                      (__v4df)
10175                                                      _mm256_setzero_pd (),
10176                                                      (__mmask8) __U);
10177 }
10178
10179 extern __inline __m128d
10180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10181 _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10182                 _MM_MANTISSA_SIGN_ENUM __C)
10183 {
10184   return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10185                                                      (__C << 2) | __B,
10186                                                      (__v2df)
10187                                                      _mm_setzero_pd (),
10188                                                      (__mmask8) -1);
10189 }
10190
10191 extern __inline __m128d
10192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10193 _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10194                      _MM_MANTISSA_NORM_ENUM __B,
10195                      _MM_MANTISSA_SIGN_ENUM __C)
10196 {
10197   return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10198                                                      (__C << 2) | __B,
10199                                                      (__v2df) __W,
10200                                                      (__mmask8) __U);
10201 }
10202
10203 extern __inline __m128d
10204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205 _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10206                       _MM_MANTISSA_NORM_ENUM __B,
10207                       _MM_MANTISSA_SIGN_ENUM __C)
10208 {
10209   return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10210                                                      (__C << 2) | __B,
10211                                                      (__v2df)
10212                                                      _mm_setzero_pd (),
10213                                                      (__mmask8) __U);
10214 }
10215
10216 extern __inline __m256
10217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10218 _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10219                            __m256i __index, float const *__addr,
10220                            int __scale)
10221 {
10222   return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10223                                                 __addr,
10224                                                 (__v8si) __index,
10225                                                 __mask, __scale);
10226 }
10227
10228 extern __inline __m128
10229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10230 _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10231                         __m128i __index, float const *__addr,
10232                         int __scale)
10233 {
10234   return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10235                                                 __addr,
10236                                                 (__v4si) __index,
10237                                                 __mask, __scale);
10238 }
10239
10240 extern __inline __m256d
10241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10242 _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10243                            __m128i __index, double const *__addr,
10244                            int __scale)
10245 {
10246   return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10247                                                  __addr,
10248                                                  (__v4si) __index,
10249                                                  __mask, __scale);
10250 }
10251
10252 extern __inline __m128d
10253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10254 _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10255                         __m128i __index, double const *__addr,
10256                         int __scale)
10257 {
10258   return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10259                                                  __addr,
10260                                                  (__v4si) __index,
10261                                                  __mask, __scale);
10262 }
10263
10264 extern __inline __m128
10265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10266 _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10267                            __m256i __index, float const *__addr,
10268                            int __scale)
10269 {
10270   return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10271                                                 __addr,
10272                                                 (__v4di) __index,
10273                                                 __mask, __scale);
10274 }
10275
10276 extern __inline __m128
10277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10278 _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10279                         __m128i __index, float const *__addr,
10280                         int __scale)
10281 {
10282   return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10283                                                 __addr,
10284                                                 (__v2di) __index,
10285                                                 __mask, __scale);
10286 }
10287
10288 extern __inline __m256d
10289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10290 _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10291                            __m256i __index, double const *__addr,
10292                            int __scale)
10293 {
10294   return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10295                                                  __addr,
10296                                                  (__v4di) __index,
10297                                                  __mask, __scale);
10298 }
10299
10300 extern __inline __m128d
10301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10302 _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10303                         __m128i __index, double const *__addr,
10304                         int __scale)
10305 {
10306   return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10307                                                  __addr,
10308                                                  (__v2di) __index,
10309                                                  __mask, __scale);
10310 }
10311
10312 extern __inline __m256i
10313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10314 _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10315                               __m256i __index, int const *__addr,
10316                               int __scale)
10317 {
10318   return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10319                                                  __addr,
10320                                                  (__v8si) __index,
10321                                                  __mask, __scale);
10322 }
10323
/* Wrapper for __builtin_ia32_gather3siv4si.  Both __v1_old and __index
   are cast to __v4si; __addr, __mask and __scale are forwarded
   unchanged, and the builtin's result is returned as __m128i.
   NOTE(review): __v1_old presumably supplies the lanes not selected by
   __mask -- confirm.  */
10324 extern __inline __m128i
10325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10326 _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10327                            __m128i __index, int const *__addr,
10328                            int __scale)
10329 {
10330   return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10331                                                  __addr,
10332                                                  (__v4si) __index,
10333                                                  __mask, __scale);
10334 }
10335
/* Wrapper for __builtin_ia32_gather3siv4di.  __v1_old (256-bit) is cast
   to __v4di while __index is only a 128-bit vector, cast to __v4si;
   __addr, __mask and __scale are forwarded unchanged.  The builtin's
   result is returned as __m256i.
   NOTE(review): __v1_old presumably supplies the lanes not selected by
   __mask -- confirm.  */
10336 extern __inline __m256i
10337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10338 _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10339                               __m128i __index, long long const *__addr,
10340                               int __scale)
10341 {
10342   return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
10343                                                  __addr,
10344                                                  (__v4si) __index,
10345                                                  __mask, __scale);
10346 }
10347
/* Wrapper for __builtin_ia32_gather3siv2di.  __v1_old is cast to __v2di
   and __index to __v4si; __addr, __mask and __scale are forwarded
   unchanged, and the builtin's result is returned as __m128i.
   NOTE(review): presumably only part of the four-element __index is
   consumed for a two-element result -- confirm.  */
10348 extern __inline __m128i
10349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10350 _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10351                            __m128i __index, long long const *__addr,
10352                            int __scale)
10353 {
10354   return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
10355                                                  __addr,
10356                                                  (__v4si) __index,
10357                                                  __mask, __scale);
10358 }
10359
/* Wrapper for __builtin_ia32_gather3div8si.  Despite the _mm256 prefix
   the data operand and the result are 128-bit: __v1_old is cast to
   __v4si and the result is returned as __m128i.  Only __index is
   256-bit, cast to __v4di.  __addr, __mask and __scale are forwarded
   unchanged.  */
10360 extern __inline __m128i
10361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10362 _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10363                               __m256i __index, int const *__addr,
10364                               int __scale)
10365 {
10366   return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
10367                                                  __addr,
10368                                                  (__v4di) __index,
10369                                                  __mask, __scale);
10370 }
10371
/* Wrapper for __builtin_ia32_gather3div4si.  __v1_old is cast to __v4si
   and __index to __v2di; __addr, __mask and __scale are forwarded
   unchanged, and the builtin's result is returned as __m128i.
   NOTE(review): with two 64-bit indices presumably only part of the
   four-element result is gathered -- confirm.  */
10372 extern __inline __m128i
10373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10374 _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10375                            __m128i __index, int const *__addr,
10376                            int __scale)
10377 {
10378   return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
10379                                                  __addr,
10380                                                  (__v2di) __index,
10381                                                  __mask, __scale);
10382 }
10383
/* Wrapper for __builtin_ia32_gather3div4di.  Both __v1_old and __index
   are cast to __v4di; __addr, __mask and __scale are forwarded
   unchanged, and the builtin's result is returned as __m256i.
   NOTE(review): __v1_old presumably supplies the lanes not selected by
   __mask -- confirm.  */
10384 extern __inline __m256i
10385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10386 _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10387                               __m256i __index, long long const *__addr,
10388                               int __scale)
10389 {
10390   return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
10391                                                  __addr,
10392                                                  (__v4di) __index,
10393                                                  __mask, __scale);
10394 }
10395
/* Wrapper for __builtin_ia32_gather3div2di.  Both __v1_old and __index
   are cast to __v2di; __addr, __mask and __scale are forwarded
   unchanged, and the builtin's result is returned as __m128i.
   NOTE(review): __v1_old presumably supplies the lanes not selected by
   __mask -- confirm.  */
10396 extern __inline __m128i
10397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10398 _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10399                            __m128i __index, long long const *__addr,
10400                            int __scale)
10401 {
10402   return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
10403                                                  __addr,
10404                                                  (__v2di) __index,
10405                                                  __mask, __scale);
10406 }
10407
/* Wrapper for __builtin_ia32_scattersiv8sf with the mask argument fixed
   to (__mmask8) 0xFF.  __index is cast to __v8si and __v1 to __v8sf;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10408 extern __inline void
10409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10410 _mm256_i32scatter_ps (float *__addr, __m256i __index,
10411                       __m256 __v1, const int __scale)
10412 {
10413   __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
10414                                 (__v8si) __index, (__v8sf) __v1,
10415                                 __scale);
10416 }
10417
/* Wrapper for __builtin_ia32_scattersiv8sf using the caller's __mask.
   __index is cast to __v8si and __v1 to __v8sf; __addr, __mask and
   __scale are forwarded unchanged.  Returns nothing.  */
10418 extern __inline void
10419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10420 _mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10421                            __m256i __index, __m256 __v1,
10422                            const int __scale)
10423 {
10424   __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
10425                                 (__v8sf) __v1, __scale);
10426 }
10427
/* Wrapper for __builtin_ia32_scattersiv4sf with the mask argument fixed
   to (__mmask8) 0xFF.  __index is cast to __v4si and __v1 to __v4sf;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10428 extern __inline void
10429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10430 _mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10431                    const int __scale)
10432 {
10433   __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
10434                                 (__v4si) __index, (__v4sf) __v1,
10435                                 __scale);
10436 }
10437
/* Wrapper for __builtin_ia32_scattersiv4sf using the caller's __mask.
   __index is cast to __v4si and __v1 to __v4sf; __addr, __mask and
   __scale are forwarded unchanged.  Returns nothing.  */
10438 extern __inline void
10439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10440 _mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10441                         __m128i __index, __m128 __v1,
10442                         const int __scale)
10443 {
10444   __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
10445                                 (__v4sf) __v1, __scale);
10446 }
10447
/* Wrapper for __builtin_ia32_scattersiv4df with the mask argument fixed
   to (__mmask8) 0xFF.  __index is a 128-bit vector cast to __v4si and
   __v1 is cast to __v4df; __addr and __scale are forwarded unchanged.
   Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10448 extern __inline void
10449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10450 _mm256_i32scatter_pd (double *__addr, __m128i __index,
10451                       __m256d __v1, const int __scale)
10452 {
10453   __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
10454                                 (__v4si) __index, (__v4df) __v1,
10455                                 __scale);
10456 }
10457
/* Wrapper for __builtin_ia32_scattersiv4df using the caller's __mask.
   __index is a 128-bit vector cast to __v4si and __v1 is cast to
   __v4df; __addr, __mask and __scale are forwarded unchanged.  Returns
   nothing.  */
10458 extern __inline void
10459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10460 _mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10461                            __m128i __index, __m256d __v1,
10462                            const int __scale)
10463 {
10464   __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
10465                                 (__v4df) __v1, __scale);
10466 }
10467
/* Wrapper for __builtin_ia32_scattersiv2df with the mask argument fixed
   to (__mmask8) 0xFF.  __index is cast to __v4si and __v1 to __v2df;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10468 extern __inline void
10469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10470 _mm_i32scatter_pd (double *__addr, __m128i __index,
10471                    __m128d __v1, const int __scale)
10472 {
10473   __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
10474                                 (__v4si) __index, (__v2df) __v1,
10475                                 __scale);
10476 }
10477
/* Wrapper for __builtin_ia32_scattersiv2df using the caller's __mask.
   __index is cast to __v4si and __v1 to __v2df; __addr, __mask and
   __scale are forwarded unchanged.  Returns nothing.  */
10478 extern __inline void
10479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10480 _mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10481                         __m128i __index, __m128d __v1,
10482                         const int __scale)
10483 {
10484   __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
10485                                 (__v2df) __v1, __scale);
10486 }
10487
/* Wrapper for __builtin_ia32_scatterdiv8sf with the mask argument fixed
   to (__mmask8) 0xFF.  __index is 256-bit, cast to __v4di, while the
   data operand __v1 is 128-bit, cast to __v4sf; __addr and __scale are
   forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10488 extern __inline void
10489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10490 _mm256_i64scatter_ps (float *__addr, __m256i __index,
10491                       __m128 __v1, const int __scale)
10492 {
10493   __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
10494                                 (__v4di) __index, (__v4sf) __v1,
10495                                 __scale);
10496 }
10497
/* Wrapper for __builtin_ia32_scatterdiv8sf using the caller's __mask.
   __index is 256-bit, cast to __v4di, while the data operand __v1 is
   128-bit, cast to __v4sf; __addr, __mask and __scale are forwarded
   unchanged.  Returns nothing.  */
10498 extern __inline void
10499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10500 _mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10501                            __m256i __index, __m128 __v1,
10502                            const int __scale)
10503 {
10504   __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
10505                                 (__v4sf) __v1, __scale);
10506 }
10507
/* Wrapper for __builtin_ia32_scatterdiv4sf with the mask argument fixed
   to (__mmask8) 0xFF.  __index is cast to __v2di and __v1 to __v4sf;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): with two 64-bit indices presumably only part of __v1
   is stored -- confirm.  */
10508 extern __inline void
10509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10510 _mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10511                    const int __scale)
10512 {
10513   __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
10514                                 (__v2di) __index, (__v4sf) __v1,
10515                                 __scale);
10516 }
10517
/* Wrapper for __builtin_ia32_scatterdiv4sf using the caller's __mask.
   __index is cast to __v2di and __v1 to __v4sf; __addr, __mask and
   __scale are forwarded unchanged.  Returns nothing.  */
10518 extern __inline void
10519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10520 _mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10521                         __m128i __index, __m128 __v1,
10522                         const int __scale)
10523 {
10524   __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
10525                                 (__v4sf) __v1, __scale);
10526 }
10527
/* Wrapper for __builtin_ia32_scatterdiv4df with the mask argument fixed
   to (__mmask8) 0xFF.  __index is cast to __v4di and __v1 to __v4df;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10528 extern __inline void
10529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10530 _mm256_i64scatter_pd (double *__addr, __m256i __index,
10531                       __m256d __v1, const int __scale)
10532 {
10533   __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
10534                                 (__v4di) __index, (__v4df) __v1,
10535                                 __scale);
10536 }
10537
/* Wrapper for __builtin_ia32_scatterdiv4df using the caller's __mask.
   __index is cast to __v4di and __v1 to __v4df; __addr, __mask and
   __scale are forwarded unchanged.  Returns nothing.  */
10538 extern __inline void
10539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10540 _mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10541                            __m256i __index, __m256d __v1,
10542                            const int __scale)
10543 {
10544   __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
10545                                 (__v4df) __v1, __scale);
10546 }
10547
/* Wrapper for __builtin_ia32_scatterdiv2df with the mask argument fixed
   to (__mmask8) 0xFF.  __index is cast to __v2di and __v1 to __v2df;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10548 extern __inline void
10549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10550 _mm_i64scatter_pd (double *__addr, __m128i __index,
10551                    __m128d __v1, const int __scale)
10552 {
10553   __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
10554                                 (__v2di) __index, (__v2df) __v1,
10555                                 __scale);
10556 }
10557
/* Wrapper for __builtin_ia32_scatterdiv2df using the caller's __mask.
   __index is cast to __v2di and __v1 to __v2df; __addr, __mask and
   __scale are forwarded unchanged.  Returns nothing.  */
10558 extern __inline void
10559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10560 _mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10561                         __m128i __index, __m128d __v1,
10562                         const int __scale)
10563 {
10564   __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
10565                                 (__v2df) __v1, __scale);
10566 }
10567
/* Wrapper for __builtin_ia32_scattersiv8si with the mask argument fixed
   to (__mmask8) 0xFF.  Both __index and __v1 are cast to __v8si;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10568 extern __inline void
10569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10570 _mm256_i32scatter_epi32 (int *__addr, __m256i __index,
10571                          __m256i __v1, const int __scale)
10572 {
10573   __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
10574                                 (__v8si) __index, (__v8si) __v1,
10575                                 __scale);
10576 }
10577
/* Wrapper for __builtin_ia32_scattersiv8si using the caller's __mask.
   Both __index and __v1 are cast to __v8si; __addr, __mask and __scale
   are forwarded unchanged.  Returns nothing.  */
10578 extern __inline void
10579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10580 _mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10581                               __m256i __index, __m256i __v1,
10582                               const int __scale)
10583 {
10584   __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
10585                                 (__v8si) __v1, __scale);
10586 }
10587
/* Wrapper for __builtin_ia32_scattersiv4si with the mask argument fixed
   to (__mmask8) 0xFF.  Both __index and __v1 are cast to __v4si;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10588 extern __inline void
10589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10590 _mm_i32scatter_epi32 (int *__addr, __m128i __index,
10591                       __m128i __v1, const int __scale)
10592 {
10593   __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
10594                                 (__v4si) __index, (__v4si) __v1,
10595                                 __scale);
10596 }
10597
/* Wrapper for __builtin_ia32_scattersiv4si using the caller's __mask.
   Both __index and __v1 are cast to __v4si; __addr, __mask and __scale
   are forwarded unchanged.  Returns nothing.  */
10598 extern __inline void
10599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10600 _mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10601                            __m128i __index, __m128i __v1,
10602                            const int __scale)
10603 {
10604   __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
10605                                 (__v4si) __v1, __scale);
10606 }
10607
/* Wrapper for __builtin_ia32_scattersiv4di with the mask argument fixed
   to (__mmask8) 0xFF.  __index is a 128-bit vector cast to __v4si and
   __v1 is cast to __v4di; __addr and __scale are forwarded unchanged.
   Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10608 extern __inline void
10609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10610 _mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
10611                          __m256i __v1, const int __scale)
10612 {
10613   __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
10614                                 (__v4si) __index, (__v4di) __v1,
10615                                 __scale);
10616 }
10617
/* Wrapper for __builtin_ia32_scattersiv4di using the caller's __mask.
   __index is a 128-bit vector cast to __v4si and __v1 is cast to
   __v4di; __addr, __mask and __scale are forwarded unchanged.  Returns
   nothing.  */
10618 extern __inline void
10619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10620 _mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10621                               __m128i __index, __m256i __v1,
10622                               const int __scale)
10623 {
10624   __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
10625                                 (__v4di) __v1, __scale);
10626 }
10627
/* Wrapper for __builtin_ia32_scattersiv2di with the mask argument fixed
   to (__mmask8) 0xFF.  __index is cast to __v4si and __v1 to __v2di;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10628 extern __inline void
10629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10630 _mm_i32scatter_epi64 (long long *__addr, __m128i __index,
10631                       __m128i __v1, const int __scale)
10632 {
10633   __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
10634                                 (__v4si) __index, (__v2di) __v1,
10635                                 __scale);
10636 }
10637
/* Wrapper for __builtin_ia32_scattersiv2di using the caller's __mask.
   __index is cast to __v4si and __v1 to __v2di; __addr, __mask and
   __scale are forwarded unchanged.  Returns nothing.  */
10638 extern __inline void
10639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10640 _mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10641                            __m128i __index, __m128i __v1,
10642                            const int __scale)
10643 {
10644   __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
10645                                 (__v2di) __v1, __scale);
10646 }
10647
/* Wrapper for __builtin_ia32_scatterdiv8si with the mask argument fixed
   to (__mmask8) 0xFF.  __index is 256-bit, cast to __v4di, while the
   data operand __v1 is 128-bit, cast to __v4si; __addr and __scale are
   forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10648 extern __inline void
10649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10650 _mm256_i64scatter_epi32 (int *__addr, __m256i __index,
10651                          __m128i __v1, const int __scale)
10652 {
10653   __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
10654                                 (__v4di) __index, (__v4si) __v1,
10655                                 __scale);
10656 }
10657
/* Wrapper for __builtin_ia32_scatterdiv8si using the caller's __mask.
   __index is 256-bit, cast to __v4di, while the data operand __v1 is
   128-bit, cast to __v4si; __addr, __mask and __scale are forwarded
   unchanged.  Returns nothing.  */
10658 extern __inline void
10659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10660 _mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10661                               __m256i __index, __m128i __v1,
10662                               const int __scale)
10663 {
10664   __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
10665                                 (__v4si) __v1, __scale);
10666 }
10667
/* Wrapper for __builtin_ia32_scatterdiv4si with the mask argument fixed
   to (__mmask8) 0xFF.  __index is cast to __v2di and __v1 to __v4si;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): with two 64-bit indices presumably only part of __v1
   is stored -- confirm.  */
10668 extern __inline void
10669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10670 _mm_i64scatter_epi32 (int *__addr, __m128i __index,
10671                       __m128i __v1, const int __scale)
10672 {
10673   __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
10674                                 (__v2di) __index, (__v4si) __v1,
10675                                 __scale);
10676 }
10677
/* Wrapper for __builtin_ia32_scatterdiv4si using the caller's __mask.
   __index is cast to __v2di and __v1 to __v4si; __addr, __mask and
   __scale are forwarded unchanged.  Returns nothing.  */
10678 extern __inline void
10679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10680 _mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10681                            __m128i __index, __m128i __v1,
10682                            const int __scale)
10683 {
10684   __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
10685                                 (__v4si) __v1, __scale);
10686 }
10687
/* Wrapper for __builtin_ia32_scatterdiv4di with the mask argument fixed
   to (__mmask8) 0xFF.  Both __index and __v1 are cast to __v4di;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10688 extern __inline void
10689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10690 _mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
10691                          __m256i __v1, const int __scale)
10692 {
10693   __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
10694                                 (__v4di) __index, (__v4di) __v1,
10695                                 __scale);
10696 }
10697
/* Wrapper for __builtin_ia32_scatterdiv4di using the caller's __mask.
   Both __index and __v1 are cast to __v4di; __addr, __mask and __scale
   are forwarded unchanged.  Returns nothing.  */
10698 extern __inline void
10699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10700 _mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10701                               __m256i __index, __m256i __v1,
10702                               const int __scale)
10703 {
10704   __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
10705                                 (__v4di) __v1, __scale);
10706 }
10707
/* Wrapper for __builtin_ia32_scatterdiv2di with the mask argument fixed
   to (__mmask8) 0xFF.  Both __index and __v1 are cast to __v2di;
   __addr and __scale are forwarded unchanged.  Returns nothing.
   NOTE(review): 0xFF presumably enables every element -- confirm.  */
10708 extern __inline void
10709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10710 _mm_i64scatter_epi64 (long long *__addr, __m128i __index,
10711                       __m128i __v1, const int __scale)
10712 {
10713   __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
10714                                 (__v2di) __index, (__v2di) __v1,
10715                                 __scale);
10716 }
10717
/* Wrapper for __builtin_ia32_scatterdiv2di using the caller's __mask.
   Both __index and __v1 are cast to __v2di; __addr, __mask and __scale
   are forwarded unchanged.  Returns nothing.  */
10718 extern __inline void
10719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10720 _mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10721                            __m128i __index, __m128i __v1,
10722                            const int __scale)
10723 {
10724   __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
10725                                 (__v2di) __v1, __scale);
10726 }
10727
/* Wrapper for __builtin_ia32_pshufd256_mask.  __A (cast to __v8si) and
   the _MM_PERM_ENUM selector __mask are the first two arguments; __W
   (cast to __v8si) and __U (cast to __mmask8) are the last two.  The
   builtin's result is returned as __m256i.
   NOTE(review): __W presumably supplies the lanes whose bit in __U is
   clear -- confirm.  */
10728 extern __inline __m256i
10729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10730 _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10731                            _MM_PERM_ENUM __mask)
10732 {
10733   return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10734                                                   (__v8si) __W,
10735                                                   (__mmask8) __U);
10736 }
10737
/* Wrapper for __builtin_ia32_pshufd256_mask.  __A (cast to __v8si) and
   the _MM_PERM_ENUM selector __mask are the first two arguments; the
   third is the result of _mm256_setzero_si256 () cast to __v8si, and
   the last is __U cast to __mmask8.  Returned as __m256i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10738 extern __inline __m256i
10739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10740 _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
10741                             _MM_PERM_ENUM __mask)
10742 {
10743   return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10744                                                   (__v8si)
10745                                                   _mm256_setzero_si256 (),
10746                                                   (__mmask8) __U);
10747 }
10748
/* Wrapper for __builtin_ia32_pshufd128_mask.  __A (cast to __v4si) and
   the _MM_PERM_ENUM selector __mask are the first two arguments; __W
   (cast to __v4si) and __U (cast to __mmask8) are the last two.  The
   builtin's result is returned as __m128i.
   NOTE(review): __W presumably supplies the lanes whose bit in __U is
   clear -- confirm.  */
10749 extern __inline __m128i
10750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10751 _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10752                         _MM_PERM_ENUM __mask)
10753 {
10754   return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10755                                                   (__v4si) __W,
10756                                                   (__mmask8) __U);
10757 }
10758
/* Wrapper for __builtin_ia32_pshufd128_mask.  __A (cast to __v4si) and
   the _MM_PERM_ENUM selector __mask are the first two arguments; the
   third is the result of _mm_setzero_si128 () cast to __v4si, and the
   last is __U cast to __mmask8.  Returned as __m128i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10759 extern __inline __m128i
10760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10761 _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
10762                          _MM_PERM_ENUM __mask)
10763 {
10764   return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10765                                                   (__v4si)
10766                                                   _mm_setzero_si128 (),
10767                                                   (__mmask8) __U);
10768 }
10769
/* Unmasked wrapper for __builtin_ia32_prold256_mask.  __A is cast to
   __v8si and __B is forwarded unchanged; the third argument is the
   result of _mm256_setzero_si256 () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m256i.
   NOTE(review): -1 presumably selects every lane, so the third
   argument should not affect the result -- confirm.  */
10770 extern __inline __m256i
10771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772 _mm256_rol_epi32 (__m256i __A, const int __B)
10773 {
10774   return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10775                                                  (__v8si)
10776                                                  _mm256_setzero_si256 (),
10777                                                  (__mmask8) -1);
10778 }
10779
/* Wrapper for __builtin_ia32_prold256_mask.  __A is cast to __v8si and
   __B is forwarded unchanged; __W (cast to __v8si) and __U (cast to
   __mmask8) are the last two arguments.  The builtin's result is
   returned as __m256i.  */
10780 extern __inline __m256i
10781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10782 _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10783                        const int __B)
10784 {
10785   return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10786                                                  (__v8si) __W,
10787                                                  (__mmask8) __U);
10788 }
10789
/* Wrapper for __builtin_ia32_prold256_mask.  __A is cast to __v8si and
   __B is forwarded unchanged; the third argument is the result of
   _mm256_setzero_si256 () and the last is __U cast to __mmask8.  The
   builtin's result is returned as __m256i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10790 extern __inline __m256i
10791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10792 _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
10793 {
10794   return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10795                                                  (__v8si)
10796                                                  _mm256_setzero_si256 (),
10797                                                  (__mmask8) __U);
10798 }
10799
/* Unmasked wrapper for __builtin_ia32_prold128_mask.  __A is cast to
   __v4si and __B is forwarded unchanged; the third argument is the
   result of _mm_setzero_si128 () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m128i.
   NOTE(review): -1 presumably selects every lane, so the third
   argument should not affect the result -- confirm.  */
10800 extern __inline __m128i
10801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10802 _mm_rol_epi32 (__m128i __A, const int __B)
10803 {
10804   return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10805                                                  (__v4si)
10806                                                  _mm_setzero_si128 (),
10807                                                  (__mmask8) -1);
10808 }
10809
/* Wrapper for __builtin_ia32_prold128_mask.  __A is cast to __v4si and
   __B is forwarded unchanged; __W (cast to __v4si) and __U (cast to
   __mmask8) are the last two arguments.  The builtin's result is
   returned as __m128i.  */
10810 extern __inline __m128i
10811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10812 _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10813                     const int __B)
10814 {
10815   return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10816                                                  (__v4si) __W,
10817                                                  (__mmask8) __U);
10818 }
10819
/* Wrapper for __builtin_ia32_prold128_mask.  __A is cast to __v4si and
   __B is forwarded unchanged; the third argument is the result of
   _mm_setzero_si128 () and the last is __U cast to __mmask8.  The
   builtin's result is returned as __m128i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10820 extern __inline __m128i
10821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10822 _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
10823 {
10824   return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10825                                                  (__v4si)
10826                                                  _mm_setzero_si128 (),
10827                                                  (__mmask8) __U);
10828 }
10829
/* Unmasked wrapper for __builtin_ia32_prord256_mask.  __A is cast to
   __v8si and __B is forwarded unchanged; the third argument is the
   result of _mm256_setzero_si256 () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m256i.
   NOTE(review): -1 presumably selects every lane, so the third
   argument should not affect the result -- confirm.  */
10830 extern __inline __m256i
10831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10832 _mm256_ror_epi32 (__m256i __A, const int __B)
10833 {
10834   return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10835                                                  (__v8si)
10836                                                  _mm256_setzero_si256 (),
10837                                                  (__mmask8) -1);
10838 }
10839
/* Wrapper for __builtin_ia32_prord256_mask.  __A is cast to __v8si and
   __B is forwarded unchanged; __W (cast to __v8si) and __U (cast to
   __mmask8) are the last two arguments.  The builtin's result is
   returned as __m256i.  */
10840 extern __inline __m256i
10841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10842 _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10843                        const int __B)
10844 {
10845   return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10846                                                  (__v8si) __W,
10847                                                  (__mmask8) __U);
10848 }
10849
/* Wrapper for __builtin_ia32_prord256_mask.  __A is cast to __v8si and
   __B is forwarded unchanged; the third argument is the result of
   _mm256_setzero_si256 () and the last is __U cast to __mmask8.  The
   builtin's result is returned as __m256i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10850 extern __inline __m256i
10851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10852 _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
10853 {
10854   return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10855                                                  (__v8si)
10856                                                  _mm256_setzero_si256 (),
10857                                                  (__mmask8) __U);
10858 }
10859
/* Unmasked wrapper for __builtin_ia32_prord128_mask.  __A is cast to
   __v4si and __B is forwarded unchanged; the third argument is the
   result of _mm_setzero_si128 () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m128i.
   NOTE(review): -1 presumably selects every lane, so the third
   argument should not affect the result -- confirm.  */
10860 extern __inline __m128i
10861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10862 _mm_ror_epi32 (__m128i __A, const int __B)
10863 {
10864   return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10865                                                  (__v4si)
10866                                                  _mm_setzero_si128 (),
10867                                                  (__mmask8) -1);
10868 }
10869
/* Wrapper for __builtin_ia32_prord128_mask.  __A is cast to __v4si and
   __B is forwarded unchanged; __W (cast to __v4si) and __U (cast to
   __mmask8) are the last two arguments.  The builtin's result is
   returned as __m128i.  */
10870 extern __inline __m128i
10871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10872 _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10873                     const int __B)
10874 {
10875   return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10876                                                  (__v4si) __W,
10877                                                  (__mmask8) __U);
10878 }
10879
/* Wrapper for __builtin_ia32_prord128_mask.  __A is cast to __v4si and
   __B is forwarded unchanged; the third argument is the result of
   _mm_setzero_si128 () and the last is __U cast to __mmask8.  The
   builtin's result is returned as __m128i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10880 extern __inline __m128i
10881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10882 _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
10883 {
10884   return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10885                                                  (__v4si)
10886                                                  _mm_setzero_si128 (),
10887                                                  (__mmask8) __U);
10888 }
10889
/* Unmasked wrapper for __builtin_ia32_prolq256_mask.  __A is cast to
   __v4di and __B is forwarded unchanged; the third argument is the
   result of _mm256_setzero_si256 () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m256i.
   NOTE(review): -1 presumably selects every lane, so the third
   argument should not affect the result -- confirm.  */
10890 extern __inline __m256i
10891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10892 _mm256_rol_epi64 (__m256i __A, const int __B)
10893 {
10894   return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10895                                                  (__v4di)
10896                                                  _mm256_setzero_si256 (),
10897                                                  (__mmask8) -1);
10898 }
10899
/* Wrapper for __builtin_ia32_prolq256_mask.  __A is cast to __v4di and
   __B is forwarded unchanged; __W (cast to __v4di) and __U (cast to
   __mmask8) are the last two arguments.  The builtin's result is
   returned as __m256i.  */
10900 extern __inline __m256i
10901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10902 _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10903                        const int __B)
10904 {
10905   return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10906                                                  (__v4di) __W,
10907                                                  (__mmask8) __U);
10908 }
10909
/* Wrapper for __builtin_ia32_prolq256_mask.  __A is cast to __v4di and
   __B is forwarded unchanged; the third argument is the result of
   _mm256_setzero_si256 () and the last is __U cast to __mmask8.  The
   builtin's result is returned as __m256i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10910 extern __inline __m256i
10911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10912 _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
10913 {
10914   return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10915                                                  (__v4di)
10916                                                  _mm256_setzero_si256 (),
10917                                                  (__mmask8) __U);
10918 }
10919
/* Unmasked wrapper for __builtin_ia32_prolq128_mask.  __A is cast to
   __v2di and __B is forwarded unchanged; the third argument is the
   all-zero vector from _mm_setzero_di () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m128i.
   NOTE(review): -1 presumably selects every lane, so the third
   argument should not affect the result -- confirm.  */
10920 extern __inline __m128i
10921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10922 _mm_rol_epi64 (__m128i __A, const int __B)
10923 {
10924   return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10925                                                  (__v2di)
10926                                                  _mm_setzero_di (),
10927                                                  (__mmask8) -1);
10928 }
10929
/* Wrapper for __builtin_ia32_prolq128_mask.  __A is cast to __v2di and
   __B is forwarded unchanged; __W (cast to __v2di) and __U (cast to
   __mmask8) are the last two arguments.  The builtin's result is
   returned as __m128i.  */
10930 extern __inline __m128i
10931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10932 _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10933                     const int __B)
10934 {
10935   return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10936                                                  (__v2di) __W,
10937                                                  (__mmask8) __U);
10938 }
10939
/* Wrapper for __builtin_ia32_prolq128_mask.  __A is cast to __v2di and
   __B is forwarded unchanged; the third argument is the all-zero
   vector from _mm_setzero_di () and the last is __U cast to __mmask8.
   The builtin's result is returned as __m128i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10940 extern __inline __m128i
10941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942 _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
10943 {
10944   return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10945                                                  (__v2di)
10946                                                  _mm_setzero_di (),
10947                                                  (__mmask8) __U);
10948 }
10949
/* Unmasked wrapper for __builtin_ia32_prorq256_mask.  __A is cast to
   __v4di and __B is forwarded unchanged; the third argument is the
   result of _mm256_setzero_si256 () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m256i.
   NOTE(review): -1 presumably selects every lane, so the third
   argument should not affect the result -- confirm.  */
10950 extern __inline __m256i
10951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10952 _mm256_ror_epi64 (__m256i __A, const int __B)
10953 {
10954   return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10955                                                  (__v4di)
10956                                                  _mm256_setzero_si256 (),
10957                                                  (__mmask8) -1);
10958 }
10959
/* Wrapper for __builtin_ia32_prorq256_mask.  __A is cast to __v4di and
   __B is forwarded unchanged; __W (cast to __v4di) and __U (cast to
   __mmask8) are the last two arguments.  The builtin's result is
   returned as __m256i.  */
10960 extern __inline __m256i
10961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10962 _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10963                        const int __B)
10964 {
10965   return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10966                                                  (__v4di) __W,
10967                                                  (__mmask8) __U);
10968 }
10969
/* Wrapper for __builtin_ia32_prorq256_mask.  __A is cast to __v4di and
   __B is forwarded unchanged; the third argument is the result of
   _mm256_setzero_si256 () and the last is __U cast to __mmask8.  The
   builtin's result is returned as __m256i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
10970 extern __inline __m256i
10971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10972 _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
10973 {
10974   return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10975                                                  (__v4di)
10976                                                  _mm256_setzero_si256 (),
10977                                                  (__mmask8) __U);
10978 }
10979
/* Unmasked wrapper for __builtin_ia32_prorq128_mask.  __A is cast to
   __v2di and __B is forwarded unchanged; the third argument is the
   all-zero vector from _mm_setzero_di () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m128i.
   NOTE(review): -1 presumably selects every lane, so the third
   argument should not affect the result -- confirm.  */
10980 extern __inline __m128i
10981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10982 _mm_ror_epi64 (__m128i __A, const int __B)
10983 {
10984   return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10985                                                  (__v2di)
10986                                                  _mm_setzero_di (),
10987                                                  (__mmask8) -1);
10988 }
10989
/* Wrapper for __builtin_ia32_prorq128_mask.  __A is cast to __v2di and
   __B is forwarded unchanged; __W (cast to __v2di) and __U (cast to
   __mmask8) are the last two arguments.  The builtin's result is
   returned as __m128i.  */
10990 extern __inline __m128i
10991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10992 _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10993                     const int __B)
10994 {
10995   return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10996                                                  (__v2di) __W,
10997                                                  (__mmask8) __U);
10998 }
10999
/* Wrapper for __builtin_ia32_prorq128_mask.  __A is cast to __v2di and
   __B is forwarded unchanged; the third argument is the all-zero
   vector from _mm_setzero_di () and the last is __U cast to __mmask8.
   The builtin's result is returned as __m128i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
11000 extern __inline __m128i
11001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11002 _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11003 {
11004   return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11005                                                  (__v2di)
11006                                                  _mm_setzero_di (),
11007                                                  (__mmask8) __U);
11008 }
11009
/* Unmasked wrapper for __builtin_ia32_alignd128_mask.  __A and __B are
   cast to __v4si and __imm is forwarded unchanged; the fourth argument
   is the result of _mm_setzero_si128 () and the mask argument is
   (__mmask8) -1.  The builtin's result is returned as __m128i.
   NOTE(review): -1 presumably selects every lane, so the fourth
   argument should not affect the result -- confirm.  */
11010 extern __inline __m128i
11011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11012 _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11013 {
11014   return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11015                                                   (__v4si) __B, __imm,
11016                                                   (__v4si)
11017                                                   _mm_setzero_si128 (),
11018                                                   (__mmask8) -1);
11019 }
11020
/* Wrapper for __builtin_ia32_alignd128_mask.  __A and __B are cast to
   __v4si and __imm is forwarded unchanged; __W (cast to __v4si) and
   __U (cast to __mmask8) are the last two arguments.  The builtin's
   result is returned as __m128i.  */
11021 extern __inline __m128i
11022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023 _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11024                        __m128i __B, const int __imm)
11025 {
11026   return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11027                                                   (__v4si) __B, __imm,
11028                                                   (__v4si) __W,
11029                                                   (__mmask8) __U);
11030 }
11031
/* Wrapper for __builtin_ia32_alignd128_mask.  __A and __B are cast to
   __v4si and __imm is forwarded unchanged; the fourth argument is the
   result of _mm_setzero_si128 () and the last is __U cast to __mmask8.
   The builtin's result is returned as __m128i.
   NOTE(review): presumably lanes whose bit in __U is clear come out
   as zero -- confirm.  */
11032 extern __inline __m128i
11033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11034 _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11035                         const int __imm)
11036 {
11037   return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11038                                                   (__v4si) __B, __imm,
11039                                                   (__v4si)
11040                                                   _mm_setzero_si128 (),
11041                                                   (__mmask8) __U);
11042 }
11043
11044 extern __inline __m128i
11045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11046 _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11047 {
11048   return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11049                                                   (__v2di) __B, __imm,
11050                                                   (__v2di)
11051                                                   _mm_setzero_di (),
11052                                                   (__mmask8) -1);
11053 }
11054
11055 extern __inline __m128i
11056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057 _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11058                        __m128i __B, const int __imm)
11059 {
11060   return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11061                                                   (__v2di) __B, __imm,
11062                                                   (__v2di) __W,
11063                                                   (__mmask8) __U);
11064 }
11065
11066 extern __inline __m128i
11067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068 _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11069                         const int __imm)
11070 {
11071   return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11072                                                   (__v2di) __B, __imm,
11073                                                   (__v2di)
11074                                                   _mm_setzero_di (),
11075                                                   (__mmask8) __U);
11076 }
11077
11078 extern __inline __m256i
11079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11080 _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11081 {
11082   return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11083                                                   (__v8si) __B, __imm,
11084                                                   (__v8si)
11085                                                   _mm256_setzero_si256 (),
11086                                                   (__mmask8) -1);
11087 }
11088
11089 extern __inline __m256i
11090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11091 _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11092                           __m256i __B, const int __imm)
11093 {
11094   return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11095                                                   (__v8si) __B, __imm,
11096                                                   (__v8si) __W,
11097                                                   (__mmask8) __U);
11098 }
11099
11100 extern __inline __m256i
11101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11102 _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11103                            const int __imm)
11104 {
11105   return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11106                                                   (__v8si) __B, __imm,
11107                                                   (__v8si)
11108                                                   _mm256_setzero_si256 (),
11109                                                   (__mmask8) __U);
11110 }
11111
11112 extern __inline __m256i
11113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11114 _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11115 {
11116   return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11117                                                   (__v4di) __B, __imm,
11118                                                   (__v4di)
11119                                                   _mm256_setzero_si256 (),
11120                                                   (__mmask8) -1);
11121 }
11122
11123 extern __inline __m256i
11124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11125 _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11126                           __m256i __B, const int __imm)
11127 {
11128   return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11129                                                   (__v4di) __B, __imm,
11130                                                   (__v4di) __W,
11131                                                   (__mmask8) __U);
11132 }
11133
11134 extern __inline __m256i
11135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11136 _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11137                            const int __imm)
11138 {
11139   return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11140                                                   (__v4di) __B, __imm,
11141                                                   (__v4di)
11142                                                   _mm256_setzero_si256 (),
11143                                                   (__mmask8) __U);
11144 }
11145
11146 extern __inline __m128i
11147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11148 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11149                    const int __I)
11150 {
11151   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11152                                                   (__v8hi) __W,
11153                                                   (__mmask8) __U);
11154 }
11155
11156 extern __inline __m128i
11157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11159 {
11160   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11161                                                   (__v8hi)
11162                                                   _mm_setzero_hi (),
11163                                                   (__mmask8) __U);
11164 }
11165
11166 extern __inline __m128i
11167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11168 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11169                       const int __I)
11170 {
11171   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11172                                                      (__v8hi) __W,
11173                                                      (__mmask8) __U);
11174 }
11175
11176 extern __inline __m128i
11177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11178 _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11179 {
11180   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11181                                                      (__v8hi)
11182                                                      _mm_setzero_hi (),
11183                                                      (__mmask8) __U);
11184 }
11185
11186 extern __inline __m256i
11187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11188 _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11189                         const int __imm)
11190 {
11191   return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11192                                                   (__v8si) __W,
11193                                                   (__mmask8) __U);
11194 }
11195
11196 extern __inline __m256i
11197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11198 _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11199 {
11200   return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11201                                                   (__v8si)
11202                                                   _mm256_setzero_si256 (),
11203                                                   (__mmask8) __U);
11204 }
11205
11206 extern __inline __m128i
11207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11208 _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11209                      const int __imm)
11210 {
11211   return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11212                                                   (__v4si) __W,
11213                                                   (__mmask8) __U);
11214 }
11215
11216 extern __inline __m128i
11217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11218 _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11219 {
11220   return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11221                                                   (__v4si)
11222                                                   _mm_setzero_si128 (),
11223                                                   (__mmask8) __U);
11224 }
11225
11226 extern __inline __m256i
11227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11228 _mm256_srai_epi64 (__m256i __A, const int __imm)
11229 {
11230   return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11231                                                   (__v4di)
11232                                                   _mm256_setzero_si256 (),
11233                                                   (__mmask8) -1);
11234 }
11235
11236 extern __inline __m256i
11237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11238 _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11239                         const int __imm)
11240 {
11241   return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11242                                                   (__v4di) __W,
11243                                                   (__mmask8) __U);
11244 }
11245
11246 extern __inline __m256i
11247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11248 _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11249 {
11250   return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11251                                                   (__v4di)
11252                                                   _mm256_setzero_si256 (),
11253                                                   (__mmask8) __U);
11254 }
11255
11256 extern __inline __m128i
11257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11258 _mm_srai_epi64 (__m128i __A, const int __imm)
11259 {
11260   return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11261                                                   (__v2di)
11262                                                   _mm_setzero_di (),
11263                                                   (__mmask8) -1);
11264 }
11265
11266 extern __inline __m128i
11267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11268 _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11269                      const int __imm)
11270 {
11271   return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11272                                                   (__v2di) __W,
11273                                                   (__mmask8) __U);
11274 }
11275
11276 extern __inline __m128i
11277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11278 _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11279 {
11280   return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11281                                                   (__v2di)
11282                                                   _mm_setzero_si128 (),
11283                                                   (__mmask8) __U);
11284 }
11285
11286 extern __inline __m128i
11287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11288 _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11289 {
11290   return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11291                                                   (__v4si) __W,
11292                                                   (__mmask8) __U);
11293 }
11294
11295 extern __inline __m128i
11296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11297 _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11298 {
11299   return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11300                                                   (__v4si)
11301                                                   _mm_setzero_si128 (),
11302                                                   (__mmask8) __U);
11303 }
11304
11305 extern __inline __m128i
11306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11307 _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11308 {
11309   return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11310                                                   (__v2di) __W,
11311                                                   (__mmask8) __U);
11312 }
11313
11314 extern __inline __m128i
11315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11316 _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11317 {
11318   return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11319                                                   (__v2di)
11320                                                   _mm_setzero_di (),
11321                                                   (__mmask8) __U);
11322 }
11323
11324 extern __inline __m256i
11325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11326 _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11327                         int __B)
11328 {
11329   return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11330                                                   (__v8si) __W,
11331                                                   (__mmask8) __U);
11332 }
11333
11334 extern __inline __m256i
11335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11336 _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11337 {
11338   return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11339                                                   (__v8si)
11340                                                   _mm256_setzero_si256 (),
11341                                                   (__mmask8) __U);
11342 }
11343
11344 extern __inline __m256i
11345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11346 _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11347                         int __B)
11348 {
11349   return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11350                                                   (__v4di) __W,
11351                                                   (__mmask8) __U);
11352 }
11353
11354 extern __inline __m256i
11355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11356 _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
11357 {
11358   return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11359                                                   (__v4di)
11360                                                   _mm256_setzero_si256 (),
11361                                                   (__mmask8) __U);
11362 }
11363
11364 extern __inline __m256d
11365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11366 _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
11367                          const int __imm)
11368 {
11369   return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11370                                                   (__v4df) __W,
11371                                                   (__mmask8) __U);
11372 }
11373
11374 extern __inline __m256d
11375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11376 _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
11377 {
11378   return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11379                                                   (__v4df)
11380                                                   _mm256_setzero_pd (),
11381                                                   (__mmask8) __U);
11382 }
11383
11384 extern __inline __m256d
11385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11386 _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
11387                         const int __C)
11388 {
11389   return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11390                                                      (__v4df) __W,
11391                                                      (__mmask8) __U);
11392 }
11393
11394 extern __inline __m256d
11395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11396 _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
11397 {
11398   return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11399                                                      (__v4df)
11400                                                      _mm256_setzero_pd (),
11401                                                      (__mmask8) __U);
11402 }
11403
11404 extern __inline __m128d
11405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11406 _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
11407                      const int __C)
11408 {
11409   return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11410                                                   (__v2df) __W,
11411                                                   (__mmask8) __U);
11412 }
11413
11414 extern __inline __m128d
11415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11416 _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
11417 {
11418   return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11419                                                   (__v2df)
11420                                                   _mm_setzero_pd (),
11421                                                   (__mmask8) __U);
11422 }
11423
11424 extern __inline __m256
11425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11426 _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
11427                         const int __C)
11428 {
11429   return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11430                                                     (__v8sf) __W,
11431                                                     (__mmask8) __U);
11432 }
11433
11434 extern __inline __m256
11435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11436 _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
11437 {
11438   return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11439                                                     (__v8sf)
11440                                                     _mm256_setzero_ps (),
11441                                                     (__mmask8) __U);
11442 }
11443
11444 extern __inline __m128
11445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11446 _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
11447                      const int __C)
11448 {
11449   return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11450                                                  (__v4sf) __W,
11451                                                  (__mmask8) __U);
11452 }
11453
11454 extern __inline __m128
11455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11456 _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
11457 {
11458   return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11459                                                  (__v4sf)
11460                                                  _mm_setzero_ps (),
11461                                                  (__mmask8) __U);
11462 }
11463
11464 extern __inline __m256d
11465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11466 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
11467 {
11468   return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
11469                                                      (__v4df) __W,
11470                                                      (__mmask8) __U);
11471 }
11472
11473 extern __inline __m256
11474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11475 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
11476 {
11477   return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
11478                                                     (__v8sf) __W,
11479                                                     (__mmask8) __U);
11480 }
11481
11482 extern __inline __m256i
11483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11484 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
11485 {
11486   return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
11487                                                     (__v4di) __W,
11488                                                     (__mmask8) __U);
11489 }
11490
11491 extern __inline __m256i
11492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11493 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
11494 {
11495   return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
11496                                                     (__v8si) __W,
11497                                                     (__mmask8) __U);
11498 }
11499
11500 extern __inline __m128d
11501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11502 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
11503 {
11504   return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
11505                                                      (__v2df) __W,
11506                                                      (__mmask8) __U);
11507 }
11508
11509 extern __inline __m128
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
11512 {
11513   return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
11514                                                     (__v4sf) __W,
11515                                                     (__mmask8) __U);
11516 }
11517
11518 extern __inline __m128i
11519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11520 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
11521 {
11522   return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
11523                                                     (__v2di) __W,
11524                                                     (__mmask8) __U);
11525 }
11526
11527 extern __inline __m128i
11528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11529 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
11530 {
11531   return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
11532                                                     (__v4si) __W,
11533                                                     (__mmask8) __U);
11534 }
11535
11536 extern __inline __mmask8
11537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11538 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
11539 {
11540   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11541                                                  (__v4di) __Y, __P,
11542                                                  (__mmask8) -1);
11543 }
11544
11545 extern __inline __mmask8
11546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11547 _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
11548 {
11549   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11550                                                  (__v8si) __Y, __P,
11551                                                  (__mmask8) -1);
11552 }
11553
11554 extern __inline __mmask8
11555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556 _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
11557 {
11558   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11559                                                   (__v4di) __Y, __P,
11560                                                   (__mmask8) -1);
11561 }
11562
11563 extern __inline __mmask8
11564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11565 _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
11566 {
11567   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11568                                                   (__v8si) __Y, __P,
11569                                                   (__mmask8) -1);
11570 }
11571
11572 extern __inline __mmask8
11573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11574 _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
11575 {
11576   return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11577                                                   (__v4df) __Y, __P,
11578                                                   (__mmask8) -1);
11579 }
11580
11581 extern __inline __mmask8
11582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11583 _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
11584 {
11585   return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11586                                                   (__v8sf) __Y, __P,
11587                                                   (__mmask8) -1);
11588 }
11589
11590 extern __inline __mmask8
11591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11592 _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11593                             const int __P)
11594 {
11595   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11596                                                  (__v4di) __Y, __P,
11597                                                  (__mmask8) __U);
11598 }
11599
11600 extern __inline __mmask8
11601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11602 _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11603                             const int __P)
11604 {
11605   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11606                                                  (__v8si) __Y, __P,
11607                                                  (__mmask8) __U);
11608 }
11609
11610 extern __inline __mmask8
11611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11612 _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11613                             const int __P)
11614 {
11615   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11616                                                   (__v4di) __Y, __P,
11617                                                   (__mmask8) __U);
11618 }
11619
11620 extern __inline __mmask8
11621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11622 _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11623                             const int __P)
11624 {
11625   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11626                                                   (__v8si) __Y, __P,
11627                                                   (__mmask8) __U);
11628 }
11629
11630 extern __inline __mmask8
11631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11632 _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
11633                          const int __P)
11634 {
11635   return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11636                                                   (__v4df) __Y, __P,
11637                                                   (__mmask8) __U);
11638 }
11639
11640 extern __inline __mmask8
11641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11642 _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
11643                          const int __P)
11644 {
11645   return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11646                                                   (__v8sf) __Y, __P,
11647                                                   (__mmask8) __U);
11648 }
11649
11650 extern __inline __mmask8
11651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11652 _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
11653 {
11654   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11655                                                  (__v2di) __Y, __P,
11656                                                  (__mmask8) -1);
11657 }
11658
11659 extern __inline __mmask8
11660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11661 _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
11662 {
11663   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11664                                                  (__v4si) __Y, __P,
11665                                                  (__mmask8) -1);
11666 }
11667
11668 extern __inline __mmask8
11669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11670 _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
11671 {
11672   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11673                                                   (__v2di) __Y, __P,
11674                                                   (__mmask8) -1);
11675 }
11676
11677 extern __inline __mmask8
11678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11679 _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
11680 {
11681   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11682                                                   (__v4si) __Y, __P,
11683                                                   (__mmask8) -1);
11684 }
11685
11686 extern __inline __mmask8
11687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688 _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
11689 {
11690   return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11691                                                   (__v2df) __Y, __P,
11692                                                   (__mmask8) -1);
11693 }
11694
11695 extern __inline __mmask8
11696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11697 _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
11698 {
11699   return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11700                                                   (__v4sf) __Y, __P,
11701                                                   (__mmask8) -1);
11702 }
11703
11704 extern __inline __mmask8
11705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11706 _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11707                          const int __P)
11708 {
11709   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11710                                                  (__v2di) __Y, __P,
11711                                                  (__mmask8) __U);
11712 }
11713
11714 extern __inline __mmask8
11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716 _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11717                          const int __P)
11718 {
11719   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11720                                                  (__v4si) __Y, __P,
11721                                                  (__mmask8) __U);
11722 }
11723
11724 extern __inline __mmask8
11725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11726 _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11727                          const int __P)
11728 {
11729   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11730                                                   (__v2di) __Y, __P,
11731                                                   (__mmask8) __U);
11732 }
11733
11734 extern __inline __mmask8
11735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11736 _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11737                          const int __P)
11738 {
11739   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11740                                                   (__v4si) __Y, __P,
11741                                                   (__mmask8) __U);
11742 }
11743
11744 extern __inline __mmask8
11745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11746 _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
11747                       const int __P)
11748 {
11749   return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11750                                                   (__v2df) __Y, __P,
11751                                                   (__mmask8) __U);
11752 }
11753
11754 extern __inline __mmask8
11755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11756 _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
11757                       const int __P)
11758 {
11759   return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11760                                                   (__v4sf) __Y, __P,
11761                                                   (__mmask8) __U);
11762 }
11763
11764 extern __inline __m256d
11765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11766 _mm256_permutex_pd (__m256d __X, const int __M)
11767 {
11768   return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
11769                                                   (__v4df)
11770                                                   _mm256_undefined_pd (),
11771                                                   (__mmask8) -1);
11772 }
11773
11774 extern __inline __mmask8
11775   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11776 _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11777 {
11778   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11779                                                   (__v8si) __Y, 4,
11780                                                   (__mmask8) __M);
11781 }
11782
11783 extern __inline __mmask8
11784   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11785 _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
11786 {
11787   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11788                                                   (__v8si) __Y, 4,
11789                                                   (__mmask8) -1);
11790 }
11791
11792 extern __inline __mmask8
11793   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11794 _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11795 {
11796   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11797                                                   (__v8si) __Y, 1,
11798                                                   (__mmask8) __M);
11799 }
11800
11801 extern __inline __mmask8
11802   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11803 _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
11804 {
11805   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11806                                                   (__v8si) __Y, 1,
11807                                                   (__mmask8) -1);
11808 }
11809
11810 extern __inline __mmask8
11811   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11812 _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11813 {
11814   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11815                                                   (__v8si) __Y, 5,
11816                                                   (__mmask8) __M);
11817 }
11818
11819 extern __inline __mmask8
11820   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11821 _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
11822 {
11823   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11824                                                   (__v8si) __Y, 5,
11825                                                   (__mmask8) -1);
11826 }
11827
11828 extern __inline __mmask8
11829   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11830 _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11831 {
11832   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11833                                                   (__v8si) __Y, 2,
11834                                                   (__mmask8) __M);
11835 }
11836
11837 extern __inline __mmask8
11838   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11839 _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
11840 {
11841   return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11842                                                   (__v8si) __Y, 2,
11843                                                   (__mmask8) -1);
11844 }
11845
11846 extern __inline __mmask8
11847   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11848 _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11849 {
11850   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11851                                                   (__v4di) __Y, 4,
11852                                                   (__mmask8) __M);
11853 }
11854
11855 extern __inline __mmask8
11856   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11857 _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
11858 {
11859   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11860                                                   (__v4di) __Y, 4,
11861                                                   (__mmask8) -1);
11862 }
11863
11864 extern __inline __mmask8
11865   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11866 _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11867 {
11868   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11869                                                   (__v4di) __Y, 1,
11870                                                   (__mmask8) __M);
11871 }
11872
11873 extern __inline __mmask8
11874   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11875 _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
11876 {
11877   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11878                                                   (__v4di) __Y, 1,
11879                                                   (__mmask8) -1);
11880 }
11881
11882 extern __inline __mmask8
11883   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11884 _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11885 {
11886   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11887                                                   (__v4di) __Y, 5,
11888                                                   (__mmask8) __M);
11889 }
11890
11891 extern __inline __mmask8
11892   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11893 _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
11894 {
11895   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11896                                                   (__v4di) __Y, 5,
11897                                                   (__mmask8) -1);
11898 }
11899
11900 extern __inline __mmask8
11901   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11902 _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11903 {
11904   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11905                                                   (__v4di) __Y, 2,
11906                                                   (__mmask8) __M);
11907 }
11908
11909 extern __inline __mmask8
11910   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11911 _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
11912 {
11913   return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11914                                                   (__v4di) __Y, 2,
11915                                                   (__mmask8) -1);
11916 }
11917
11918 extern __inline __mmask8
11919   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11920 _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11921 {
11922   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11923                                                  (__v8si) __Y, 4,
11924                                                  (__mmask8) __M);
11925 }
11926
11927 extern __inline __mmask8
11928   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11929 _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
11930 {
11931   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11932                                                  (__v8si) __Y, 4,
11933                                                  (__mmask8) -1);
11934 }
11935
11936 extern __inline __mmask8
11937   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11938 _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11939 {
11940   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11941                                                  (__v8si) __Y, 1,
11942                                                  (__mmask8) __M);
11943 }
11944
11945 extern __inline __mmask8
11946   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11947 _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
11948 {
11949   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11950                                                  (__v8si) __Y, 1,
11951                                                  (__mmask8) -1);
11952 }
11953
11954 extern __inline __mmask8
11955   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11956 _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11957 {
11958   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11959                                                  (__v8si) __Y, 5,
11960                                                  (__mmask8) __M);
11961 }
11962
11963 extern __inline __mmask8
11964   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11965 _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
11966 {
11967   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11968                                                  (__v8si) __Y, 5,
11969                                                  (__mmask8) -1);
11970 }
11971
11972 extern __inline __mmask8
11973   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11974 _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11975 {
11976   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11977                                                  (__v8si) __Y, 2,
11978                                                  (__mmask8) __M);
11979 }
11980
11981 extern __inline __mmask8
11982   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11983 _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
11984 {
11985   return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11986                                                  (__v8si) __Y, 2,
11987                                                  (__mmask8) -1);
11988 }
11989
11990 extern __inline __mmask8
11991   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11992 _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11993 {
11994   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11995                                                  (__v4di) __Y, 4,
11996                                                  (__mmask8) __M);
11997 }
11998
11999 extern __inline __mmask8
12000   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12001 _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
12002 {
12003   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12004                                                  (__v4di) __Y, 4,
12005                                                  (__mmask8) -1);
12006 }
12007
12008 extern __inline __mmask8
12009   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12010 _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12011 {
12012   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12013                                                  (__v4di) __Y, 1,
12014                                                  (__mmask8) __M);
12015 }
12016
12017 extern __inline __mmask8
12018   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12019 _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
12020 {
12021   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12022                                                  (__v4di) __Y, 1,
12023                                                  (__mmask8) -1);
12024 }
12025
12026 extern __inline __mmask8
12027   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12028 _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12029 {
12030   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12031                                                  (__v4di) __Y, 5,
12032                                                  (__mmask8) __M);
12033 }
12034
12035 extern __inline __mmask8
12036   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12037 _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
12038 {
12039   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12040                                                  (__v4di) __Y, 5,
12041                                                  (__mmask8) -1);
12042 }
12043
12044 extern __inline __mmask8
12045   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046 _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12047 {
12048   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12049                                                  (__v4di) __Y, 2,
12050                                                  (__mmask8) __M);
12051 }
12052
12053 extern __inline __mmask8
12054   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12055 _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
12056 {
12057   return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12058                                                  (__v4di) __Y, 2,
12059                                                  (__mmask8) -1);
12060 }
12061
12062 extern __inline __mmask8
12063   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12064 _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12065 {
12066   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12067                                                   (__v4si) __Y, 4,
12068                                                   (__mmask8) __M);
12069 }
12070
12071 extern __inline __mmask8
12072   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12073 _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
12074 {
12075   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12076                                                   (__v4si) __Y, 4,
12077                                                   (__mmask8) -1);
12078 }
12079
12080 extern __inline __mmask8
12081   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12082 _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12083 {
12084   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12085                                                   (__v4si) __Y, 1,
12086                                                   (__mmask8) __M);
12087 }
12088
12089 extern __inline __mmask8
12090   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12091 _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
12092 {
12093   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12094                                                   (__v4si) __Y, 1,
12095                                                   (__mmask8) -1);
12096 }
12097
12098 extern __inline __mmask8
12099   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12100 _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12101 {
12102   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12103                                                   (__v4si) __Y, 5,
12104                                                   (__mmask8) __M);
12105 }
12106
12107 extern __inline __mmask8
12108   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12109 _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
12110 {
12111   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12112                                                   (__v4si) __Y, 5,
12113                                                   (__mmask8) -1);
12114 }
12115
12116 extern __inline __mmask8
12117   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12118 _mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12119 {
12120   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12121                                                   (__v4si) __Y, 2,
12122                                                   (__mmask8) __M);
12123 }
12124
12125 extern __inline __mmask8
12126   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12127 _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
12128 {
12129   return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12130                                                   (__v4si) __Y, 2,
12131                                                   (__mmask8) -1);
12132 }
12133
12134 extern __inline __mmask8
12135   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12136 _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12137 {
12138   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12139                                                   (__v2di) __Y, 4,
12140                                                   (__mmask8) __M);
12141 }
12142
12143 extern __inline __mmask8
12144   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145 _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
12146 {
12147   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12148                                                   (__v2di) __Y, 4,
12149                                                   (__mmask8) -1);
12150 }
12151
12152 extern __inline __mmask8
12153   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154 _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12155 {
12156   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12157                                                   (__v2di) __Y, 1,
12158                                                   (__mmask8) __M);
12159 }
12160
12161 extern __inline __mmask8
12162   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12163 _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
12164 {
12165   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12166                                                   (__v2di) __Y, 1,
12167                                                   (__mmask8) -1);
12168 }
12169
12170 extern __inline __mmask8
12171   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12172 _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12173 {
12174   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12175                                                   (__v2di) __Y, 5,
12176                                                   (__mmask8) __M);
12177 }
12178
12179 extern __inline __mmask8
12180   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12181 _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
12182 {
12183   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12184                                                   (__v2di) __Y, 5,
12185                                                   (__mmask8) -1);
12186 }
12187
12188 extern __inline __mmask8
12189   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12190 _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12191 {
12192   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12193                                                   (__v2di) __Y, 2,
12194                                                   (__mmask8) __M);
12195 }
12196
12197 extern __inline __mmask8
12198   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12199 _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
12200 {
12201   return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12202                                                   (__v2di) __Y, 2,
12203                                                   (__mmask8) -1);
12204 }
12205
12206 extern __inline __mmask8
12207   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12208 _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12209 {
12210   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12211                                                  (__v4si) __Y, 4,
12212                                                  (__mmask8) __M);
12213 }
12214
12215 extern __inline __mmask8
12216   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12217 _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
12218 {
12219   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12220                                                  (__v4si) __Y, 4,
12221                                                  (__mmask8) -1);
12222 }
12223
12224 extern __inline __mmask8
12225   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12226 _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12227 {
12228   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12229                                                  (__v4si) __Y, 1,
12230                                                  (__mmask8) __M);
12231 }
12232
12233 extern __inline __mmask8
12234   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12235 _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
12236 {
12237   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12238                                                  (__v4si) __Y, 1,
12239                                                  (__mmask8) -1);
12240 }
12241
12242 extern __inline __mmask8
12243   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12244 _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12245 {
12246   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12247                                                  (__v4si) __Y, 5,
12248                                                  (__mmask8) __M);
12249 }
12250
12251 extern __inline __mmask8
12252   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12253 _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
12254 {
12255   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12256                                                  (__v4si) __Y, 5,
12257                                                  (__mmask8) -1);
12258 }
12259
12260 extern __inline __mmask8
12261   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12262 _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12263 {
12264   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12265                                                  (__v4si) __Y, 2,
12266                                                  (__mmask8) __M);
12267 }
12268
12269 extern __inline __mmask8
12270   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12271 _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
12272 {
12273   return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12274                                                  (__v4si) __Y, 2,
12275                                                  (__mmask8) -1);
12276 }
12277
12278 extern __inline __mmask8
12279   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12280 _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12281 {
12282   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12283                                                  (__v2di) __Y, 4,
12284                                                  (__mmask8) __M);
12285 }
12286
12287 extern __inline __mmask8
12288   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12289 _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
12290 {
12291   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12292                                                  (__v2di) __Y, 4,
12293                                                  (__mmask8) -1);
12294 }
12295
12296 extern __inline __mmask8
12297   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12298 _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12299 {
12300   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12301                                                  (__v2di) __Y, 1,
12302                                                  (__mmask8) __M);
12303 }
12304
12305 extern __inline __mmask8
12306   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12307 _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
12308 {
12309   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12310                                                  (__v2di) __Y, 1,
12311                                                  (__mmask8) -1);
12312 }
12313
12314 extern __inline __mmask8
12315   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12316 _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12317 {
12318   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12319                                                  (__v2di) __Y, 5,
12320                                                  (__mmask8) __M);
12321 }
12322
12323 extern __inline __mmask8
12324   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12325 _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
12326 {
12327   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12328                                                  (__v2di) __Y, 5,
12329                                                  (__mmask8) -1);
12330 }
12331
12332 extern __inline __mmask8
12333   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12334 _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12335 {
12336   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12337                                                  (__v2di) __Y, 2,
12338                                                  (__mmask8) __M);
12339 }
12340
12341 extern __inline __mmask8
12342   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12343 _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
12344 {
12345   return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12346                                                  (__v2di) __Y, 2,
12347                                                  (__mmask8) -1);
12348 }
12349
12350 #else
/* Macro form of _mm256_permutex_pd: expands to a call of
   __builtin_ia32_permdf256_mask on X with immediate M, the value of
   _mm256_undefined_pd () as third operand and an all-ones mask.  */
#define _mm256_permutex_pd(X, M)                                        \
  ((__m256d) __builtin_ia32_permdf256_mask (                            \
     (__v4df) (__m256d) (X),                                            \
     (int) (M),                                                         \
     (__v4df) (__m256d) _mm256_undefined_pd (),                         \
     (__mmask8) -1))
12355
/* Expands to a call of __builtin_ia32_permdi256_mask on X with
   immediate I, the value of _mm256_setzero_si256 () as third operand
   and M as the mask operand.  */
#define _mm256_maskz_permutex_epi64(M, X, I)                            \
  ((__m256i) __builtin_ia32_permdi256_mask (                            \
     (__v4di) (__m256i) (X),                                            \
     (int) (I),                                                         \
     (__v4di) (__m256i) (_mm256_setzero_si256 ()),                      \
     (__mmask8) (M)))
12362
/* Expands to a call of __builtin_ia32_permdi256_mask on X with
   immediate I, W as third operand and M as the mask operand.  */
#define _mm256_mask_permutex_epi64(W, M, X, I)                          \
  ((__m256i) __builtin_ia32_permdi256_mask (                            \
     (__v4di) (__m256i) (X),                                            \
     (int) (I),                                                         \
     (__v4di) (__m256i) (W),                                            \
     (__mmask8) (M)))
12368
/* Expands to a call of __builtin_ia32_insertf32x4_256_mask with X, Y
   and immediate C, the value of _mm256_setzero_ps () as fourth operand
   and an all-ones mask.  */
#define _mm256_insertf32x4(X, Y, C)                                     \
  ((__m256) __builtin_ia32_insertf32x4_256_mask (                       \
     (__v8sf) (__m256) (X),                                             \
     (__v4sf) (__m128) (Y),                                             \
     (int) (C),                                                         \
     (__v8sf) (__m256) _mm256_setzero_ps (),                            \
     (__mmask8) -1))
12374
/* Expands to a call of __builtin_ia32_insertf32x4_256_mask with X, Y
   and immediate C, W as fourth operand and U as the mask operand.  */
#define _mm256_mask_insertf32x4(W, U, X, Y, C)                          \
  ((__m256) __builtin_ia32_insertf32x4_256_mask (                       \
     (__v8sf) (__m256) (X),                                             \
     (__v4sf) (__m128) (Y),                                             \
     (int) (C),                                                         \
     (__v8sf) (__m256) (W),                                             \
     (__mmask8) (U)))
12380
/* Expands to a call of __builtin_ia32_insertf32x4_256_mask with X, Y
   and immediate C, the value of _mm256_setzero_ps () as fourth operand
   and U as the mask operand.  */
#define _mm256_maskz_insertf32x4(U, X, Y, C)                            \
  ((__m256) __builtin_ia32_insertf32x4_256_mask (                       \
     (__v8sf) (__m256) (X),                                             \
     (__v4sf) (__m128) (Y),                                             \
     (int) (C),                                                         \
     (__v8sf) (__m256) _mm256_setzero_ps (),                            \
     (__mmask8) (U)))
12386
/* Expands to a call of __builtin_ia32_inserti32x4_256_mask with X, Y
   and immediate C, the value of _mm256_setzero_si256 () as fourth
   operand and an all-ones mask.  */
#define _mm256_inserti32x4(X, Y, C)                                     \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask (                      \
     (__v8si) (__m256i) (X),                                            \
     (__v4si) (__m128i) (Y),                                            \
     (int) (C),                                                         \
     (__v8si) (__m256i) _mm256_setzero_si256 (),                        \
     (__mmask8) -1))
12392
/* Expands to a call of __builtin_ia32_inserti32x4_256_mask with X, Y
   and immediate C, W as fourth operand and U as the mask operand.  */
#define _mm256_mask_inserti32x4(W, U, X, Y, C)                          \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask (                      \
     (__v8si) (__m256i) (X),                                            \
     (__v4si) (__m128i) (Y),                                            \
     (int) (C),                                                         \
     (__v8si) (__m256i) (W),                                            \
     (__mmask8) (U)))
12398
/* Expands to a call of __builtin_ia32_inserti32x4_256_mask with X, Y
   and immediate C, the value of _mm256_setzero_si256 () as fourth
   operand and U as the mask operand.  */
#define _mm256_maskz_inserti32x4(U, X, Y, C)                            \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask (                      \
     (__v8si) (__m256i) (X),                                            \
     (__v4si) (__m128i) (Y),                                            \
     (int) (C),                                                         \
     (__v8si) (__m256i) _mm256_setzero_si256 (),                        \
     (__mmask8) (U)))
12404
/* extractf32x4 from a 256-bit float vector, via
   __builtin_ia32_extractf32x4_256_mask.  Every form forwards X (as __v8sf)
   and C (as int) and yields an __m128.  The forms differ only in the last
   two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)-1))

#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)(U)))
12422
/* extracti32x4 from a 256-bit integer vector, via
   __builtin_ia32_extracti32x4_256_mask.  Every form forwards X (as __v8si)
   and C (as int) and yields an __m128i.  The forms differ only in the last
   two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))

#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
12434
/* shuffle_i64x2 on 256-bit integer vectors, via
   __builtin_ia32_shuf_i64x2_256_mask.  Every form forwards X and Y
   (as __v4di) and C (as int).  The forms differ only in the last two
   builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
12452
/* shuffle_i32x4 on 256-bit integer vectors, via
   __builtin_ia32_shuf_i32x4_256_mask.  Every form forwards X and Y
   (as __v8si) and C (as int).  The forms differ only in the last two
   builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256(), \
     (__mmask8)-1))

#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256(), \
     (__mmask8)(U)))
12470
/* shuffle_f64x2 on 256-bit double vectors, via
   __builtin_ia32_shuf_f64x2_256_mask.  Every form forwards X and Y
   (as __v4df) and C (as int).  The forms differ only in the last two
   builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)-1))

#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)(U)))
12488
/* shuffle_f32x4 on 256-bit float vectors, via
   __builtin_ia32_shuf_f32x4_256_mask.  Every form forwards X and Y
   (as __v8sf) and C (as int).  The forms differ only in the last two
   builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps(), \
     (__mmask8)-1))

#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps(), \
     (__mmask8)(U)))
12506
/* Masked shuffle_pd, 256-bit (__builtin_ia32_shufpd256_mask) and 128-bit
   (__builtin_ia32_shufpd128_mask).  A, B and C (as int) are forwarded
   unchanged.  The mask forms pass W as the fourth builtin operand; the
   maskz forms pass a zero vector there.  U is always the mask operand.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)(U)))

#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))

#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)_mm_setzero_pd(), \
     (__mmask8)(U)))
12530
/* Masked shuffle_ps, 256-bit (__builtin_ia32_shufps256_mask) and 128-bit
   (__builtin_ia32_shufps128_mask).  A, B and C (as int) are forwarded
   unchanged.  The mask forms pass W as the fourth builtin operand; the
   maskz forms pass a zero vector there.  U is always the mask operand.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps(), \
     (__mmask8)(U)))

#define _mm_mask_shuffle_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

#define _mm_maskz_shuffle_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)(U)))
12554
/* fixupimm_pd on 256-bit double vectors.  X and Y go in as __v4df, Z as
   __v4di and C as int.  The plain and mask forms call
   __builtin_ia32_fixupimmpd256_mask (with an all-ones mask and U
   respectively); the maskz form calls __builtin_ia32_fixupimmpd256_maskz
   with U.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))

#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))

#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12572
/* fixupimm_ps on 256-bit float vectors.  X and Y go in as __v8sf, Z as
   __v8si and C as int.  The plain and mask forms call
   __builtin_ia32_fixupimmps256_mask (with an all-ones mask and U
   respectively); the maskz form calls __builtin_ia32_fixupimmps256_maskz
   with U.  */
#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))

#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))

#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12591
/* fixupimm_pd on 128-bit double vectors.  X and Y go in as __v2df, Z as
   __v2di and C as int.  The plain and mask forms call
   __builtin_ia32_fixupimmpd128_mask (with an all-ones mask and U
   respectively); the maskz form calls __builtin_ia32_fixupimmpd128_maskz
   with U.  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))

#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12610
/* fixupimm_ps on 128-bit float vectors.  X and Y go in as __v4sf, Z as
   __v4si and C as int.  The plain and mask forms call
   __builtin_ia32_fixupimmps128_mask (with an all-ones mask and U
   respectively); the maskz form calls __builtin_ia32_fixupimmps128_maskz
   with U.  */
#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))

#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12628
/* Masked srli_epi32, 256-bit (__builtin_ia32_psrldi256_mask) and 128-bit
   (__builtin_ia32_psrldi128_mask).  A is the vector operand and B is
   passed as int.  The mask forms pass W as the third builtin operand; the
   maskz forms pass a zero vector there.  U is always the mask operand.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)_mm256_setzero_si256(), \
     (__mmask8)(U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)_mm_setzero_si128(), \
     (__mmask8)(U)))
12644
/* Masked srli_epi64, 256-bit (__builtin_ia32_psrlqi256_mask) and 128-bit
   (__builtin_ia32_psrlqi128_mask).  A is the vector operand and B is
   passed as int.  The mask forms pass W as the third builtin operand; the
   maskz forms pass a zero vector there.  U is always the mask operand.  */
#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)_mm_setzero_si128(), \
     (__mmask8)(U)))
12660
/* Masked 256-bit slli: epi32 via __builtin_ia32_pslldi256_mask, epi64 via
   __builtin_ia32_psllqi256_mask.  X is the vector operand and C is passed
   as int.  The mask forms pass W as the third builtin operand; the maskz
   forms pass a zero vector there.  U is always the mask operand.  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i)__builtin_ia32_pslldi256_mask ( \
     (__v8si)(__m256i)(X), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i)__builtin_ia32_pslldi256_mask ( \
     (__v8si)(__m256i)(X), \
     (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256(), \
     (__mmask8)(U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_psllqi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(C), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_psllqi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
12680
/* Masked 128-bit slli: epi32 via __builtin_ia32_pslldi128_mask, epi64 via
   __builtin_ia32_psllqi128_mask.  X is the vector operand and C is passed
   as int.  The mask forms pass W as the third builtin operand; the maskz
   forms pass a zero vector there (_mm_setzero_si128 for epi32,
   _mm_setzero_di for epi64).  U is always the mask operand.  */
#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i)__builtin_ia32_pslldi128_mask ( \
     (__v4si)(__m128i)(X), \
     (int)(C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i)__builtin_ia32_pslldi128_mask ( \
     (__v4si)(__m128i)(X), \
     (int)(C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i)__builtin_ia32_psllqi128_mask ( \
     (__v2di)(__m128i)(X), \
     (int)(C), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i)__builtin_ia32_psllqi128_mask ( \
     (__v2di)(__m128i)(X), \
     (int)(C), \
     (__v2di)(__m128i)_mm_setzero_di(), \
     (__mmask8)(U)))
12700
/* ternarylogic_epi64 on 256-bit vectors.  A, B and C go in as __v4di and I
   as int.  The plain and mask forms call __builtin_ia32_pternlogq256_mask
   (with an all-ones mask and U respectively); the maskz form calls
   __builtin_ia32_pternlogq256_maskz with U.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
     (__v4di)(__m256i)(A), \
     (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
     (__v4di)(__m256i)(A), \
     (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
     (__v4di)(__m256i)(A), \
     (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), \
     (int)(I), \
     (__mmask8)(U)))
12712
/* ternarylogic_epi32 on 256-bit vectors.  A, B and C go in as __v8si and I
   as int.  The plain and mask forms call __builtin_ia32_pternlogd256_mask
   (with an all-ones mask and U respectively); the maskz form calls
   __builtin_ia32_pternlogd256_maskz with U.  */
#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
     (__v8si)(__m256i)(A), \
     (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
     (__v8si)(__m256i)(A), \
     (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
     (__v8si)(__m256i)(A), \
     (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), \
     (int)(I), \
     (__mmask8)(U)))
12724
/* ternarylogic_epi64 on 128-bit vectors.  A, B and C go in as __v2di and I
   as int.  The plain and mask forms call __builtin_ia32_pternlogq128_mask
   (with an all-ones mask and U respectively); the maskz form calls
   __builtin_ia32_pternlogq128_maskz with U.  */
#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
     (__v2di)(__m128i)(A), \
     (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
     (__v2di)(__m128i)(A), \
     (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
     (__v2di)(__m128i)(A), \
     (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), \
     (int)(I), \
     (__mmask8)(U)))
12736
/* ternarylogic_epi32 on 128-bit vectors.  A, B and C go in as __v4si and I
   as int.  The plain and mask forms call __builtin_ia32_pternlogd128_mask
   (with an all-ones mask and U respectively); the maskz form calls
   __builtin_ia32_pternlogd128_maskz with U.  */
#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
     (__v4si)(__m128i)(A), \
     (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
     (__v4si)(__m128i)(A), \
     (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
     (__v4si)(__m128i)(A), \
     (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), \
     (int)(I), \
     (__mmask8)(U)))
12748
/* roundscale_ps on 256-bit float vectors, via
   __builtin_ia32_rndscaleps_256_mask.  A goes in as __v8sf and B as int.
   The forms differ only in the last two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), \
     (int)(B), \
     (__v8sf)(__m256)_mm256_setzero_ps(), \
     (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), \
     (int)(B), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), \
     (int)(B), \
     (__v8sf)(__m256)_mm256_setzero_ps(), \
     (__mmask8)(U)))
12760
/* roundscale_pd on 256-bit double vectors, via
   __builtin_ia32_rndscalepd_256_mask.  A goes in as __v4df and B as int.
   The forms differ only in the last two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), \
     (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), \
     (int)(B), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), \
     (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)(U)))
12772
/* roundscale_ps on 128-bit float vectors, via
   __builtin_ia32_rndscaleps_128_mask.  A goes in as __v4sf and B as int.
   The forms differ only in the last two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), \
     (int)(B), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), \
     (int)(B), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), \
     (int)(B), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)(U)))
12784
/* roundscale_pd on 128-bit double vectors, via
   __builtin_ia32_rndscalepd_128_mask.  A goes in as __v2df and B as int.
   The forms differ only in the last two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), \
     (int)(B), \
     (__v2df)(__m128d)_mm_setzero_pd(), \
     (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), \
     (int)(B), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), \
     (int)(B), \
     (__v2df)(__m128d)_mm_setzero_pd(), \
     (__mmask8)(U)))
12796
/* getmant_ps on 256-bit float vectors, via
   __builtin_ia32_getmantps256_mask.  X goes in as __v8sf; B and C are
   packed into one int operand as ((C) << 2) | (B).  The forms differ only
   in the last two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), \
     (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)_mm256_setzero_ps(), \
     (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), \
     (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), \
     (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)_mm256_setzero_ps(), \
     (__mmask8)(U)))
12814
/* getmant_ps on 128-bit float vectors, via
   __builtin_ia32_getmantps128_mask.  X goes in as __v4sf; B and C are
   packed into one int operand as ((C) << 2) | (B).  The forms differ only
   in the last two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)(U)))
12832
/* getmant_pd on 256-bit double vectors, via
   __builtin_ia32_getmantpd256_mask.  X goes in as __v4df; B and C are
   packed into one int operand as ((C) << 2) | (B).  The forms differ only
   in the last two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)(U)))
12850
/* getmant_pd on 128-bit double vectors, via
   __builtin_ia32_getmantpd128_mask.  X goes in as __v2df; B and C are
   packed into one int operand as ((C) << 2) | (B).  The forms differ only
   in the last two builtin operands:
     plain: zero vector, all-ones mask
     mask:  W, U
     maskz: zero vector, U.  */
#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)_mm_setzero_pd(), \
     (__mmask8)-1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)_mm_setzero_pd(), \
     (__mmask8)(U)))
12868
/* Masked i32 gathers of floats: 256-bit via __builtin_ia32_gather3siv8sf,
   128-bit via __builtin_ia32_gather3siv4sf.  Operands are forwarded in the
   order V1OLD, ADDR (as float const *), INDEX, MASK (as __mmask8),
   SCALE (as int).

   Every macro argument is parenthesized before its cast, and the whole
   expansion is parenthesized, so an expression argument such as `m + 1'
   or `base + off' is cast as a unit instead of having the cast bind to
   its first operand only.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ( \
     (__v8sf)(__m256) (V1OLD), \
     (float const *) (ADDR), \
     (__v8si)(__m256i) (INDEX), \
     (__mmask8) (MASK), \
     (int) (SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ( \
     (__v4sf)(__m128) (V1OLD), \
     (float const *) (ADDR), \
     (__v4si)(__m128i) (INDEX), \
     (__mmask8) (MASK), \
     (int) (SCALE)))
12880
/* Masked i32 gathers of doubles: 256-bit via __builtin_ia32_gather3siv4df,
   128-bit via __builtin_ia32_gather3siv2df.  Operands are forwarded in the
   order V1OLD, ADDR (as double const *), INDEX (as __v4si in both
   widths), MASK (as __mmask8), SCALE (as int).

   Every macro argument is parenthesized before its cast, and the whole
   expansion is parenthesized, so an expression argument such as `m + 1'
   or `base + off' is cast as a unit instead of having the cast bind to
   its first operand only.  */
#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ( \
     (__v4df)(__m256d) (V1OLD), \
     (double const *) (ADDR), \
     (__v4si)(__m128i) (INDEX), \
     (__mmask8) (MASK), \
     (int) (SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ( \
     (__v2df)(__m128d) (V1OLD), \
     (double const *) (ADDR), \
     (__v4si)(__m128i) (INDEX), \
     (__mmask8) (MASK), \
     (int) (SCALE)))
12892
/* mmask_i64gather_ps macros.  V1OLD, ADDR, INDEX, MASK and SCALE are cast
   and forwarded, in that order, to the gather3div8sf / gather3div4sf
   builtins.  Both forms produce an __m128; the 256 form takes INDEX as
   __m256i, the 128 form as __m128i.  Every argument and the whole expansion
   are parenthesized so that expression arguments are cast as a unit.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v4di)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v2di)(__m128i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))
12904
/* mmask_i64gather_pd macros.  V1OLD, ADDR, INDEX, MASK and SCALE are cast
   and forwarded, in that order, to the gather3div4df / gather3div2df
   builtins.  Every argument and the whole expansion are parenthesized so
   that expression arguments are cast as a unit.  */
#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD), \
                                           (double const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD), \
                                           (double const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12916
/* mmask_i32gather_epi32 macros.  V1OLD, ADDR, INDEX, MASK and SCALE are
   cast and forwarded, in that order, to the gather3siv8si / gather3siv4si
   builtins.  Every argument and the whole expansion are parenthesized so
   that expression arguments are cast as a unit.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v8si)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12928
/* mmask_i32gather_epi64 macros.  V1OLD, ADDR, INDEX, MASK and SCALE are
   cast and forwarded, in that order, to the gather3siv4di / gather3siv2di
   builtins; both forms cast INDEX through __m128i to __v4si.  Every
   argument and the whole expansion are parenthesized so that expression
   arguments are cast as a unit.  */
#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD), \
                                           (long long const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD), \
                                           (long long const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12940
/* mmask_i64gather_epi32 macros.  V1OLD, ADDR, INDEX, MASK and SCALE are
   cast and forwarded, in that order, to the gather3div8si / gather3div4si
   builtins.  Both forms produce an __m128i; the 256 form takes INDEX as
   __m256i, the 128 form as __m128i.  Every argument and the whole expansion
   are parenthesized so that expression arguments are cast as a unit.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12952
/* mmask_i64gather_epi64 macros.  V1OLD, ADDR, INDEX, MASK and SCALE are
   cast and forwarded, in that order, to the gather3div4di / gather3div2di
   builtins.  Every argument and the whole expansion are parenthesized so
   that expression arguments are cast as a unit.  */
#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD), \
                                           (long long const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD), \
                                           (long long const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12964
/* i32scatter_ps macros.  ADDR, a mask, INDEX, V1 and SCALE are cast and
   forwarded, in that order, to the scattersiv8sf / scattersiv4sf builtins.
   The forms without a MASK parameter pass the constant (__mmask8) 0xFF.
   Every macro argument is parenthesized so that an expression argument
   (e.g. ADDR given as `p + 1') is cast as a unit.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((float *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((float *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((float *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((float *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))
12984
/* i32scatter_pd macros.  ADDR, a mask, INDEX, V1 and SCALE are cast and
   forwarded, in that order, to the scattersiv4df / scattersiv2df builtins;
   all four forms cast INDEX through __m128i to __v4si.  The forms without a
   MASK parameter pass the constant (__mmask8) 0xFF.  Every macro argument
   is parenthesized so that expression arguments are cast as a unit.  */
#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((double *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((double *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v2df)(__m128d) (V1), (int) (SCALE))
13004
/* i64scatter_ps macros.  ADDR, a mask, INDEX, V1 and SCALE are cast and
   forwarded, in that order, to the scatterdiv8sf / scatterdiv4sf builtins;
   V1 is an __m128 in all four forms.  The forms without a MASK parameter
   pass the constant (__mmask8) 0xFF.  Every macro argument is parenthesized
   so that expression arguments are cast as a unit.  */
#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((float *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((float *) (ADDR), (__mmask8) (MASK), \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((float *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di)(__m128i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((float *) (ADDR), (__mmask8) (MASK), \
                                (__v2di)(__m128i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))
13024
/* i64scatter_pd macros.  ADDR, a mask, INDEX, V1 and SCALE are cast and
   forwarded, in that order, to the scatterdiv4df / scatterdiv2df builtins.
   The forms without a MASK parameter pass the constant (__mmask8) 0xFF.
   Every macro argument is parenthesized so that expression arguments are
   cast as a unit.  */
#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((double *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((double *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di)(__m128i) (INDEX), \
                                (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v2di)(__m128i) (INDEX), \
                                (__v2df)(__m128d) (V1), (int) (SCALE))
13044
/* i32scatter_epi32 macros.  ADDR, a mask, INDEX, V1 and SCALE are cast and
   forwarded, in that order, to the scattersiv8si / scattersiv4si builtins.
   The forms without a MASK parameter pass the constant (__mmask8) 0xFF.
   Every macro argument is parenthesized so that expression arguments are
   cast as a unit.  */
#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((int *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((int *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((int *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((int *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))
13064
/* i32scatter_epi64 macros.  ADDR, a mask, INDEX, V1 and SCALE are cast and
   forwarded, in that order, to the scattersiv4di / scattersiv2di builtins;
   all four forms cast INDEX through __m128i to __v4si.  The forms without a
   MASK parameter pass the constant (__mmask8) 0xFF.  Every macro argument
   is parenthesized so that expression arguments are cast as a unit.  */
#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((long long *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((long long *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v2di)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v2di)(__m128i) (V1), (int) (SCALE))
13084
/* i64scatter_epi32 macros.  ADDR, a mask, INDEX, V1 and SCALE are cast and
   forwarded, in that order, to the scatterdiv8si / scatterdiv4si builtins;
   V1 is an __m128i in all four forms.  The forms without a MASK parameter
   pass the constant (__mmask8) 0xFF.  Every macro argument is parenthesized
   so that expression arguments are cast as a unit.  */
#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((int *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((int *) (ADDR), (__mmask8) (MASK), \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((int *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di)(__m128i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((int *) (ADDR), (__mmask8) (MASK), \
                                (__v2di)(__m128i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))
13104
/* i64scatter_epi64 macros.  ADDR, a mask, INDEX, V1 and SCALE are cast and
   forwarded, in that order, to the scatterdiv4di / scatterdiv2di builtins.
   The forms without a MASK parameter pass the constant (__mmask8) 0xFF.
   Every macro argument is parenthesized so that expression arguments are
   cast as a unit.  */
#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((long long *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((long long *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di)(__m128i) (INDEX), \
                                (__v2di)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v2di)(__m128i) (INDEX), \
                                (__v2di)(__m128i) (V1), (int) (SCALE))
13124
/* mask/maskz shuffle_epi32 macros over the pshufd256_mask and
   pshufd128_mask builtins.  Each passes the source X, the immediate C (cast
   to int), a third vector operand and the mask U: the mask forms supply W
   as the third operand, the maskz forms supply a zero vector.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i)  __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i)  __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i)  __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i)  __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13144
/* rol_epi64 macro family over the prolq256_mask and prolq128_mask builtins.
   Each passes the source A, the immediate B (cast to int), a third vector
   operand and a mask.  The plain forms supply a zero vector with the mask
   (__mmask8)-1, the mask forms supply W with U, and the maskz forms supply
   a zero vector with U.  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), \
      (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), \
      (__mmask8)(U)))
13174
/* ror_epi64 macro family over the prorq256_mask and prorq128_mask builtins.
   Each passes the source A, the immediate B (cast to int), a third vector
   operand and a mask.  The plain forms supply a zero vector with the mask
   (__mmask8)-1, the mask forms supply W with U, and the maskz forms supply
   a zero vector with U.  */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), \
      (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), \
      (__mmask8)(U)))
13204
/* rol_epi32 macro family over the prold256_mask and prold128_mask builtins.
   Each passes the source A, the immediate B (cast to int), a third vector
   operand and a mask.  The plain forms supply a zero vector with the mask
   (__mmask8)-1, the mask forms supply W with U, and the maskz forms supply
   a zero vector with U.  */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13234
/* ror_epi32 macro family over the prord256_mask and prord128_mask builtins.
   Each passes the source A, the immediate B (cast to int), a third vector
   operand and a mask.  The plain forms supply a zero vector with the mask
   (__mmask8)-1, the mask forms supply W with U, and the maskz forms supply
   a zero vector with U.  */
#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)(U)))

#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13264
/* 256-bit alignr_epi32 macros over the alignd256_mask builtin.  Each passes
   X, Y, the immediate C (cast to int), a fourth vector operand and a mask.
   The mask form supplies W with U; the maskz form supplies a zero vector
   with U.  The plain form uses the mask (__mmask8)-1 and supplies a zero
   vector as the fourth operand rather than a second copy of X, so X is
   expanded -- and therefore evaluated -- only once.
   NOTE(review): this relies on an all-ones mask making the fourth operand
   irrelevant to the result; confirm against the instruction reference.  */
#define _mm256_alignr_epi32(X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(C), \
        (__v8si)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)-1))

#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(C), \
        (__v8si)(__m256i)(W), \
        (__mmask8)(U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(C), \
        (__v8si)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
13277
/* 256-bit alignr_epi64 macros over the alignq256_mask builtin.  Each passes
   X, Y, the immediate C (cast to int), a fourth vector operand and a mask.
   The mask form supplies W with U; the maskz form supplies a zero vector
   with U.  The plain form uses the mask (__mmask8)-1 and supplies a zero
   vector as the fourth operand rather than a second copy of X, so X is
   expanded -- and therefore evaluated -- only once.
   NOTE(review): this relies on an all-ones mask making the fourth operand
   irrelevant to the result; confirm against the instruction reference.  */
#define _mm256_alignr_epi64(X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(C), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)-1))

#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(C), \
        (__v4di)(__m256i)(W), \
        (__mmask8)(U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(C), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
13290
/* 128-bit alignr_epi32 macros over the alignd128_mask builtin.  Each passes
   X, Y, the immediate C (cast to int), a fourth vector operand and a mask.
   The mask form supplies W with U; the maskz form supplies a zero vector
   with U.  The plain form uses the mask (__mmask8)-1 and supplies a zero
   vector as the fourth operand rather than a second copy of X, so X is
   expanded -- and therefore evaluated -- only once.
   NOTE(review): this relies on an all-ones mask making the fourth operand
   irrelevant to the result; confirm against the instruction reference.  */
#define _mm_alignr_epi32(X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(C), \
        (__v4si)(__m128i)_mm_setzero_si128(), \
        (__mmask8)-1))

#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(C), \
        (__v4si)(__m128i)(W), \
        (__mmask8)(U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(C), \
        (__v4si)(__m128i)_mm_setzero_si128(), \
        (__mmask8)(U)))
13303
/* Plain 128-bit alignr_epi64: passes X, Y and the immediate C (cast to
   int) to the alignq128_mask builtin with the mask (__mmask8)-1.  The
   fourth operand is a zero vector rather than a second copy of X, so X is
   expanded -- and therefore evaluated -- only once.
   NOTE(review): this relies on an all-ones mask making the fourth operand
   irrelevant to the result; confirm against the instruction reference.  */
#define _mm_alignr_epi64(X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(C), \
        (__v2di)(__m128i)_mm_setzero_si128(), \
        (__mmask8)-1))
13307
/* Mask form of 128-bit alignr_epi64: passes X, Y and the immediate C (cast
   to int) to the alignq128_mask builtin, with W as the fourth vector
   operand and U as the mask.  W and U must be forwarded here: passing X
   and an all-ones mask instead would make this macro ignore its first two
   parameters and behave like the plain form.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(C), \
        (__v2di)(__m128i)(W), \
        (__mmask8)(U)))
13311
/* maskz form of 128-bit alignr_epi64: passes X, Y and the immediate C
   (cast to int) to the alignq128_mask builtin, with a zero vector as the
   fourth operand and U as the mask.  */
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(C), \
        (__v2di)(__m128i)_mm_setzero_si128(), \
        (__mmask8)(U)))
13316
/* mask/maskz cvtps_ph macros over the vcvtps2ph_mask and vcvtps2ph256_mask
   builtins.  Each passes the source A, the immediate I (cast to int), a
   third vector operand (cast to __v8hi) and the mask U: the mask forms
   supply W, the maskz forms supply _mm_setzero_hi().  All four produce an
   __m128i.  A is parenthesized so that an expression argument is cast as a
   unit rather than only its first operand.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), \
                                            (int) (I), \
                                            (__v8hi)(__m128i) (W), \
                                            (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), \
                                            (int) (I), \
                                            (__v8hi)(__m128i) _mm_setzero_hi(), \
                                            (__mmask8) (U)))

#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), \
                                               (int) (I), \
                                               (__v8hi)(__m128i) (W), \
                                               (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), \
                                               (int) (I), \
                                               (__v8hi)(__m128i) _mm_setzero_hi(), \
                                               (__mmask8) (U)))
13332
/* mask/maskz srai_epi32 macros over the psradi256_mask and psradi128_mask
   builtins.  Each passes the source A, the immediate B (cast to int), a
   third vector operand and the mask U: the mask forms supply W, the maskz
   forms supply a zero vector.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)))

#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)))
13348
/* srai_epi64 macro family over the psraqi256_mask and psraqi128_mask
   builtins.  Each passes the source A, the immediate B (cast to int), a
   third vector operand and a mask.  The plain forms supply a zero vector
   with the mask (__mmask8)-1, the mask forms supply W with U, and the
   maskz forms supply a zero vector with U.  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)_mm_setzero_si128(), \
      (__mmask8)-1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)_mm_setzero_si128(), \
      (__mmask8)(U)))
13372
/* mask/maskz permutex_pd macros over the permdf256_mask builtin.  Each
   passes the source A, the immediate B (cast to int), a third vector
   operand and the mask U: the mask form supplies W, the maskz form
   supplies a zero vector.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)_mm256_setzero_pd(), \
      (__mmask8)(U)))
13380
13381 #define _mm256_mask_permute_pd(W, U, X, C)                                          \
13382   ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C),      \
13383                                               (__v4df)(__m256d)(W),                 \
13384                                               (__mmask8)(U)))
13385
/* Expands to a call to __builtin_ia32_vpermilpd256_mask.  X is passed
   as __v4df and C as int; the third operand is the vector returned by
   _mm256_setzero_pd () and U is passed as the __mmask8 mask operand.
   The result is cast to __m256d.  */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
    (__v4df)(__m256d)(X), \
    (int)(C), \
    (__v4df)(__m256d)_mm256_setzero_pd(), \
    (__mmask8)(U)))
13390
/* Expands to a call to __builtin_ia32_vpermilps256_mask.  X is passed
   as __v8sf and C as int; W (as __v8sf) is the third operand and U is
   passed as the __mmask8 mask operand.  The result is cast to
   __m256.  */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
    (__v8sf)(__m256)(X), \
    (int)(C), \
    (__v8sf)(__m256)(W), \
    (__mmask8)(U)))
13394
/* Expands to a call to __builtin_ia32_vpermilps256_mask.  X is passed
   as __v8sf and C as int; the third operand is the vector returned by
   _mm256_setzero_ps () and U is passed as the __mmask8 mask operand.
   The result is cast to __m256.  */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
    (__v8sf)(__m256)(X), \
    (int)(C), \
    (__v8sf)(__m256)_mm256_setzero_ps(), \
    (__mmask8)(U)))
13399
/* Expands to a call to __builtin_ia32_vpermilpd_mask.  X is passed as
   __v2df and C as int; W (as __v2df) is the third operand and U is
   passed as the __mmask8 mask operand.  The result is cast to
   __m128d.  */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
    (__v2df)(__m128d)(X), \
    (int)(C), \
    (__v2df)(__m128d)(W), \
    (__mmask8)(U)))
13403
/* Expands to a call to __builtin_ia32_vpermilpd_mask.  X is passed as
   __v2df and C as int; the third operand is the vector returned by
   _mm_setzero_pd () and U is passed as the __mmask8 mask operand.
   The result is cast to __m128d.  */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
    (__v2df)(__m128d)(X), \
    (int)(C), \
    (__v2df)(__m128d)_mm_setzero_pd(), \
    (__mmask8)(U)))
13408
/* Expands to a call to __builtin_ia32_vpermilps_mask.  X is passed as
   __v4sf and C as int; W (as __v4sf) is the third operand and U is
   passed as the __mmask8 mask operand.  The result is cast to
   __m128.  */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
    (__v4sf)(__m128)(X), \
    (int)(C), \
    (__v4sf)(__m128)(W), \
    (__mmask8)(U)))
13412
/* Expands to a call to __builtin_ia32_vpermilps_mask.  X is passed as
   __v4sf and C as int; the third operand is the vector returned by
   _mm_setzero_ps () and U is passed as the __mmask8 mask operand.
   The result is cast to __m128.  */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
    (__v4sf)(__m128)(X), \
    (int)(C), \
    (__v4sf)(__m128)_mm_setzero_ps(), \
    (__mmask8)(U)))
13417
/* Expands to a call to __builtin_ia32_blendmpd_256_mask.  __A and __W
   are each cast directly to __v4df and passed as the first and second
   operands; __U is passed as the __mmask8 mask operand.  The result is
   cast to __m256d.  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
    (__v4df) (__A), \
    (__v4df) (__W), \
    (__mmask8) (__U)))
13422
/* Expands to a call to __builtin_ia32_blendmps_256_mask.  __A and __W
   are each cast directly to __v8sf and passed as the first and second
   operands; __U is passed as the __mmask8 mask operand.  The result is
   cast to __m256.  */
#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ( \
    (__v8sf) (__A), \
    (__v8sf) (__W), \
    (__mmask8) (__U)))
13427
/* Expands to a call to __builtin_ia32_blendmq_256_mask.  __A and __W
   are each cast directly to __v4di and passed as the first and second
   operands; __U is passed as the __mmask8 mask operand.  The result is
   cast to __m256i.  */
#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ( \
    (__v4di) (__A), \
    (__v4di) (__W), \
    (__mmask8) (__U)))
13432
/* Expands to a call to __builtin_ia32_blendmd_256_mask.  __A and __W
   are each cast directly to __v8si and passed as the first and second
   operands; __U is passed as the __mmask8 mask operand.  The result is
   cast to __m256i.  */
#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ( \
    (__v8si) (__A), \
    (__v8si) (__W), \
    (__mmask8) (__U)))
13437
/* Expands to a call to __builtin_ia32_blendmpd_128_mask.  __A and __W
   are each cast directly to __v2df and passed as the first and second
   operands; __U is passed as the __mmask8 mask operand.  The result is
   cast to __m128d.  */
#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
    (__v2df) (__A), \
    (__v2df) (__W), \
    (__mmask8) (__U)))
13442
/* Expands to a call to __builtin_ia32_blendmps_128_mask.  __A and __W
   are each cast directly to __v4sf and passed as the first and second
   operands; __U is passed as the __mmask8 mask operand.  The result is
   cast to __m128.  */
#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ( \
    (__v4sf) (__A), \
    (__v4sf) (__W), \
    (__mmask8) (__U)))
13447
/* Expands to a call to __builtin_ia32_blendmq_128_mask.  __A and __W
   are each cast directly to __v2di and passed as the first and second
   operands; __U is passed as the __mmask8 mask operand.  The result is
   cast to __m128i.  */
#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ( \
    (__v2di) (__A), \
    (__v2di) (__W), \
    (__mmask8) (__U)))
13452
/* Expands to a call to __builtin_ia32_blendmd_128_mask.  __A and __W
   are each cast directly to __v4si and passed as the first and second
   operands; __U is passed as the __mmask8 mask operand.  The result is
   cast to __m128i.  */
#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ( \
    (__v4si) (__A), \
    (__v4si) (__W), \
    (__mmask8) (__U)))
13457
/* Expands to a call to __builtin_ia32_ucmpd256_mask.  X and Y are
   passed as __v8si, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
    (__v8si)(__m256i)(X), \
    (__v8si)(__m256i)(Y), \
    (int)(P), \
    (__mmask8)-1))
13462
/* Expands to a call to __builtin_ia32_cmpq256_mask.  X and Y are
   passed as __v4di, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
    (__v4di)(__m256i)(X), \
    (__v4di)(__m256i)(Y), \
    (int)(P), \
    (__mmask8)-1))
13467
/* Expands to a call to __builtin_ia32_cmpd256_mask.  X and Y are
   passed as __v8si, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
    (__v8si)(__m256i)(X), \
    (__v8si)(__m256i)(Y), \
    (int)(P), \
    (__mmask8)-1))
13472
/* Expands to a call to __builtin_ia32_ucmpq256_mask.  X and Y are
   passed as __v4di, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
    (__v4di)(__m256i)(X), \
    (__v4di)(__m256i)(Y), \
    (int)(P), \
    (__mmask8)-1))
13477
/* Expands to a call to __builtin_ia32_cmppd256_mask.  X and Y are
   passed as __v4df, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
    (__v4df)(__m256d)(X), \
    (__v4df)(__m256d)(Y), \
    (int)(P), \
    (__mmask8)-1))
13482
/* Expands to a call to __builtin_ia32_cmpps256_mask.  X and Y are
   passed as __v8sf, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
    (__v8sf)(__m256)(X), \
    (__v8sf)(__m256)(Y), \
    (int)(P), \
    (__mmask8)-1))
13487
/* Expands to a call to __builtin_ia32_cmpq256_mask.  X and Y are
   passed as __v4di, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
    (__v4di)(__m256i)(X), \
    (__v4di)(__m256i)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13492
/* Expands to a call to __builtin_ia32_cmpd256_mask.  X and Y are
   passed as __v8si, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
    (__v8si)(__m256i)(X), \
    (__v8si)(__m256i)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13497
/* Expands to a call to __builtin_ia32_ucmpq256_mask.  X and Y are
   passed as __v4di, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
    (__v4di)(__m256i)(X), \
    (__v4di)(__m256i)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13502
/* Expands to a call to __builtin_ia32_ucmpd256_mask.  X and Y are
   passed as __v8si, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
    (__v8si)(__m256i)(X), \
    (__v8si)(__m256i)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13507
/* Expands to a call to __builtin_ia32_cmppd256_mask.  X and Y are
   passed as __v4df, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
    (__v4df)(__m256d)(X), \
    (__v4df)(__m256d)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13512
/* Expands to a call to __builtin_ia32_cmpps256_mask.  X and Y are
   passed as __v8sf, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
    (__v8sf)(__m256)(X), \
    (__v8sf)(__m256)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13517
/* Expands to a call to __builtin_ia32_cmpq128_mask.  X and Y are
   passed as __v2di, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
    (__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), \
    (int)(P), \
    (__mmask8)-1))
13522
/* Expands to a call to __builtin_ia32_cmpd128_mask.  X and Y are
   passed as __v4si, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
    (__v4si)(__m128i)(X), \
    (__v4si)(__m128i)(Y), \
    (int)(P), \
    (__mmask8)-1))
13527
/* Expands to a call to __builtin_ia32_ucmpq128_mask.  X and Y are
   passed as __v2di, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
    (__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), \
    (int)(P), \
    (__mmask8)-1))
13532
/* Expands to a call to __builtin_ia32_ucmpd128_mask.  X and Y are
   passed as __v4si, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
    (__v4si)(__m128i)(X), \
    (__v4si)(__m128i)(Y), \
    (int)(P), \
    (__mmask8)-1))
13537
/* Expands to a call to __builtin_ia32_cmppd128_mask.  X and Y are
   passed as __v2df, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (int)(P), \
    (__mmask8)-1))
13542
/* Expands to a call to __builtin_ia32_cmpps128_mask.  X and Y are
   passed as __v4sf, P as int, and the mask operand is the constant
   (__mmask8)-1.  The result is cast to __mmask8.  */
#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (int)(P), \
    (__mmask8)-1))
13547
/* Expands to a call to __builtin_ia32_cmpq128_mask.  X and Y are
   passed as __v2di, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
    (__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13552
/* Expands to a call to __builtin_ia32_cmpd128_mask.  X and Y are
   passed as __v4si, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
    (__v4si)(__m128i)(X), \
    (__v4si)(__m128i)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13557
/* Expands to a call to __builtin_ia32_ucmpq128_mask.  X and Y are
   passed as __v2di, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
    (__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13562
/* Expands to a call to __builtin_ia32_ucmpd128_mask.  X and Y are
   passed as __v4si, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
    (__v4si)(__m128i)(X), \
    (__v4si)(__m128i)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13567
/* Expands to a call to __builtin_ia32_cmppd128_mask.  X and Y are
   passed as __v2df, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13572
/* Expands to a call to __builtin_ia32_cmpps128_mask.  X and Y are
   passed as __v4sf, P as int, and M as the __mmask8 mask operand.
   The result is cast to __mmask8.  */
#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (int)(P), \
    (__mmask8)(M)))
13577
13578 #endif
13579
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped:
   B is forwarded as the first argument and A as the second.  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps ((B), (A))
13581
/* __DISABLE_AVX512VL__ is defined near the top of this header when it had
   to push the options and select the avx512vl target itself.  In that
   case remove the marker and pop the saved options again.  */
#if defined (__DISABLE_AVX512VL__)
#undef __DISABLE_AVX512VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */
13586
13587 #endif /* _AVX512VLINTRIN_H_INCLUDED */