/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "coretypes.h"
28 #include "double-int.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
39 #include "stor-layout.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
54 #include "statistics.h"
56 #include "fixed-value.h"
64 #include "diagnostic-core.h"
67 #include "dominance.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
83 #include "plugin-api.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
91 #include "gimple-expr.h"
97 #include "tm-constrs.h"
101 #include "sched-int.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
/* Default stack-probe limit when the target headers do not provide one;
   -1 means "no limit".  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder strategy for the unused half of a {32-bit, 64-bit}
   stringop_algs pair.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
216 /* Processor costs (relative to an add) */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
444 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
445 (we ensure the alignment). For small blocks inline loop is still a
446 noticeable win, for bigger blocks either rep movsl or rep movsb is
447 way to go. Rep movsb has apparently more expensive startup time in CPU,
448 but after 4K the difference is down in the noise. */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
679 /* For some reason, Athlon deals better with REP prefix (relative to loops)
680 compared to K8. Alignment becomes important after 8 bytes for memcpy and
681 128 bytes for memset. */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
756 /* K8 has optimized REP instruction for medium sized blocks, but for very
757 small blocks it is better to use loop. For large blocks, libcall can
758 do nontemporary accesses and beat inline considerably. */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set number of simultaneous prefetches
816 to a large constant to reflect this (it probably is not a good idea not
817 to limit number of prefetches at all, as their execution also takes some
819 100, /* number of parallel prefetches */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
843 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
844 very small blocks it is better to use loop. For large blocks, libcall can
845 do nontemporary accesses and beat inline considerably. */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
/* Cost table for AMD Family 10h (amdfam10/Barcelona) cores.
   NOTE(review): this copy of the table is truncated — the MOVE_RATIO
   initializer (after '"large" insn'), the '2, /* Branch cost */' line
   (after the parallel-prefetches count), the trailing
   amdfam10_memcpy/amdfam10_memset members (after FSQRT) and the closing
   "};" were all lost; restore them before compiling.  */
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer that belongs here is missing.  */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
/* NOTE(review): the lines below are fragments of a MOVD-latency comment
   whose /* ... */ delimiters were lost; kept commented out:
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
901 MOVD reg64, xmmreg Double FADD 3
903 MOVD reg32, xmmreg Double FADD 3
*/
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set number of simultaneous prefetches
910 to a large constant to reflect this (it probably is not a good idea not
911 to limit number of prefetches at all, as their execution also takes some
   time).  */
913 100, /* number of parallel prefetches */
/* NOTE(review): the Branch cost initializer that belongs here is missing.  */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* NOTE(review): the amdfam10_memcpy/amdfam10_memset members belong here.  */
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of amdfam10_cost is missing here.  */
937 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
938 very small blocks it is better to use loop. For large blocks, libcall
939 can do non-temporal accesses and beat inline considerably. */
/* memcpy strategy table: {max block size, algorithm, noalign} entries tried in
   order, size -1 = unbounded; entries [0]/[1] presumably select the 32-bit and
   64-bit variants — confirm against the i386 stringop expansion code.  */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
/* memset strategy table; same layout as bdver1_memcpy ({max size, alg,
   noalign} entries, -1 = unbounded; [0]/[1] presumably 32-/64-bit).  */
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
/* Cost table for AMD Bulldozer v1 (BDVER1) cores.
   NOTE(review): this copy is truncated — the MOVE_RATIO initializer (after
   '"large" insn'), the trailing bdver1_memcpy/bdver1_memset members (after
   FSQRT) and the closing "};" were lost; restore them before compiling.  */
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer that belongs here is missing.  */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
/* NOTE(review): the lines below are fragments of a MOVD-latency comment
   whose /* ... */ delimiters were lost; kept commented out:
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
996 MOVD reg64, xmmreg Double FADD 3
998 MOVD reg32, xmmreg Double FADD 3
*/
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set number of simultaneous prefetches
1005 to a large constant to reflect this (it probably is not a good idea not
1006 to limit number of prefetches at all, as their execution also takes some
   time).  */
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
/* NOTE(review): the bdver1_memcpy/bdver1_memset members belong here.  */
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 2, /* cond_taken_branch_cost. */
1029 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of bdver1_cost is missing here.  */
1032 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1033 very small blocks it is better to use loop. For large blocks, libcall
1034 can do non-temporal accesses and beat inline considerably. */
/* memcpy strategy table: {max size, alg, noalign} entries, -1 = unbounded;
   entries [0]/[1] presumably 32-/64-bit variants — confirm.  */
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
/* memset strategy table; same layout as bdver2_memcpy.  */
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
/* Cost table for AMD Bulldozer v2 (BDVER2/Piledriver) cores; values track
   bdver1_cost above.
   NOTE(review): this copy is truncated — the MOVE_RATIO initializer (after
   '"large" insn'), the trailing bdver2_memcpy/bdver2_memset members (after
   FSQRT) and the closing "};" were lost; restore them before compiling.  */
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer that belongs here is missing.  */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
/* NOTE(review): the lines below are fragments of a MOVD-latency comment
   whose /* ... */ delimiters were lost; kept commented out:
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1092 MOVD reg64, xmmreg Double FADD 3
1094 MOVD reg32, xmmreg Double FADD 3
*/
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set number of simultaneous prefetches
1101 to a large constant to reflect this (it probably is not a good idea not
1102 to limit number of prefetches at all, as their execution also takes some
   time).  */
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
/* NOTE(review): the bdver2_memcpy/bdver2_memset members belong here.  */
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 2, /* cond_taken_branch_cost. */
1125 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of bdver2_cost is missing here.  */
1129 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1130 very small blocks it is better to use loop. For large blocks, libcall
1131 can do non-temporal accesses and beat inline considerably. */
/* memcpy strategy table: {max size, alg, noalign} entries, -1 = unbounded;
   entries [0]/[1] presumably 32-/64-bit variants — confirm.  */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
/* memset strategy table; same layout as bdver3_memcpy.  */
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
/* Cost table for AMD Bulldozer v3 (BDVER3/Steamroller) cores; values track
   bdver1_cost/bdver2_cost above.
   NOTE(review): this copy is truncated — the MOVE_RATIO initializer (after
   '"large" insn'), the trailing bdver3_memcpy/bdver3_memset members (after
   FSQRT) and the closing "};" were lost; restore them before compiling.  */
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer that belongs here is missing.  */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set number of simultaneous prefetches
1188 to a large constant to reflect this (it probably is not a good idea not
1189 to limit number of prefetches at all, as their execution also takes some
   time).  */
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
/* NOTE(review): the bdver3_memcpy/bdver3_memset members belong here.  */
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 2, /* cond_taken_branch_cost. */
1212 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of bdver3_cost is missing here.  */
1215 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1216 very small blocks it is better to use loop. For large blocks, libcall
1217 can do non-temporal accesses and beat inline considerably. */
/* memcpy strategy table: {max size, alg, noalign} entries, -1 = unbounded;
   entries [0]/[1] presumably 32-/64-bit variants — confirm.  */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
/* memset strategy table; same layout as bdver4_memcpy.  */
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
/* Cost table for AMD Bulldozer v4 (BDVER4/Excavator) cores; values track the
   other bdver*_cost tables above.
   NOTE(review): this copy is truncated — the MOVE_RATIO initializer (after
   '"large" insn'), the trailing bdver4_memcpy/bdver4_memset members (after
   FSQRT) and the closing "};" were lost; restore them before compiling.  */
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer that belongs here is missing.  */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
   time).  */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
/* NOTE(review): the bdver4_memcpy/bdver4_memset members belong here.  */
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 2, /* cond_taken_branch_cost. */
1298 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of bdver4_cost is missing here.  */
1301 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1302 very small blocks it is better to use loop. For large blocks, libcall can
1303 do non-temporal accesses and beat inline considerably. */
/* memcpy strategy table: {max size, alg, noalign} entries, -1 = unbounded;
   entries [0]/[1] presumably 32-/64-bit variants — confirm.  */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
/* memset strategy table; same layout as btver1_memcpy.  */
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
/* Cost table for AMD Bobcat v1 (BTVER1) cores.
   NOTE(review): this copy is truncated — the MOVE_RATIO initializer (after
   '"large" insn'), the trailing btver1_memcpy/btver1_memset members (after
   FSQRT) and the closing "};" were lost; restore them before compiling.  */
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer that belongs here is missing.  */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
/* NOTE(review): the lines below are fragments of a MOVD-latency comment
   whose /* ... */ delimiters were lost; kept commented out:
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1359 MOVD reg64, xmmreg Double FADD 3
1361 MOVD reg32, xmmreg Double FADD 3
*/
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* NOTE(review): the btver1_memcpy/btver1_memset members belong here.  */
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of btver1_cost is missing here.  */
/* memcpy strategy table: {max size, alg, noalign} entries, -1 = unbounded;
   entries [0]/[1] presumably 32-/64-bit variants — confirm.  */
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
/* memset strategy table; same layout as btver2_memcpy.  */
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
/* Cost table for AMD Jaguar (BTVER2) cores; values track btver1_cost above
   except for the larger L2.
   NOTE(review): this copy is truncated — the MOVE_RATIO initializer (after
   '"large" insn'), the trailing btver2_memcpy/btver2_memset members (after
   FSQRT) and the closing "};" were lost; restore them before compiling.  */
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer that belongs here is missing.  */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
/* NOTE(review): the lines below are fragments of a MOVD-latency comment
   whose /* ... */ delimiters were lost; kept commented out:
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1445 MOVD reg64, xmmreg Double FADD 3
1447 MOVD reg32, xmmreg Double FADD 3
*/
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* NOTE(review): the btver2_memcpy/btver2_memset members belong here.  */
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of btver2_cost is missing here.  */
/* memcpy strategy table: {max size, alg, noalign} entries, -1 = unbounded;
   DUMMY_STRINGOP_ALGS presumably marks the unused second (64-bit) slot.  */
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
/* memset strategy table; same layout as pentium4_memcpy.  */
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
/* Cost table for Intel Pentium 4 (NetBurst) cores.
   NOTE(review): this copy is truncated — the MOVE_RATIO initializer (after
   '"large" insn'), the trailing pentium4_memcpy/pentium4_memset members
   (after FSQRT) and the closing "};" were lost; restore before compiling.  */
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer that belongs here is missing.  */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* NOTE(review): the pentium4_memcpy/pentium4_memset members belong here.  */
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of pentium4_cost is missing here.  */
/* memcpy strategy table: {max size, alg, noalign} entries, -1 = unbounded;
   entries [0]/[1] presumably 32-/64-bit variants — confirm.  */
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
/* memset strategy table; same layout as nocona_memcpy.  */
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table for Intel Nocona (64-bit NetBurst/Prescott) cores.
   NOTE(review): this copy is truncated — the trailing
   nocona_memcpy/nocona_memset members (after FSQRT) and the closing "};"
   were lost; restore them before compiling.  */
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* NOTE(review): the nocona_memcpy/nocona_memset members belong here.  */
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of nocona_cost is missing here.  */
/* memcpy strategy table: {max size, alg, noalign} entries, -1 = unbounded;
   entries [0]/[1] presumably 32-/64-bit variants — confirm.  */
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset strategy table; same layout as atom_memcpy.  */
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table for Intel Atom (Bonnell) cores.  Note the lea cost is
   deliberately COSTS_N_INSNS (1) + 1: slightly dearer than an add, but
   cheaper than two instructions.
   NOTE(review): this copy is truncated — the trailing atom_memcpy/atom_memset
   members (after FSQRT) and the closing "};" were lost; restore them before
   compiling.  */
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* NOTE(review): the atom_memcpy/atom_memset members belong here.  */
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of atom_cost is missing here.  */
/* memcpy expansion strategies when tuning for Silvermont.
   NOTE(review): each triple looks like {max byte count (-1 = no limit),
   algorithm, noalign flag} and the two array entries presumably select
   by 32-bit vs. 64-bit target — confirm against the stringop_algs
   definition and its users.  */
static stringop_algs slm_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion strategies when tuning for Silvermont; same entry
   layout as slm_memcpy above.  */
static stringop_algs slm_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion strategies when tuning for the generic "intel"
   target; identical to slm_memcpy at the time of writing.  */
static stringop_algs intel_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion strategies when tuning for the generic "intel"
   target; identical to slm_memset at the time of writing.  */
static stringop_algs intel_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
/* memcpy expansion strategies for the "generic" tuning (aimed at
   Core-i7-and-newer and btver1-and-newer chips, per the comment
   above).  */
static stringop_algs generic_memcpy[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset expansion strategies for the "generic" tuning; same cutoffs
   as generic_memcpy.  */
static stringop_algs generic_memset[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration lea is 2 cycles and more. With
1878 this cost however our current implementation of synth_mult results in
1879 use of unnecessary temporary registers causing regression on several
1880 SPECfp benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
/* Benchmarks show large regressions on the K8 sixtrack benchmark when this
   value is increased to the perhaps more appropriate value of 5.  */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
/* core_cost should produce code tuned for the Core family of CPUs.  */
/* memcpy expansion strategies for the Core family of CPUs.  Note the
   third field is true here (unlike the slm/intel/generic tables) —
   NOTE(review): presumably the noalign flag; confirm against the
   stringop_algs definition.  */
static stringop_algs core_memcpy[2] = {
  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration lea is 2 cycles and more. With
1965 this cost however our current implementation of synth_mult results in
1966 use of unnecessary temporary registers causing regression on several
1967 SPECfp benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME perhaps more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
/* Cost table for the CPU selected by -mtune.  The pentium_cost initial
   value is a placeholder; NOTE(review): presumably replaced during
   option processing — confirm at the -mtune handling site.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Cost table actually consulted for code generation; set by -mtune,
   or by -Os (size-oriented costs).  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks: one bit per processor_type
   enumerator.  The compound m_* masks below OR related processors
   together so a single selector can name a whole family.  */

/* Intel processors.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_NEHALEM (1<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
#define m_BONNELL (1<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
#define m_KNL (1<<PROCESSOR_KNL)
#define m_INTEL (1<<PROCESSOR_INTEL)

/* AMD processors.  */
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER4 (1<<PROCESSOR_BDVER4)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC (1<<PROCESSOR_GENERIC)
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue.  */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2324 <- sse_regs_save_offset
2327 [va_arg registers] |
2331 [padding2] | = to_allocate
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
/* Which cpu are we scheduling for (drives the insn scheduler's
   pipeline model).  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for (the -mtune selection).  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use (the -march selection).  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option: attribute name used to force realignment of
   the argument pointer.  */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
/* Generator-function pointers for patterns that exist in SImode and
   DImode variants.  NOTE(review): presumably pointed at the variant
   matching the target word size during option processing, so emitters
   need not test TARGET_64BIT at every call site — the selection code
   is not visible in this chunk; confirm.  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   the command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling-ABI-specific va_list type nodes (SysV vs. Microsoft);
   GTY(()) so they survive garbage collection.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2408 /* Register class used for passing given 64bit part of the argument.
2409 These represent classes as documented by the PS ABI, with the exception
2410 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2411 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2413 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2414 whenever possible (upper half does contain padding). */
2415 enum x86_64_reg_class
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2430 #define MAX_CLASSES 8
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2478 static enum calling_abi ix86_function_abi (const_tree);
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2495 /* Processor target table, indexed by processor number */
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2507 /* This table must be in sync with enum processor_type in i386.h. */
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
/* Body of the "vzeroupper" RTL pass (see pass_insert_vzeroupper below):
   re-run mode switching restricted to the AVX upper-128 entity so
   vzeroupper instructions are placed after reload.
   NOTE(review): the return-type line, braces, the declaration of `i`,
   and the final return are elided from this excerpt.  */
2539 rest_of_handle_insert_vzeroupper (void)
2543 /* vzeroupper instructions are inserted immediately after reload to
2544 account for possible spills from 256bit registers. The pass
2545 reuses mode switching infrastructure by re-running mode insertion
2546 pass, so disable entities that have already been processed. */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
/* Re-enable only the AVX upper-128-bit state entity.  */
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
/* Pass metadata for the vzeroupper-insertion RTL pass.
   NOTE(review): the initializer's surrounding braces are elided here.  */
2559 const pass_data pass_data_insert_vzeroupper =
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
/* RTL pass wrapper that runs rest_of_handle_insert_vzeroupper.
   NOTE(review): class braces, access labels and the constructor body
   are elided in this excerpt.  */
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579 /* opt_pass methods: */
/* Gate: run when AVX vzeroupper insertion applies -- AVX without
   AVX512F, -mvzeroupper, and expensive optimizations on.  The tail of
   the condition is elided here.  */
2580 virtual bool gate (function *)
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2587 virtual unsigned int execute (function *)
2589 return rest_of_handle_insert_vzeroupper ();
2592 }; // class pass_insert_vzeroupper
/* Factory used by the pass manager to instantiate the vzeroupper pass.
   NOTE(review): the "rtl_opt_pass *" return-type line and the function
   braces are elided in this excerpt.  Caller owns the returned pass.  */
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2599 return new pass_insert_vzeroupper (ctxt);
2602 /* Return true if a red-zone is in use. */
/* NOTE(review): the "bool" return-type line and braces are elided.  */
2605 ix86_using_red_zone (void)
/* The red zone is usable only when enabled and not under the 64-bit
   Microsoft ABI.  */
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
/* NOTE(review): the return-type line, several parameters (the visible
   signature is truncated after `fpmath`; `add_nl_p` and local
   declarations such as num, i, j, len, line_len, sep_len, abi, isa_other,
   ret, ptr, len2 appear on elided lines), and many statements/braces are
   missing from this excerpt.  */
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
/* Local pair type: an option spelling and the mask bit(s) it reports.  */
2618 struct ix86_target_opts
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2624 /* This table is ordered so that options like -msse4.2 that imply
2625 preceding options are matched (and their bits cleared) first. */
2626 static struct ix86_target_opts isa_opts[] =
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
/* Non-ISA target_flags bits and their option spellings.  */
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
/* +6 extra slots: -march=, -mtune=, the ABI word, the "(other isa: ...)"
   entry, the "(other flags: ...)" entry, and -mfpmath=.  */
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2718 char target_other[40];
2728 memset (opts, '\0', sizeof (opts));
2730 /* Add -march= option. */
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2737 /* Add -mtune= option. */
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
/* The ABI bits have been reported via `abi`; drop them from `isa` so
   they are not reported again below.  */
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2757 opts[num++][0] = abi;
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2762 if ((isa & isa_opts[i].mask) != 0)
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
/* Any leftover ISA bits have no table entry; report them numerically.  */
2769 if (isa && add_nl_p)
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2779 if ((flags & flag_opts[i].mask) != 0)
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2786 if (flags && add_nl_p)
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2792 /* Add -fpmath= option. */
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2799 opts[num++][1] = "387";
2803 opts[num++][1] = "sse";
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2819 gcc_assert (num < ARRAY_SIZE (opts));
2821 /* Size the string. */
/* Separator is presumably " \\\n" (3 chars) when wrapping lines,
   else a single space -- the consuming code is elided; confirm.  */
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2827 for (j = 0; j < 2; j++)
2829 len += strlen (opts[i][j]);
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2836 for (i = 0; i < num; i++)
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap to a fresh line once the current one would exceed 70 columns.  */
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2856 for (j = 0; j < 2; j++)
2859 memcpy (ptr, opts[i][j], len2[j]);
2861 line_len += len2[j];
/* Sanity check that we did not write past the sized buffer.  */
2866 gcc_assert (ret + len >= ptr);
2871 /* Return true, if profiling code should be emitted before
2872 prologue. Otherwise it returns false.
2873 Note: For x86 with "hotfix" it is sorried. */
/* I.e. profiling precedes the prologue exactly when -mfentry is in
   effect (flag_fentry nonzero).
   NOTE(review): return-type line and braces are elided here.  */
2875 ix86_profile_before_prologue (void)
2877 return flag_fentry != 0;
2880 /* Function that is callable from the debugger to print the current
/* ... options (comment tail elided).  Prints the -m option string built
   by ix86_target_string to stderr, or "<no options>" when none.
   NOTE(review): the if/else structure, the trailing ix86_target_string
   arguments, and the free of `opts` are on elided lines -- confirm the
   allocated string is freed in the full source.  */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2891 fprintf (stderr, "%s\n\n", opts);
2895 fputs ("<no options>\n\n", stderr);
/* Table of stringop algorithm names, generated from stringop.def via the
   DEF_ALG stringizing macro.  NOTE(review): the matching #undef DEF_ALG
   and the closing "};" are elided from this excerpt.  */
2900 static const char *stringop_alg_names[] = {
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2908 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2909 The string is of the following form (or comma separated list of it):
2911 strategy_alg:max_size:[align|noalign]
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], in which min_size is the max_size + 1 of the
2915 preceding range. The last size range must have max_size == -1.
2920 -mmemcpy-strategy=libcall:-1:noalign
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2928 This is to tell the compiler to use the following strategy for memset
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
/* Per-range record parsed from the strategy string (members max, alg,
   noalign are declared on elided lines).  */
2933 struct stringop_size_range
/* Parse -mmemcpy-strategy= / -mmemset-strategy= (grammar documented in
   the comment above) and override the default size/alg table in place.
   NOTE(review): return type, braces, and the declarations of alg_name,
   align, maxs, i, n (and its increment) are elided in this excerpt.
   NOTE(review): the `n > MAX_STRINGOP_ALGS` check below appears to run
   only AFTER input_ranges[n] has been written inside the loop, which
   would allow an out-of-bounds write for over-long strategy strings --
   verify against the full source and bound n inside the loop if so.  */
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
/* Choose the cost table to override: memset vs memcpy, 32- vs 64-bit.  */
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2953 curr_range_str = strategy_str;
/* Split the comma-separated list destructively, one range at a time.  */
2960 next_range_str = strchr (curr_range_str, ',');
2962 *next_range_str++ = '\0';
/* Field widths bound the %[...] and %s conversions to the (elided)
   alg_name/align buffer sizes.  */
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Ranges must be strictly increasing, except -1 meaning "unbounded".  */
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Look the algorithm name up in the generated name table.  */
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2985 error ("wrong stringop strategy name %s specified for option %s",
2987 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2991 input_ranges[n].max = maxs;
2992 input_ranges[n].alg = (stringop_alg) i;
2993 if (!strcmp (align, "align"))
2994 input_ranges[n].noalign = false;
2995 else if (!strcmp (align, "noalign"))
2996 input_ranges[n].noalign = true;
2999 error ("unknown alignment %s specified for option %s",
3000 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3004 curr_range_str = next_range_str;
3006 while (curr_range_str);
/* The final range must be open-ended (max == -1).  */
3008 if (input_ranges[n - 1].max != -1)
3010 error ("the max value for the last size range should be -1"
3012 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3016 if (n > MAX_STRINGOP_ALGS)
3018 error ("too many size ranges specified in option %s",
3019 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3023 /* Now override the default algs array. */
/* const_cast is deliberate: the cost tables are nominally const but are
   patched in place by this option.  */
3024 for (i = 0; i < n; i++)
3026 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3027 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3028 = input_ranges[i].alg;
3029 *const_cast<int *>(&default_algs->size[i].noalign)
3030 = input_ranges[i].noalign;
3035 /* parse -mtune-ctrl= option. When DUMP is true,
3036 print the features that are explicitly set. */
/* Comma-separated feature names toggle ix86_tune_features[]; a leading
   '^' clears the feature instead of setting it.
   NOTE(review): return type, braces, and the declarations of i and
   `clear` (and its assignment) are elided; also confirm in the full
   source that `orig` (the xstrdup'd copy mutated by the splitting below)
   is freed after the loop, otherwise this leaks per invocation.  */
3039 parse_mtune_ctrl_str (bool dump)
3041 if (!ix86_tune_ctrl_string)
3044 char *next_feature_string = NULL;
3045 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3046 char *orig = curr_feature_string;
/* Split on commas, in place.  */
3052 next_feature_string = strchr (curr_feature_string, ',');
3053 if (next_feature_string)
3054 *next_feature_string++ = '\0';
3055 if (*curr_feature_string == '^')
3057 curr_feature_string++;
3060 for (i = 0; i < X86_TUNE_LAST; i++)
3062 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3064 ix86_tune_features[i] = !clear;
3066 fprintf (stderr, "Explicitly %s feature %s\n",
3067 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* Loop ran off the end: no feature matched this token.  The "- 1"
   re-includes the stripped '^' in the diagnostic.  */
3071 if (i == X86_TUNE_LAST)
3072 error ("Unknown parameter to option -mtune-ctrl: %s",
3073 clear ? curr_feature_string - 1 : curr_feature_string)
3074 curr_feature_string = next_feature_string;
3076 while (curr_feature_string);
3080 /* Helper function to set ix86_tune_features. IX86_TUNE is the
/* ... processor to tune for (comment tail elided).  Initializes each
   tuning feature from initial_ix86_tune_features[] (or to 0 under
   -mtune-ctrl no-default), optionally dumps the table when DUMP, then
   applies explicit -mtune-ctrl= overrides.
   NOTE(review): return type, braces, declaration of i, and the dump
   guard condition are elided from this excerpt.  */
3084 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
/* Each feature word is a bitmask over processors; select our bit.  */
3086 unsigned int ix86_tune_mask = 1u << ix86_tune;
3089 for (i = 0; i < X86_TUNE_LAST; ++i)
3091 if (ix86_tune_no_default)
3092 ix86_tune_features[i] = 0;
3094 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3099 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3100 for (i = 0; i < X86_TUNE_LAST; i++)
3101 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3102 ix86_tune_features[i] ? "on" : "off");
/* Explicit -mtune-ctrl= settings win over the defaults above.  */
3105 parse_mtune_ctrl_str (dump);
3109 /* Override various settings based on options. If MAIN_ARGS_P, the
3110 options are from the command line, otherwise they are from
3114 ix86_option_override_internal (bool main_args_p,
3115 struct gcc_options *opts,
3116 struct gcc_options *opts_set)
3119 unsigned int ix86_arch_mask;
3120 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3125 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3126 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3127 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3128 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3129 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3130 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3131 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3132 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3133 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3134 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3135 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3136 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3137 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3138 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3139 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3140 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3141 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3142 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3143 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3144 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3145 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3146 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3147 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3148 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3149 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3150 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3151 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3152 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3153 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3154 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3155 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3156 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3157 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3158 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3159 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3160 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3161 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3162 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3163 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3164 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3165 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3166 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3167 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3168 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3169 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3170 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3171 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3172 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3173 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3174 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3175 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3176 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3177 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3178 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3179 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3180 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3181 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3184 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3185 | PTA_CX16 | PTA_FXSR)
3186 #define PTA_NEHALEM \
3187 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3188 #define PTA_WESTMERE \
3189 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3190 #define PTA_SANDYBRIDGE \
3191 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3192 #define PTA_IVYBRIDGE \
3193 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3194 #define PTA_HASWELL \
3195 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3196 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3197 #define PTA_BROADWELL \
3198 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3200 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3201 #define PTA_BONNELL \
3202 (PTA_CORE2 | PTA_MOVBE)
3203 #define PTA_SILVERMONT \
3204 (PTA_WESTMERE | PTA_MOVBE)
3206 /* if this reaches 64, need to widen struct pta flags below */
3210 const char *const name; /* processor name or nickname. */
3211 const enum processor_type processor;
3212 const enum attr_cpu schedule;
3213 const unsigned HOST_WIDE_INT flags;
3215 const processor_alias_table[] =
3217 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3218 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3219 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3220 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3221 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3222 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3223 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3224 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3225 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3226 PTA_MMX | PTA_SSE | PTA_FXSR},
3227 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3228 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3229 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3230 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3231 PTA_MMX | PTA_SSE | PTA_FXSR},
3232 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3233 PTA_MMX | PTA_SSE | PTA_FXSR},
3234 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3235 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3236 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3237 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3238 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3239 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3240 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3241 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3242 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3244 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3245 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3246 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3247 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3248 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3249 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3251 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3253 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3255 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3257 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3258 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3259 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3260 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3261 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3262 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3263 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3264 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3265 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3266 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3267 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3268 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3269 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3270 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3271 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3272 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3273 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3274 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3275 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3276 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3277 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3278 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3279 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3280 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3281 {"x86-64", PROCESSOR_K8, CPU_K8,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3283 {"k8", PROCESSOR_K8, CPU_K8,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3285 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3286 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3288 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3289 {"opteron", PROCESSOR_K8, CPU_K8,
3290 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3291 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3292 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3293 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3294 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3295 {"athlon64", PROCESSOR_K8, CPU_K8,
3296 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3297 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3298 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3299 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3300 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3301 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3302 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3303 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3304 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3305 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3306 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3307 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3308 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3309 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3310 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3311 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3312 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3313 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3314 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3315 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3317 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3318 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3319 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3320 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3321 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3322 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3323 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3324 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3325 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3326 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3327 | PTA_XSAVEOPT | PTA_FSGSBASE},
3328 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3329 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3330 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3331 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3332 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3333 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3334 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3336 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3337 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3338 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3339 | PTA_FXSR | PTA_XSAVE},
3340 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3341 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3342 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3343 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3344 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3345 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3347 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3349 | PTA_HLE /* flags are only used for -march switch. */ },
3352 /* -mrecip options. */
3355 const char *string; /* option name */
3356 unsigned int mask; /* mask bits to set */
3358 const recip_options[] =
3360 { "all", RECIP_MASK_ALL },
3361 { "none", RECIP_MASK_NONE },
3362 { "div", RECIP_MASK_DIV },
3363 { "sqrt", RECIP_MASK_SQRT },
3364 { "vec-div", RECIP_MASK_VEC_DIV },
3365 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3368 int const pta_size = ARRAY_SIZE (processor_alias_table);
3370 /* Set up prefix/suffix so the error messages refer to either the command
3371 line argument, or the attribute(target). */
3380 prefix = "option(\"";
3385 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3386 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3387 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3388 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3389 #ifdef TARGET_BI_ARCH
3392 #if TARGET_BI_ARCH == 1
3393 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3394 is on and OPTION_MASK_ABI_X32 is off. We turn off
3395 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3397 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3398 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3400 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3401 on and OPTION_MASK_ABI_64 is off. We turn off
3402 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3403 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3404 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3405 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3411 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3413 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3414 OPTION_MASK_ABI_64 for TARGET_X32. */
3415 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3416 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3418 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3419 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3420 | OPTION_MASK_ABI_X32
3421 | OPTION_MASK_ABI_64);
3422 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3424 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3425 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3426 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3427 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3430 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3431 SUBTARGET_OVERRIDE_OPTIONS;
3434 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3435 SUBSUBTARGET_OVERRIDE_OPTIONS;
3438 /* -fPIC is the default for x86_64. */
3439 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3440 opts->x_flag_pic = 2;
3442 /* Need to check -mtune=generic first. */
3443 if (opts->x_ix86_tune_string)
3445 /* As special support for cross compilers we read -mtune=native
3446 as -mtune=generic. With native compilers we won't see the
3447 -mtune=native, as it was changed by the driver. */
3448 if (!strcmp (opts->x_ix86_tune_string, "native"))
3450 opts->x_ix86_tune_string = "generic";
3452 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3453 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3454 "%stune=k8%s or %stune=generic%s instead as appropriate",
3455 prefix, suffix, prefix, suffix, prefix, suffix);
3459 if (opts->x_ix86_arch_string)
3460 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3461 if (!opts->x_ix86_tune_string)
3463 opts->x_ix86_tune_string
3464 = processor_target_table[TARGET_CPU_DEFAULT].name;
3465 ix86_tune_defaulted = 1;
3468 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3469 or defaulted. We need to use a sensible tune option. */
3470 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3472 opts->x_ix86_tune_string = "generic";
3476 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3477 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3479 /* rep; movq isn't available in 32-bit code. */
3480 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3481 opts->x_ix86_stringop_alg = no_stringop;
3484 if (!opts->x_ix86_arch_string)
3485 opts->x_ix86_arch_string
3486 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3487 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3489 ix86_arch_specified = 1;
3491 if (opts_set->x_ix86_pmode)
3493 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3494 && opts->x_ix86_pmode == PMODE_SI)
3495 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3496 && opts->x_ix86_pmode == PMODE_DI))
3497 error ("address mode %qs not supported in the %s bit mode",
3498 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3499 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3502 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3503 ? PMODE_DI : PMODE_SI;
3505 if (!opts_set->x_ix86_abi)
3506 opts->x_ix86_abi = DEFAULT_ABI;
3508 /* For targets using ms ABI enable ms-extensions, if not
3509 explicit turned off. For non-ms ABI we turn off this
3511 if (!opts_set->x_flag_ms_extensions)
3512 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3514 if (opts_set->x_ix86_cmodel)
3516 switch (opts->x_ix86_cmodel)
3520 if (opts->x_flag_pic)
3521 opts->x_ix86_cmodel = CM_SMALL_PIC;
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3529 if (opts->x_flag_pic)
3530 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3531 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3532 error ("code model %qs not supported in the %s bit mode",
3534 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3535 error ("code model %qs not supported in x32 mode",
3541 if (opts->x_flag_pic)
3542 opts->x_ix86_cmodel = CM_LARGE_PIC;
3543 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3544 error ("code model %qs not supported in the %s bit mode",
3546 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3547 error ("code model %qs not supported in x32 mode",
3552 if (opts->x_flag_pic)
3553 error ("code model %s does not support PIC mode", "32");
3554 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3555 error ("code model %qs not supported in the %s bit mode",
3560 if (opts->x_flag_pic)
3562 error ("code model %s does not support PIC mode", "kernel");
3563 opts->x_ix86_cmodel = CM_32;
3565 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3566 error ("code model %qs not supported in the %s bit mode",
3576 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3577 use of rip-relative addressing. This eliminates fixups that
3578 would otherwise be needed if this object is to be placed in a
3579 DLL, and is essentially just as efficient as direct addressing. */
3580 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3581 && (TARGET_RDOS || TARGET_PECOFF))
3582 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3583 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3586 opts->x_ix86_cmodel = CM_32;
3588 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3590 error ("-masm=intel not supported in this configuration");
3591 opts->x_ix86_asm_dialect = ASM_ATT;
3593 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3594 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3595 sorry ("%i-bit mode not compiled in",
3596 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3598 for (i = 0; i < pta_size; i++)
3599 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3601 ix86_schedule = processor_alias_table[i].schedule;
3602 ix86_arch = processor_alias_table[i].processor;
3603 /* Default cpu tuning to the architecture. */
3604 ix86_tune = ix86_arch;
3606 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3607 && !(processor_alias_table[i].flags & PTA_64BIT))
3608 error ("CPU you selected does not support x86-64 "
3611 if (processor_alias_table[i].flags & PTA_MMX
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3614 if (processor_alias_table[i].flags & PTA_3DNOW
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3617 if (processor_alias_table[i].flags & PTA_3DNOW_A
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3620 if (processor_alias_table[i].flags & PTA_SSE
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3623 if (processor_alias_table[i].flags & PTA_SSE2
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3626 if (processor_alias_table[i].flags & PTA_SSE3
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3629 if (processor_alias_table[i].flags & PTA_SSSE3
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3632 if (processor_alias_table[i].flags & PTA_SSE4_1
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3635 if (processor_alias_table[i].flags & PTA_SSE4_2
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3638 if (processor_alias_table[i].flags & PTA_AVX
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3641 if (processor_alias_table[i].flags & PTA_AVX2
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3644 if (processor_alias_table[i].flags & PTA_FMA
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3647 if (processor_alias_table[i].flags & PTA_SSE4A
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3650 if (processor_alias_table[i].flags & PTA_FMA4
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3653 if (processor_alias_table[i].flags & PTA_XOP
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3656 if (processor_alias_table[i].flags & PTA_LWP
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3659 if (processor_alias_table[i].flags & PTA_ABM
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3662 if (processor_alias_table[i].flags & PTA_BMI
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3665 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3668 if (processor_alias_table[i].flags & PTA_TBM
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3671 if (processor_alias_table[i].flags & PTA_BMI2
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3674 if (processor_alias_table[i].flags & PTA_CX16
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3677 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3680 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3681 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3684 if (processor_alias_table[i].flags & PTA_MOVBE
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3687 if (processor_alias_table[i].flags & PTA_AES
3688 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3689 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3690 if (processor_alias_table[i].flags & PTA_SHA
3691 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3692 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3693 if (processor_alias_table[i].flags & PTA_PCLMUL
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3696 if (processor_alias_table[i].flags & PTA_FSGSBASE
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3699 if (processor_alias_table[i].flags & PTA_RDRND
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3702 if (processor_alias_table[i].flags & PTA_F16C
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3705 if (processor_alias_table[i].flags & PTA_RTM
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3708 if (processor_alias_table[i].flags & PTA_HLE
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3711 if (processor_alias_table[i].flags & PTA_PRFCHW
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3714 if (processor_alias_table[i].flags & PTA_RDSEED
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3717 if (processor_alias_table[i].flags & PTA_ADX
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3720 if (processor_alias_table[i].flags & PTA_FXSR
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3723 if (processor_alias_table[i].flags & PTA_XSAVE
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3726 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3729 if (processor_alias_table[i].flags & PTA_AVX512F
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3732 if (processor_alias_table[i].flags & PTA_AVX512ER
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3735 if (processor_alias_table[i].flags & PTA_AVX512PF
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3738 if (processor_alias_table[i].flags & PTA_AVX512CD
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3741 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3744 if (processor_alias_table[i].flags & PTA_PCOMMIT
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3747 if (processor_alias_table[i].flags & PTA_CLWB
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3750 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3753 if (processor_alias_table[i].flags & PTA_XSAVEC
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3756 if (processor_alias_table[i].flags & PTA_XSAVES
3757 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3758 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3759 if (processor_alias_table[i].flags & PTA_AVX512DQ
3760 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3761 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3762 if (processor_alias_table[i].flags & PTA_AVX512BW
3763 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3765 if (processor_alias_table[i].flags & PTA_AVX512VL
3766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3768 if (processor_alias_table[i].flags & PTA_MPX
3769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3771 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3774 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3777 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3778 x86_prefetch_sse = true;
3783 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3784 error ("Intel MPX does not support x32");
3786 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3787 error ("Intel MPX does not support x32");
3789 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3790 error ("generic CPU can be used only for %stune=%s %s",
3791 prefix, suffix, sw);
3792 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3793 error ("intel CPU can be used only for %stune=%s %s",
3794 prefix, suffix, sw);
3795 else if (i == pta_size)
3796 error ("bad value (%s) for %sarch=%s %s",
3797 opts->x_ix86_arch_string, prefix, suffix, sw);
3799 ix86_arch_mask = 1u << ix86_arch;
3800 for (i = 0; i < X86_ARCH_LAST; ++i)
3801 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3803 for (i = 0; i < pta_size; i++)
3804 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3806 ix86_schedule = processor_alias_table[i].schedule;
3807 ix86_tune = processor_alias_table[i].processor;
3808 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 if (!(processor_alias_table[i].flags & PTA_64BIT))
3812 if (ix86_tune_defaulted)
3814 opts->x_ix86_tune_string = "x86-64";
3815 for (i = 0; i < pta_size; i++)
3816 if (! strcmp (opts->x_ix86_tune_string,
3817 processor_alias_table[i].name))
3819 ix86_schedule = processor_alias_table[i].schedule;
3820 ix86_tune = processor_alias_table[i].processor;
3823 error ("CPU you selected does not support x86-64 "
3827 /* Intel CPUs have always interpreted SSE prefetch instructions as
3828 NOPs; so, we can enable SSE prefetch instructions even when
3829 -mtune (rather than -march) points us to a processor that has them.
3830 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3831 higher processors. */
3833 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3834 x86_prefetch_sse = true;
3838 if (ix86_tune_specified && i == pta_size)
3839 error ("bad value (%s) for %stune=%s %s",
3840 opts->x_ix86_tune_string, prefix, suffix, sw);
3842 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3844 #ifndef USE_IX86_FRAME_POINTER
3845 #define USE_IX86_FRAME_POINTER 0
3848 #ifndef USE_X86_64_FRAME_POINTER
3849 #define USE_X86_64_FRAME_POINTER 0
3852 /* Set the default values for switches whose default depends on TARGET_64BIT
3853 in case they weren't overwritten by command line options. */
3854 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3856 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3857 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3858 if (opts->x_flag_asynchronous_unwind_tables
3859 && !opts_set->x_flag_unwind_tables
3860 && TARGET_64BIT_MS_ABI)
3861 opts->x_flag_unwind_tables = 1;
3862 if (opts->x_flag_asynchronous_unwind_tables == 2)
3863 opts->x_flag_unwind_tables
3864 = opts->x_flag_asynchronous_unwind_tables = 1;
3865 if (opts->x_flag_pcc_struct_return == 2)
3866 opts->x_flag_pcc_struct_return = 0;
3870 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3871 opts->x_flag_omit_frame_pointer
3872 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3873 if (opts->x_flag_asynchronous_unwind_tables == 2)
3874 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3875 if (opts->x_flag_pcc_struct_return == 2)
3876 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3879 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3880 /* TODO: ix86_cost should be chosen at instruction or function granuality
3881 so for cold code we use size_cost even in !optimize_size compilation. */
3882 if (opts->x_optimize_size)
3883 ix86_cost = &ix86_size_cost;
3885 ix86_cost = ix86_tune_cost;
3887 /* Arrange to set up i386_stack_locals for all functions. */
3888 init_machine_status = ix86_init_machine_status;
3890 /* Validate -mregparm= value. */
3891 if (opts_set->x_ix86_regparm)
3893 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3894 warning (0, "-mregparm is ignored in 64-bit mode");
3895 if (opts->x_ix86_regparm > REGPARM_MAX)
3897 error ("-mregparm=%d is not between 0 and %d",
3898 opts->x_ix86_regparm, REGPARM_MAX);
3899 opts->x_ix86_regparm = 0;
3902 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_regparm = REGPARM_MAX;
3905 /* Default align_* from the processor table. */
3906 if (opts->x_align_loops == 0)
3908 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3909 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3911 if (opts->x_align_jumps == 0)
3913 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3914 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3916 if (opts->x_align_functions == 0)
3918 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3921 /* Provide default for -mbranch-cost= value. */
3922 if (!opts_set->x_ix86_branch_cost)
3923 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3925 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3927 opts->x_target_flags
3928 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3930 /* Enable by default the SSE and MMX builtins. Do allow the user to
3931 explicitly disable any of these. In particular, disabling SSE and
3932 MMX for kernel code is extremely useful. */
3933 if (!ix86_arch_specified)
3934 opts->x_ix86_isa_flags
3935 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3936 | TARGET_SUBTARGET64_ISA_DEFAULT)
3937 & ~opts->x_ix86_isa_flags_explicit);
3939 if (TARGET_RTD_P (opts->x_target_flags))
3940 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3944 opts->x_target_flags
3945 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3947 if (!ix86_arch_specified)
3948 opts->x_ix86_isa_flags
3949 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3951 /* i386 ABI does not specify red zone. It still makes sense to use it
3952 when programmer takes care to stack from being destroyed. */
3953 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3954 opts->x_target_flags |= MASK_NO_RED_ZONE;
3957 /* Keep nonleaf frame pointers. */
3958 if (opts->x_flag_omit_frame_pointer)
3959 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3960 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3961 opts->x_flag_omit_frame_pointer = 1;
3963 /* If we're doing fast math, we don't care about comparison order
3964 wrt NaNs. This lets us use a shorter comparison sequence. */
3965 if (opts->x_flag_finite_math_only)
3966 opts->x_target_flags &= ~MASK_IEEE_FP;
3968 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3969 since the insns won't need emulation. */
3970 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3971 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3973 /* Likewise, if the target doesn't have a 387, or we've specified
3974 software floating point, don't use 387 inline intrinsics. */
3975 if (!TARGET_80387_P (opts->x_target_flags))
3976 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3978 /* Turn on MMX builtins for -msse. */
3979 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 opts->x_ix86_isa_flags
3981 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3983 /* Enable SSE prefetch. */
3984 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3985 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3986 x86_prefetch_sse = true;
3988 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3989 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3990 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3991 opts->x_ix86_isa_flags
3992 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3994 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3995 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3996 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_isa_flags
3998 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4000 /* Enable lzcnt instruction for -mabm. */
4001 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4002 opts->x_ix86_isa_flags
4003 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4005 /* Validate -mpreferred-stack-boundary= value or default it to
4006 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4007 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4008 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4010 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4011 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4012 int max = (TARGET_SEH ? 4 : 12);
4014 if (opts->x_ix86_preferred_stack_boundary_arg < min
4015 || opts->x_ix86_preferred_stack_boundary_arg > max)
4018 error ("-mpreferred-stack-boundary is not supported "
4021 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4022 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4025 ix86_preferred_stack_boundary
4026 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4029 /* Set the default value for -mstackrealign. */
4030 if (opts->x_ix86_force_align_arg_pointer == -1)
4031 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4033 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4035 /* Validate -mincoming-stack-boundary= value or default it to
4036 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4037 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4038 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4040 if (opts->x_ix86_incoming_stack_boundary_arg
4041 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4042 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4043 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4044 opts->x_ix86_incoming_stack_boundary_arg,
4045 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4048 ix86_user_incoming_stack_boundary
4049 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4050 ix86_incoming_stack_boundary
4051 = ix86_user_incoming_stack_boundary;
4055 #ifndef NO_PROFILE_COUNTERS
4056 if (flag_nop_mcount)
4057 error ("-mnop-mcount is not compatible with this target");
4059 if (flag_nop_mcount && flag_pic)
4060 error ("-mnop-mcount is not implemented for -fPIC");
4062 /* Accept -msseregparm only if at least SSE support is enabled. */
4063 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4064 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4065 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4067 if (opts_set->x_ix86_fpmath)
4069 if (opts->x_ix86_fpmath & FPMATH_SSE)
4071 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4073 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4074 opts->x_ix86_fpmath = FPMATH_387;
4076 else if ((opts->x_ix86_fpmath & FPMATH_387)
4077 && !TARGET_80387_P (opts->x_target_flags))
4079 warning (0, "387 instruction set disabled, using SSE arithmetics");
4080 opts->x_ix86_fpmath = FPMATH_SSE;
4084 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4085 fpmath=387. The second is however default at many targets since the
4086 extra 80bit precision of temporaries is considered to be part of ABI.
4087 Overwrite the default at least for -ffast-math.
4088 TODO: -mfpmath=both seems to produce same performing code with bit
4089 smaller binaries. It is however not clear if register allocation is
4090 ready for this setting.
4091 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
4092 codegen. We may switch to 387 with -ffast-math for size optimized
4094 else if (fast_math_flags_set_p (&global_options)
4095 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4096 opts->x_ix86_fpmath = FPMATH_SSE;
4098 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4100 /* If the i387 is disabled, then do not return values in it. */
4101 if (!TARGET_80387_P (opts->x_target_flags))
4102 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4104 /* Use external vectorized library in vectorizing intrinsics. */
4105 if (opts_set->x_ix86_veclibabi_type)
4106 switch (opts->x_ix86_veclibabi_type)
4108 case ix86_veclibabi_type_svml:
4109 ix86_veclib_handler = ix86_veclibabi_svml;
4112 case ix86_veclibabi_type_acml:
4113 ix86_veclib_handler = ix86_veclibabi_acml;
4120 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4121 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4122 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4124 /* If stack probes are required, the space used for large function
4125 arguments on the stack must also be probed, so enable
4126 -maccumulate-outgoing-args so this happens in the prologue. */
4127 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4128 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4130 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4131 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4132 "for correctness", prefix, suffix);
4133 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4136 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4139 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4140 p = strchr (internal_label_prefix, 'X');
4141 internal_label_prefix_len = p - internal_label_prefix;
4145 /* When scheduling description is not available, disable scheduler pass
4146 so it won't slow down the compilation and make x87 code slower. */
4147 if (!TARGET_SCHEDULE)
4148 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4150 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4151 ix86_tune_cost->simultaneous_prefetches,
4152 opts->x_param_values,
4153 opts_set->x_param_values);
4154 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4155 ix86_tune_cost->prefetch_block,
4156 opts->x_param_values,
4157 opts_set->x_param_values);
4158 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4159 ix86_tune_cost->l1_cache_size,
4160 opts->x_param_values,
4161 opts_set->x_param_values);
4162 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4163 ix86_tune_cost->l2_cache_size,
4164 opts->x_param_values,
4165 opts_set->x_param_values);
4167 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
4168 if (opts->x_flag_prefetch_loop_arrays < 0
4170 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4171 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4172 opts->x_flag_prefetch_loop_arrays = 1;
4174 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4175 can be opts->x_optimized to ap = __builtin_next_arg (0). */
4176 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4177 targetm.expand_builtin_va_start = NULL;
4179 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4181 ix86_gen_leave = gen_leave_rex64;
4182 if (Pmode == DImode)
4184 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4185 ix86_gen_tls_local_dynamic_base_64
4186 = gen_tls_local_dynamic_base_64_di;
4190 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4191 ix86_gen_tls_local_dynamic_base_64
4192 = gen_tls_local_dynamic_base_64_si;
4196 ix86_gen_leave = gen_leave;
4198 if (Pmode == DImode)
4200 ix86_gen_add3 = gen_adddi3;
4201 ix86_gen_sub3 = gen_subdi3;
4202 ix86_gen_sub3_carry = gen_subdi3_carry;
4203 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4204 ix86_gen_andsp = gen_anddi3;
4205 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4206 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4207 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4208 ix86_gen_monitor = gen_sse3_monitor_di;
4212 ix86_gen_add3 = gen_addsi3;
4213 ix86_gen_sub3 = gen_subsi3;
4214 ix86_gen_sub3_carry = gen_subsi3_carry;
4215 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4216 ix86_gen_andsp = gen_andsi3;
4217 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4218 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4219 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4220 ix86_gen_monitor = gen_sse3_monitor_si;
4224 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4225 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4226 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4229 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4231 if (opts->x_flag_fentry > 0)
4232 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4234 opts->x_flag_fentry = 0;
4236 else if (TARGET_SEH)
4238 if (opts->x_flag_fentry == 0)
4239 sorry ("-mno-fentry isn%'t compatible with SEH");
4240 opts->x_flag_fentry = 1;
4242 else if (opts->x_flag_fentry < 0)
4244 #if defined(PROFILE_BEFORE_PROLOGUE)
4245 opts->x_flag_fentry = 1;
4247 opts->x_flag_fentry = 0;
4251 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4252 opts->x_target_flags |= MASK_VZEROUPPER;
4253 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4254 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4255 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4256 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4257 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4258 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4259 /* Enable 128-bit AVX instruction generation
4260 for the auto-vectorizer. */
4261 if (TARGET_AVX128_OPTIMAL
4262 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4263 opts->x_target_flags |= MASK_PREFER_AVX128;
4265 if (opts->x_ix86_recip_name)
4267 char *p = ASTRDUP (opts->x_ix86_recip_name);
4269 unsigned int mask, i;
4272 while ((q = strtok (p, ",")) != NULL)
4283 if (!strcmp (q, "default"))
4284 mask = RECIP_MASK_ALL;
4287 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4288 if (!strcmp (q, recip_options[i].string))
4290 mask = recip_options[i].mask;
4294 if (i == ARRAY_SIZE (recip_options))
4296 error ("unknown option for -mrecip=%s", q);
4298 mask = RECIP_MASK_NONE;
4302 opts->x_recip_mask_explicit |= mask;
4304 opts->x_recip_mask &= ~mask;
4306 opts->x_recip_mask |= mask;
4310 if (TARGET_RECIP_P (opts->x_target_flags))
4311 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4312 else if (opts_set->x_target_flags & MASK_RECIP)
4313 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4315 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4316 for 64-bit Bionic. */
4317 if (TARGET_HAS_BIONIC
4318 && !(opts_set->x_target_flags
4319 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4320 opts->x_target_flags |= (TARGET_64BIT
4321 ? MASK_LONG_DOUBLE_128
4322 : MASK_LONG_DOUBLE_64);
4324 /* Only one of them can be active. */
4325 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4326 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4328 /* Save the initial options in case the user does function specific
4331 target_option_default_node = target_option_current_node
4332 = build_target_option_node (opts);
4334 /* Handle stack protector */
4335 if (!opts_set->x_ix86_stack_protector_guard)
4336 opts->x_ix86_stack_protector_guard
4337 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4339 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4340 if (opts->x_ix86_tune_memcpy_strategy)
4342 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4343 ix86_parse_stringop_strategy_string (str, false);
4347 if (opts->x_ix86_tune_memset_strategy)
4349 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4350 ix86_parse_stringop_strategy_string (str, true);
4355 /* Implement the TARGET_OPTION_OVERRIDE hook. */
/* Top-level option-override entry point: validates/normalizes all
   command-line options via ix86_option_override_internal on the global
   option structures, then registers the vzeroupper-insertion pass to
   run immediately after the "reload" pass (PASS_POS_INSERT_AFTER).
   NOTE(review): the function's "static void" header line, its braces
   and the initializer's closing "};" appear to have been dropped from
   this listing — confirm against the original file.  */
4358 ix86_option_override (void)
4360 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
/* Pass registration record: insert one instance of the vzeroupper pass
   after "reload".  */
4361 struct register_pass_info insert_vzeroupper_info
4362 = { pass_insert_vzeroupper, "reload",
4363 1, PASS_POS_INSERT_AFTER
4366 ix86_option_override_internal (true, &global_options, &global_options_set);
4369 /* This needs to be done at start up. It's convenient to do it here. */
4370 register_pass (&insert_vzeroupper_info);
4373 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
/* Returns a freshly xstrdup'ed option string telling an offload
   compiler which ABI to target: "-foffload-abi=lp64" on the guarded
   path, "-foffload-abi=ilp32" otherwise.  Ownership of the string
   transfers to the caller (who must free it).
   NOTE(review): the return-type line and the condition guarding the
   lp64 return are missing from this listing — presumably an LP64/64-bit
   target test; confirm against the original source.  */
4375 ix86_offload_options (void)
4378 return xstrdup ("-foffload-abi=lp64");
4379 return xstrdup ("-foffload-abi=ilp32");
4382 /* Update register usage after having seen the compiler flags. */
/* Adjust fixed_regs[], call_used_regs[] and reg_names[] to match the
   selected ISA/ABI: register groups that do not exist (or are unusable)
   under the current flags are marked fixed and call-used and lose their
   names, conditional CALL_USED_REGISTERS entries are resolved via
   c_mask, and the CLOBBERED_REGS class is rebuilt as the call-used
   subset of GENERAL_REGS.
   NOTE(review): several guard conditions (e.g. a !TARGET_64BIT test
   before the REX loops, and the !TARGET_MMX / !TARGET_SSE / !TARGET_MPX
   tests before their respective loops), plus the function braces, are
   missing from this listing — confirm guards against the original.  */
4385 ix86_conditional_register_usage (void)
4389 /* For 32-bit targets, squash the REX registers. */
4392 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4393 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4394 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4395 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4396 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4397 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
/* Select which conditional entries of CALL_USED_REGISTERS apply:
   bit 3 for the 64-bit MS ABI, bit 2 for other 64-bit ABIs (the
   remaining arm of this conditional is missing from the listing).  */
4400 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4401 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4402 : TARGET_64BIT ? (1 << 2)
4405 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4407 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4409 /* Set/reset conditionally defined registers from
4410 CALL_USED_REGISTERS initializer. */
/* Values > 1 in call_used_regs[] encode "call-used only under some
   ABIs"; collapse them to 0/1 using c_mask.  */
4411 if (call_used_regs[i] > 1)
4412 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4414 /* Calculate registers of CLOBBERED_REGS register set
4415 as call used registers from GENERAL_REGS register set. */
4416 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4417 && call_used_regs[i])
4418 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4421 /* If MMX is disabled, squash the registers. */
4423 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4424 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4425 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4427 /* If SSE is disabled, squash the registers. */
4429 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4430 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4431 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4433 /* If the FPU is disabled, squash the registers. */
/* x87 registers stay usable if either the 387 is enabled or floats
   are still returned in 80387 registers.  */
4434 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4435 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4436 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4437 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4439 /* If AVX512F is disabled, squash the registers. */
/* AVX-512 contributes both the upper sixteen SSE registers and the
   mask registers; both groups vanish without AVX512F.  */
4440 if (! TARGET_AVX512F)
4442 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4443 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4445 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4446 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4449 /* If MPX is disabled, squash the registers. */
4451 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4452 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4456 /* Save the current options */
/* Copy the target-specific state (global ix86_* variables plus the
   saved-option fields of OPTS) into PTR, the cl_target_option record
   attached to a function's target attribute.  The mirror of this is
   ix86_function_specific_restore below; keep the field lists in sync.  */
4459 ix86_function_specific_save (struct cl_target_option *ptr,
4460 struct gcc_options *opts)
4462 ptr->arch = ix86_arch;
4463 ptr->schedule = ix86_schedule;
4464 ptr->prefetch_sse = x86_prefetch_sse;
4465 ptr->tune = ix86_tune;
4466 ptr->branch_cost = ix86_branch_cost;
4467 ptr->tune_defaulted = ix86_tune_defaulted;
4468 ptr->arch_specified = ix86_arch_specified;
4469 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4470 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4471 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4472 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4473 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4474 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4475 ptr->x_ix86_abi = opts->x_ix86_abi;
4476 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4477 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4478 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4479 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4480 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4481 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4482 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4483 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4484 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4485 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4486 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4487 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4488 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4489 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4490 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4491 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4492 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4493 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4494 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4495 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4497 /* The fields are char but the variables are not; make sure the
4498 values fit in the fields.  (Assignment above silently truncated;
these asserts catch any enum value that no longer fits in a char.)  */
4499 gcc_assert (ptr->arch == ix86_arch);
4500 gcc_assert (ptr->schedule == ix86_schedule);
4501 gcc_assert (ptr->tune == ix86_tune);
4502 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4505 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copy the state saved in PTR
   back into the global ix86_* variables and OPTS, then refresh the
   derived state (cost tables, per-arch feature bits, tune features)
   that depends on the restored values.  */
4508 ix86_function_specific_restore (struct gcc_options *opts,
4509 struct cl_target_option *ptr)
4511 enum processor_type old_tune = ix86_tune;
4512 enum processor_type old_arch = ix86_arch;
4513 unsigned int ix86_arch_mask;
4516 /* We don't change -fPIC. */
4517 opts->x_flag_pic = flag_pic;
4519 ix86_arch = (enum processor_type) ptr->arch;
4520 ix86_schedule = (enum attr_cpu) ptr->schedule;
4521 ix86_tune = (enum processor_type) ptr->tune;
4522 x86_prefetch_sse = ptr->prefetch_sse;
4523 opts->x_ix86_branch_cost = ptr->branch_cost;
4524 ix86_tune_defaulted = ptr->tune_defaulted;
4525 ix86_arch_specified = ptr->arch_specified;
4526 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4527 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4528 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4529 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4530 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4531 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4532 opts->x_ix86_abi = ptr->x_ix86_abi;
4533 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4534 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4535 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4536 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4537 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4538 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4539 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4540 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4541 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4542 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4543 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4544 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4545 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4546 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4547 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4548 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4549 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4550 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4551 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4552 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4553 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4554 /* TODO: ix86_cost should be chosen at instruction or function granuality
4555 so for cold code we use size_cost even in !optimize_size compilation. */
4556 if (opts->x_optimize_size)
4557 ix86_cost = &ix86_size_cost;
4559 ix86_cost = ix86_tune_cost;
4561 /* Recreate the arch feature tests if the arch changed */
4562 if (old_arch != ix86_arch)
4564 ix86_arch_mask = 1u << ix86_arch;
4565 for (i = 0; i < X86_ARCH_LAST; ++i)
4566 ix86_arch_features[i]
4567 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4570 /* Recreate the tune optimization tests */
4571 if (old_tune != ix86_tune)
4572 set_ix86_tune_features (ix86_tune, false);
4575 /* Adjust target options after streaming them in. This is mainly about
4576 reconciling them with global options. */
4579 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4581 /* flag_pic is a global option, but ix86_cmodel is target saved option
4582 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4583 for PIC, or error out. */
/* NOTE(review): the case labels and the surrounding flag_pic test are
   elided in this view.  This first switch presumably runs when flag_pic
   is set, mapping each non-PIC code model to its PIC variant.  */
4585 switch (ptr->x_ix86_cmodel)
4588 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4592 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4596 ptr->x_ix86_cmodel = CM_LARGE_PIC;
/* The kernel code model has no PIC counterpart, so it is an error.  */
4600 error ("code model %s does not support PIC mode", "kernel");
/* Second switch: presumably the !flag_pic direction, mapping PIC code
   models back to their non-PIC variants — TODO confirm against the
   elided condition.  */
4607 switch (ptr->x_ix86_cmodel)
4610 ptr->x_ix86_cmodel = CM_SMALL;
4614 ptr->x_ix86_cmodel = CM_MEDIUM;
4618 ptr->x_ix86_cmodel = CM_LARGE;
4626 /* Print the current options */
/* Dump the saved target options in PTR to FILE, indented by INDENT
   columns: the arch/tune processor ids with their names, the branch
   cost, and the full target string built by ix86_target_string
   (which is heap-allocated and freed here).  */
4629 ix86_function_specific_print (FILE *file, int indent,
4630 struct cl_target_option *ptr)
4633 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4634 NULL, NULL, ptr->x_ix86_fpmath, false);
4636 gcc_assert (ptr->arch < PROCESSOR_max);
4637 fprintf (file, "%*sarch = %d (%s)\n",
4639 ptr->arch, processor_target_table[ptr->arch].name);
4641 gcc_assert (ptr->tune < PROCESSOR_max);
4642 fprintf (file, "%*stune = %d (%s)\n",
4644 ptr->tune, processor_target_table[ptr->tune].name);
4646 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4650 fprintf (file, "%*s%s\n", indent, "", target_string);
4651 free (target_string);
4656 /* Inner function to process the attribute((target(...))), take an argument and
4657 set the current options from the argument. If we have a list, recursively go
over the list.  P_STRINGS collects the "arch="/"tune=" string values;
OPTS/OPTS_SET/ENUM_OPTS_SET receive the decoded option state.
Returns false (visible via the recursion and error paths; the exact
return statements are elided in this view) when an argument is
malformed or unknown.  */
4661 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4662 struct gcc_options *opts,
4663 struct gcc_options *opts_set,
4664 struct gcc_options *enum_opts_set)
/* Table-entry constructors: each attrs[] row is
   { name, strlen (name), kind, option enum, target-flag mask }.  */
4669 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4670 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4671 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4672 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4673 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4689 enum ix86_opt_type type;
/* ISA sub-options understood inside attribute((target("...")));
   each maps directly to the corresponding -m<isa> command-line flag.  */
4694 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4695 IX86_ATTR_ISA ("abm", OPT_mabm),
4696 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4697 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4698 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4699 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4700 IX86_ATTR_ISA ("aes", OPT_maes),
4701 IX86_ATTR_ISA ("sha", OPT_msha),
4702 IX86_ATTR_ISA ("avx", OPT_mavx),
4703 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4704 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4705 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4706 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4707 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4708 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4709 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4710 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4711 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4712 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4713 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4714 IX86_ATTR_ISA ("sse", OPT_msse),
4715 IX86_ATTR_ISA ("sse2", OPT_msse2),
4716 IX86_ATTR_ISA ("sse3", OPT_msse3),
4717 IX86_ATTR_ISA ("sse4", OPT_msse4),
4718 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4719 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4720 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4721 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4722 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4723 IX86_ATTR_ISA ("fma", OPT_mfma),
4724 IX86_ATTR_ISA ("xop", OPT_mxop),
4725 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4726 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4727 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4728 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4729 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4730 IX86_ATTR_ISA ("hle", OPT_mhle),
4731 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4732 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4733 IX86_ATTR_ISA ("adx", OPT_madx),
4734 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4735 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4736 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4737 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4738 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4739 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4740 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4741 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4742 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4743 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4744 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
/* enum option: fpmath= takes a keyword argument.  */
4747 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4749 /* string options */
4750 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4751 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* flag options: set or clear a bit in target_flags.  */
4754 IX86_ATTR_YES ("cld",
4758 IX86_ATTR_NO ("fancy-math-387",
4759 OPT_mfancy_math_387,
4760 MASK_NO_FANCY_MATH_387),
4762 IX86_ATTR_YES ("ieee-fp",
4766 IX86_ATTR_YES ("inline-all-stringops",
4767 OPT_minline_all_stringops,
4768 MASK_INLINE_ALL_STRINGOPS),
4770 IX86_ATTR_YES ("inline-stringops-dynamically",
4771 OPT_minline_stringops_dynamically,
4772 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4774 IX86_ATTR_NO ("align-stringops",
4775 OPT_mno_align_stringops,
4776 MASK_NO_ALIGN_STRINGOPS),
4778 IX86_ATTR_YES ("recip",
4784 /* If this is a list, recurse to get the options. */
4785 if (TREE_CODE (args) == TREE_LIST)
4789 for (; args; args = TREE_CHAIN (args))
4790 if (TREE_VALUE (args)
4791 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4792 p_strings, opts, opts_set,
4799 else if (TREE_CODE (args) != STRING_CST)
4801 error ("attribute %<target%> argument not a string");
4805 /* Handle multiple arguments separated by commas. */
4806 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4808 while (next_optstr && *next_optstr != '\0')
4810 char *p = next_optstr;
4812 char *comma = strchr (next_optstr, ',');
4813 const char *opt_string;
4814 size_t len, opt_len;
4819 enum ix86_opt_type type = ix86_opt_unknown;
/* With a comma present, the current token is [p, comma) and parsing
   resumes after it; the no-comma branch is elided in this view.  */
4825 len = comma - next_optstr;
4826 next_optstr = comma + 1;
4834 /* Recognize no-xxx. */
4835 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4844 /* Find the option. */
4847 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4849 type = attrs[i].type;
4850 opt_len = attrs[i].len;
4851 if (ch == attrs[i].string[0]
4852 && ((type != ix86_opt_str && type != ix86_opt_enum)
4855 && memcmp (p, attrs[i].string, opt_len) == 0)
4858 mask = attrs[i].mask;
4859 opt_string = attrs[i].string;
4864 /* Process the option. */
4867 error ("attribute(target(\"%s\")) is unknown", orig_p);
4871 else if (type == ix86_opt_isa)
/* ISA options reuse the command-line machinery so implied ISAs
   are enabled/disabled consistently with -m<isa>.  */
4873 struct cl_decoded_option decoded;
4875 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4876 ix86_handle_option (opts, opts_set,
4877 &decoded, input_location);
4880 else if (type == ix86_opt_yes || type == ix86_opt_no)
4882 if (type == ix86_opt_no)
4883 opt_set_p = !opt_set_p;
4886 opts->x_target_flags |= mask;
4888 opts->x_target_flags &= ~mask;
4891 else if (type == ix86_opt_str)
4895 error ("option(\"%s\") was already specified", opt_string);
/* Stash the string value (e.g. the "xxx" of "arch=xxx") for the
   caller; freed by ix86_valid_target_attribute_tree.  */
4899 p_strings[opt] = xstrdup (p + opt_len);
4902 else if (type == ix86_opt_enum)
4907 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4909 set_option (opts, enum_opts_set, opt, value,
4910 p + opt_len, DK_UNSPECIFIED, input_location,
4914 error ("attribute(target(\"%s\")) is unknown", orig_p);
4926 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.
Parses ARGS via ix86_valid_target_attribute_inner_p into OPTS/OPTS_SET,
reruns the option override machinery when anything differs from the
defaults, and builds a target-option tree node.  Returns error_mark_node
on a malformed attribute.  The original arch/tune/fpmath settings are
restored before returning so global state is not perturbed.  */
4929 ix86_valid_target_attribute_tree (tree args,
4930 struct gcc_options *opts,
4931 struct gcc_options *opts_set)
4933 const char *orig_arch_string = opts->x_ix86_arch_string;
4934 const char *orig_tune_string = opts->x_ix86_tune_string;
4935 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4936 int orig_tune_defaulted = ix86_tune_defaulted;
4937 int orig_arch_specified = ix86_arch_specified;
4938 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4941 struct cl_target_option *def
4942 = TREE_TARGET_OPTION (target_option_default_node);
4943 struct gcc_options enum_opts_set;
4945 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4947 /* Process each of the options on the chain. */
4948 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4949 opts_set, &enum_opts_set))
4950 return error_mark_node;
4952 /* If the changed options are different from the default, rerun
4953 ix86_option_override_internal, and then save the options away.
4954 The string options are are attribute options, and will be undone
4955 when we copy the save structure. */
4956 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4957 || opts->x_target_flags != def->x_target_flags
4958 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4959 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4960 || enum_opts_set.x_ix86_fpmath)
4962 /* If we are using the default tune= or arch=, undo the string assigned,
4963 and use the default. */
4964 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4965 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4966 else if (!orig_arch_specified)
4967 opts->x_ix86_arch_string = NULL;
4969 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4970 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4971 else if (orig_tune_defaulted)
4972 opts->x_ix86_tune_string = NULL;
4974 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4975 if (enum_opts_set.x_ix86_fpmath)
4976 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4977 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4978 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4980 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4981 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4984 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4985 ix86_option_override_internal (false, opts, opts_set);
4987 /* Add any builtin functions with the new isa if any. */
4988 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4990 /* Save the current options unless we are validating options for
the attribute (the condition is elided in this view).  */
4992 t = build_target_option_node (opts);
4994 opts->x_ix86_arch_string = orig_arch_string;
4995 opts->x_ix86_tune_string = orig_tune_string;
4996 opts_set->x_ix86_fpmath = orig_fpmath_set;
4998 /* Free up memory allocated to hold the strings */
4999 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5000 free (option_strings[i]);
5006 /* Hook to validate attribute((target("string"))).
Builds a scratch gcc_options initialized from the function's optimize
node and the default target node, parses ARGS into it, and on success
attaches the resulting target/optimize nodes to FNDECL.  */
5009 ix86_valid_target_attribute_p (tree fndecl,
5010 tree ARG_UNUSED (name),
5012 int ARG_UNUSED (flags))
5014 struct gcc_options func_options;
5015 tree new_target, new_optimize;
5018 /* attribute((target("default"))) does nothing, beyond
5019 affecting multi-versioning. */
5020 if (TREE_VALUE (args)
5021 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5022 && TREE_CHAIN (args) == NULL_TREE
5023 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5026 tree old_optimize = build_optimization_node (&global_options);
5028 /* Get the optimization options of the current function. */
5029 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
/* Fall back to the global optimization node when the function has
   no optimize attribute of its own.  */
5032 func_optimize = old_optimize;
5034 /* Init func_options. */
5035 memset (&func_options, 0, sizeof (func_options));
5036 init_options_struct (&func_options, NULL);
5037 lang_hooks.init_options_struct (&func_options);
5039 cl_optimization_restore (&func_options,
5040 TREE_OPTIMIZATION (func_optimize));
5042 /* Initialize func_options to the default before its target options can
be set.  */
5044 cl_target_option_restore (&func_options,
5045 TREE_TARGET_OPTION (target_option_default_node));
5047 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5048 &global_options_set);
5050 new_optimize = build_optimization_node (&func_options);
5052 if (new_target == error_mark_node)
/* Attribute parsing failed; the error was already emitted.  */
5055 else if (fndecl && new_target)
5057 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5059 if (old_optimize != new_optimize)
5060 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5067 /* Hook to determine if one function can safely inline another.
CALLEE may be inlined into CALLER only when CALLEE's ISA flags are a
subset of CALLER's and the non-ISA target flags, arch, tune, fpmath
and branch cost all agree.  */
5070 ix86_can_inline_p (tree caller, tree callee)
5073 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5074 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5076 /* If callee has no option attributes, then it is ok to inline. */
5080 /* If caller has no option attributes, but callee does then it is not ok to
inline (the callee needs non-default target features).  */
5082 else if (!caller_tree)
5087 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5088 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5090 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
5091 can inline a SSE2 function but a SSE2 function can't inline a SSE4
function.  */
5093 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5094 != callee_opts->x_ix86_isa_flags)
5097 /* See if we have the same non-isa options. */
5098 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5101 /* See if arch, tune, etc. are the same. */
5102 else if (caller_opts->arch != callee_opts->arch)
5105 else if (caller_opts->tune != callee_opts->tune)
5108 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5111 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5122 /* Remember the last target of ix86_set_current_function. */
5123 static GTY(()) tree ix86_previous_fndecl;
5125 /* Set targets globals to the default (or current #pragma GCC target
5126 if active). Invalidate ix86_previous_fndecl cache. */
5129 ix86_reset_previous_fndecl (void)
5131 tree new_tree = target_option_current_node;
5132 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5133 if (TREE_TARGET_GLOBALS (new_tree))
5134 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5135 else if (new_tree == target_option_default_node)
5136 restore_target_globals (&default_target_globals);
/* Otherwise build and cache fresh target globals for this node.  */
5138 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
/* Force the next ix86_set_current_function call to do a full switch.  */
5139 ix86_previous_fndecl = NULL_TREE;
5142 /* Establish appropriate back-end context for processing the function
5143 FNDECL. The argument might be NULL to indicate processing at top
5144 level, outside of any function scope.  Restores the function's saved
target options into global_options and swaps in the matching
target-globals structure, caching FNDECL so repeated calls for the
same function are cheap.  */
5146 ix86_set_current_function (tree fndecl)
5148 /* Only change the context if the function changes. This hook is called
5149 several times in the course of compiling a function, and we don't want to
5150 slow things down too much or call target_reinit when it isn't safe. */
5151 if (fndecl == ix86_previous_fndecl)
/* Determine the target node that is currently in effect.  */
5155 if (ix86_previous_fndecl == NULL_TREE)
5156 old_tree = target_option_current_node;
5157 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5158 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5160 old_tree = target_option_default_node;
5162 if (fndecl == NULL_TREE)
5164 if (old_tree != target_option_current_node)
5165 ix86_reset_previous_fndecl ();
5169 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5170 if (new_tree == NULL_TREE)
5171 new_tree = target_option_default_node;
5173 if (old_tree != new_tree)
/* Mirror of the restore logic in ix86_reset_previous_fndecl.  */
5175 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5176 if (TREE_TARGET_GLOBALS (new_tree))
5177 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5178 else if (new_tree == target_option_default_node)
5179 restore_target_globals (&default_target_globals);
5181 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5183 ix86_previous_fndecl = fndecl;
5187 /* Return true if this goes in large data/bss.
Only meaningful for the medium code models, where objects larger than
ix86_section_threshold (or of unknown size) are placed in the .ldata
and .lbss sections.  */
5190 ix86_in_large_data_p (tree exp)
5192 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5195 /* Functions are never large data. */
5196 if (TREE_CODE (exp) == FUNCTION_DECL)
5199 /* Automatic variables are never large data. */
5200 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
/* An explicit .ldata/.lbss section attribute forces large data.  */
5203 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5205 const char *section = DECL_SECTION_NAME (exp);
5206 if (strcmp (section, ".ldata") == 0
5207 || strcmp (section, ".lbss") == 0)
5213 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5215 /* If this is an incomplete type with size 0, then we can't put it
5216 in data because it might be too big when completed. Also,
5217 int_size_in_bytes returns -1 if size can vary or is larger than
5218 an integer in which case also it is safer to assume that it goes in
large data.  */
5220 if (size <= 0 || size > ix86_section_threshold)
5227 /* Switch to the appropriate section for output of DECL.
5228 DECL is either a `VAR_DECL' node or a constant of some sort.
5229 RELOC indicates whether forming the initial value of DECL requires
5230 link-time relocations.  Large-data objects get an ".l"-prefixed
section matching their category; everything else falls back to the
generic ELF selection.  */
5232 ATTRIBUTE_UNUSED static section *
5233 x86_64_elf_select_section (tree decl, int reloc,
5234 unsigned HOST_WIDE_INT align)
5236 if (ix86_in_large_data_p (decl))
5238 const char *sname = NULL;
5239 unsigned int flags = SECTION_WRITE;
5240 switch (categorize_decl_for_section (decl, reloc))
/* Map each writable-data category to the corresponding ".ldata*"
   section (some case labels are elided in this view).  */
5245 case SECCAT_DATA_REL:
5246 sname = ".ldata.rel";
5248 case SECCAT_DATA_REL_LOCAL:
5249 sname = ".ldata.rel.local";
5251 case SECCAT_DATA_REL_RO:
5252 sname = ".ldata.rel.ro";
5254 case SECCAT_DATA_REL_RO_LOCAL:
5255 sname = ".ldata.rel.ro.local";
/* BSS-like categories additionally get SECTION_BSS.  */
5259 flags |= SECTION_BSS;
5262 case SECCAT_RODATA_MERGE_STR:
5263 case SECCAT_RODATA_MERGE_STR_INIT:
5264 case SECCAT_RODATA_MERGE_CONST:
5268 case SECCAT_SRODATA:
5275 /* We don't split these for medium model. Place them into
5276 default sections and hope for best. */
5281 /* We might get called with string constants, but get_named_section
5282 doesn't like them as they are not DECLs. Also, we need to set
5283 flags in that case. */
5285 return get_section (sname, flags, NULL);
5286 return get_named_section (decl, sname, reloc);
5289 return default_elf_select_section (decl, reloc, align);
5292 /* Select a set of attributes for section NAME based on the properties
5293 of DECL and whether or not RELOC indicates that DECL's initializer
5294 might contain runtime relocations.  Extends the default flags with
SECTION_RELRO for the large read-only-after-reloc data sections and
SECTION_BSS for the large BSS sections.  */
5296 static unsigned int ATTRIBUTE_UNUSED
5297 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5299 unsigned int flags = default_section_type_flags (decl, name, reloc);
5301 if (decl == NULL_TREE
5302 && (strcmp (name, ".ldata.rel.ro") == 0
5303 || strcmp (name, ".ldata.rel.ro.local") == 0))
5304 flags |= SECTION_RELRO;
/* Match ".lbss" exactly or the prefixes ".lbss." / ".gnu.linkonce.lb.".
   The prefix lengths were previously 5 and 16, one short of the
   literals' lengths (6 and 17), so e.g. ".lbssX" matched spuriously and
   the trailing '.' of the linkonce prefix was never checked.  */
5306 if (strcmp (name, ".lbss") == 0
5307 || strncmp (name, ".lbss.", 6) == 0
5308 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5309 flags |= SECTION_BSS;
5314 /* Build up a unique section name, expressed as a
5315 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5316 RELOC indicates whether the initial value of EXP requires
5317 link-time relocations.  Large-data decls get an ".l"-prefixed
per-decl section name (with ".gnu.linkonce" prepended when COMDAT
groups are unavailable); all others use the default scheme.  */
5319 static void ATTRIBUTE_UNUSED
5320 x86_64_elf_unique_section (tree decl, int reloc)
5322 if (ix86_in_large_data_p (decl))
5324 const char *prefix = NULL;
5325 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5326 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5328 switch (categorize_decl_for_section (decl, reloc))
/* Short ".ld"/".lb"/".lr" prefixes are used for one-only sections,
   full names otherwise (some case labels are elided in this view).  */
5331 case SECCAT_DATA_REL:
5332 case SECCAT_DATA_REL_LOCAL:
5333 case SECCAT_DATA_REL_RO:
5334 case SECCAT_DATA_REL_RO_LOCAL:
5335 prefix = one_only ? ".ld" : ".ldata";
5338 prefix = one_only ? ".lb" : ".lbss";
5341 case SECCAT_RODATA_MERGE_STR:
5342 case SECCAT_RODATA_MERGE_STR_INIT:
5343 case SECCAT_RODATA_MERGE_CONST:
5344 prefix = one_only ? ".lr" : ".lrodata";
5346 case SECCAT_SRODATA:
5353 /* We don't split these for medium model. Place them into
5354 default sections and hope for best. */
5359 const char *name, *linkonce;
5362 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5363 name = targetm.strip_name_encoding (name);
5365 /* If we're using one_only, then there needs to be a .gnu.linkonce
5366 prefix to the section name. */
5367 linkonce = one_only ? ".gnu.linkonce" : "";
5369 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5371 set_decl_section_name (decl, string);
/* Not large data: let the generic code pick the unique section.  */
5375 default_unique_section (decl, reloc);
#ifdef COMMON_ASM_OP
5379 /* This says how to output assembler code to declare an
5380 uninitialized external linkage data object.
5382 For medium model x86-64 we need to use .largecomm opcode for
large objects (above ix86_section_threshold); smaller ones use the
ordinary COMMON_ASM_OP directive.  */
5385 x86_elf_aligned_common (FILE *file,
5386 const char *name, unsigned HOST_WIDE_INT size,
5389 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5390 && size > (unsigned int)ix86_section_threshold)
5391 fputs ("\t.largecomm\t", file)
5393 fputs (COMMON_ASM_OP, file);
5394 assemble_name (file, name);
/* Emit ",<size>,<alignment in bytes>".  */
5395 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5396 size, align / BITS_PER_UNIT);
5400 /* Utility function for targets to use in implementing
5401 ASM_OUTPUT_ALIGNED_BSS.  Switches to .lbss for large medium-model
objects (above ix86_section_threshold), otherwise to the normal BSS
section, then emits alignment, the object label and SIZE bytes of
space (at least 1 so the label is distinct).  */
5404 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5405 unsigned HOST_WIDE_INT size, int align)
5407 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5408 && size > (unsigned int)ix86_section_threshold)
5409 switch_to_section (get_named_section (decl, ".lbss", 0));
5411 switch_to_section (bss_section);
5412 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5413 #ifdef ASM_DECLARE_OBJECT_NAME
5414 last_assemble_variable_decl = decl;
5415 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5417 /* Standard thing is just output label for the object. */
5418 ASM_OUTPUT_LABEL (file, name);
5419 #endif /* ASM_DECLARE_OBJECT_NAME */
5420 ASM_OUTPUT_SKIP (file, size ? size : 1);
5423 /* Decide whether we must probe the stack before any space allocation
5424 on this target. It's essentially TARGET_STACK_PROBE except when
5425 -fstack-check causes the stack to be already probed differently. */
5428 ix86_target_stack_probe (void)
5430 /* Do not probe the stack twice if static stack checking is enabled. */
5431 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
/* Static checking already emits its own probes; returning the target
   default here would double-probe.  */
5434 return TARGET_STACK_PROBE;
5437 /* Decide whether we can make a sibling call to a function. DECL is the
5438 declaration of the function being targeted by the call and EXP is the
5439 CALL_EXPR representing the call.  Rejects sibcalls that would clobber
%ebx for PIC, unalign the stack, mismatch return-value locations or
ABIs, or starve an indirect call of a scratch register.  */
5442 ix86_function_ok_for_sibcall (tree decl, tree exp)
5444 tree type, decl_or_type;
5447 /* If we are generating position-independent code, we cannot sibcall
5448 optimize any indirect call, or a direct call to a global function,
5449 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5453 && (!decl || !targetm.binds_local_p (decl)))
5456 /* If we need to align the outgoing stack, then sibcalling would
5457 unalign the stack, which may break the called function. */
5458 if (ix86_minimum_incoming_stack_boundary (true)
5459 < PREFERRED_STACK_BOUNDARY)
/* With a DECL we can inspect the function type directly.  */
5464 decl_or_type = decl;
5465 type = TREE_TYPE (decl);
5469 /* We're looking at the CALL_EXPR, we need the type of the function. */
5470 type = CALL_EXPR_FN (exp); /* pointer expression */
5471 type = TREE_TYPE (type); /* pointer type */
5472 type = TREE_TYPE (type); /* function type */
5473 decl_or_type = type;
5476 /* Check that the return value locations are the same. Like
5477 if we are returning floats on the 80387 register stack, we cannot
5478 make a sibcall from a function that doesn't return a float to a
5479 function that does or, conversely, from a function that does return
5480 a float to a function that doesn't; the necessary stack adjustment
5481 would not be executed. This is also the place we notice
5482 differences in the return value ABI. Note that it is ok for one
5483 of the functions to have void return type as long as the return
5484 value of the other is passed in a register. */
5485 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5486 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5488 if (STACK_REG_P (a) || STACK_REG_P (b))
5490 if (!rtx_equal_p (a, b))
5493 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5495 else if (!rtx_equal_p (a, b))
5500 /* The SYSV ABI has more call-clobbered registers;
5501 disallow sibcalls from MS to SYSV. */
5502 if (cfun->machine->call_abi == MS_ABI
5503 && ix86_function_type_abi (type) == SYSV_ABI)
5508 /* If this call is indirect, we'll need to be able to use a
5509 call-clobbered register for the address of the target function.
5510 Make sure that all such registers are not used for passing
5511 parameters. Note that DLLIMPORT functions are indirect. */
5513 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5515 if (ix86_function_regparm (type, NULL) >= 3)
5517 /* ??? Need to count the actual number of registers to be used,
5518 not the possible number of registers. Fix later. */
5524 /* Otherwise okay. That also includes certain types of indirect calls. */
5528 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5529 and "sseregparm" calling convention attributes;
5530 arguments as in struct attribute_spec.handler. */
5533 ix86_handle_cconv_attribute (tree *node, tree name,
/* NOTE(review): this extract has elided interior lines (the numeric
   prefixes are original-file line numbers); all surviving tokens are
   preserved except the one diagnostic-string fix flagged below.  */
/* Reject the attribute on anything that is not a function-ish node.  */
5538 if (TREE_CODE (*node) != FUNCTION_TYPE
5539 && TREE_CODE (*node) != METHOD_TYPE
5540 && TREE_CODE (*node) != FIELD_DECL
5541 && TREE_CODE (*node) != TYPE_DECL)
5543 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5545 *no_add_attrs = true;
5549 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5550 if (is_attribute_p ("regparm", name))
5554 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5556 error ("fastcall and regparm attributes are not compatible");
5559 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* Fix: the diagnostic previously misspelled the attribute as "regparam";
   it is spelled "regparm" everywhere else in this function.  */
5561 error ("regparm and thiscall attributes are not compatible");
/* Validate the regparm argument: an integer constant no larger than
   REGPARM_MAX.  */
5564 cst = TREE_VALUE (args);
5565 if (TREE_CODE (cst) != INTEGER_CST)
5567 warning (OPT_Wattributes,
5568 "%qE attribute requires an integer constant argument",
5570 *no_add_attrs = true;
5572 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5574 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5576 *no_add_attrs = true;
5584 /* Do not warn when emulating the MS ABI. */
5585 if ((TREE_CODE (*node) != FUNCTION_TYPE
5586 && TREE_CODE (*node) != METHOD_TYPE)
5587 || ix86_function_type_abi (*node) != MS_ABI)
5588 warning (OPT_Wattributes, "%qE attribute ignored",
5590 *no_add_attrs = true;
5594 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5595 if (is_attribute_p ("fastcall", name))
5597 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5599 error ("fastcall and cdecl attributes are not compatible");
5601 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5603 error ("fastcall and stdcall attributes are not compatible");
5605 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5607 error ("fastcall and regparm attributes are not compatible");
5609 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5611 error ("fastcall and thiscall attributes are not compatible");
5615 /* Can combine stdcall with fastcall (redundant), regparm and
5617 else if (is_attribute_p ("stdcall", name))
5619 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5621 error ("stdcall and cdecl attributes are not compatible");
5623 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5625 error ("stdcall and fastcall attributes are not compatible");
5627 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5629 error ("stdcall and thiscall attributes are not compatible");
5633 /* Can combine cdecl with regparm and sseregparm. */
5634 else if (is_attribute_p ("cdecl", name))
5636 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5638 error ("stdcall and cdecl attributes are not compatible");
5640 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5642 error ("fastcall and cdecl attributes are not compatible");
5644 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5646 error ("cdecl and thiscall attributes are not compatible");
5649 else if (is_attribute_p ("thiscall", name))
5651 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5652 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5654 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5656 error ("stdcall and thiscall attributes are not compatible");
5658 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5660 error ("fastcall and thiscall attributes are not compatible");
5662 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5664 error ("cdecl and thiscall attributes are not compatible");
5668 /* Can combine sseregparm with all attributes. */
5673 /* The transactional memory builtins are implicitly regparm or fastcall
5674 depending on the ABI. Override the generic do-nothing attribute that
5675 these builtins were declared with, and replace it with one of the two
5676 attributes that we expect elsewhere. */
5679 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5680 int flags, bool *no_add_attrs)
5684 /* In no case do we want to add the placeholder attribute. */
5685 *no_add_attrs = true;
5687 /* The 64-bit ABI is unchanged for transactional memory. */
5691 /* ??? Is there a better way to validate 32-bit windows? We have
5692 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5693 if (CHECK_STACK_LIMIT > 0)
/* CHECK_STACK_LIMIT > 0 selects "fastcall"; otherwise a regparm(2)
   attribute list is built below and applied to NODE.  */
5694 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5697 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5698 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5700 decl_attributes (node, alt, flags);
5705 /* This function determines from TYPE the calling-convention. */
5708 ix86_get_callcvt (const_tree type)
/* Returns a bitmask of IX86_CALLCVT_* flags built from TYPE's
   attribute list, falling back to defaults when no base convention
   attribute is present.  NOTE(review): interior lines are elided in
   this extract.  */
5710 unsigned int ret = 0;
5715 return IX86_CALLCVT_CDECL;
5717 attrs = TYPE_ATTRIBUTES (type);
5718 if (attrs != NULL_TREE)
/* The base conventions are mutually exclusive, hence else-if.  */
5720 if (lookup_attribute ("cdecl", attrs))
5721 ret |= IX86_CALLCVT_CDECL;
5722 else if (lookup_attribute ("stdcall", attrs))
5723 ret |= IX86_CALLCVT_STDCALL;
5724 else if (lookup_attribute ("fastcall", attrs))
5725 ret |= IX86_CALLCVT_FASTCALL;
5726 else if (lookup_attribute ("thiscall", attrs))
5727 ret |= IX86_CALLCVT_THISCALL;
5729 /* Regparm isn't allowed for thiscall and fastcall. */
5730 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5732 if (lookup_attribute ("regparm", attrs))
5733 ret |= IX86_CALLCVT_REGPARM;
5734 if (lookup_attribute ("sseregparm", attrs))
5735 ret |= IX86_CALLCVT_SSEREGPARM;
5738 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit base convention: -mrtd makes non-stdarg functions
   stdcall, otherwise cdecl (or thiscall for MS-ABI methods).  */
5742 is_stdarg = stdarg_p (type);
5743 if (TARGET_RTD && !is_stdarg)
5744 return IX86_CALLCVT_STDCALL | ret;
5748 || TREE_CODE (type) != METHOD_TYPE
5749 || ix86_function_type_abi (type) != MS_ABI)
5750 return IX86_CALLCVT_CDECL | ret;
5752 return IX86_CALLCVT_THISCALL;
5755 /* Return 0 if the attributes for two types are incompatible, 1 if they
5756 are compatible, and 2 if they are nearly compatible (which causes a
5757 warning to be generated). */
5760 ix86_comp_type_attributes (const_tree type1, const_tree type2)
/* Compares the calling-convention bitmasks and regparm counts of the
   two function types; see the contract comment above (0 incompatible,
   1 compatible, 2 nearly compatible).  NOTE(review): interior lines
   are elided in this extract.  */
5762 unsigned int ccvt1, ccvt2;
5764 if (TREE_CODE (type1) != FUNCTION_TYPE
5765 && TREE_CODE (type1) != METHOD_TYPE)
5768 ccvt1 = ix86_get_callcvt (type1);
5769 ccvt2 = ix86_get_callcvt (type2);
5772 if (ix86_function_regparm (type1, NULL)
5773 != ix86_function_regparm (type2, NULL))
5779 /* Return the regparm value for a function with the indicated TYPE and DECL.
5780 DECL may be NULL when calling function indirectly
5781 or considering a libcall. */
5784 ix86_function_regparm (const_tree type, const_tree decl)
/* See contract comment above: DECL may be NULL for indirect calls or
   libcalls.  NOTE(review): interior lines are elided in this extract.  */
5791 return (ix86_function_type_abi (type) == SYSV_ABI
5792 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5793 ccvt = ix86_get_callcvt (type);
5794 regparm = ix86_regparm;
5796 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
5798 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5801 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5805 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5807 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5810 /* Use register calling convention for local functions when possible. */
5812 && TREE_CODE (decl) == FUNCTION_DECL)
5814 cgraph_node *target = cgraph_node::get (decl);
5816 target = target->function_symbol ();
5818 /* Caller and callee must agree on the calling convention, so
5819 checking here just optimize means that with
5820 __attribute__((optimize (...))) caller could use regparm convention
5821 and callee not, or vice versa. Instead look at whether the callee
5822 is optimized or not. */
5823 if (target && opt_for_fn (target->decl, optimize)
5824 && !(profile_flag && !flag_fentry))
5826 cgraph_local_info *i = &target->local;
5827 if (i && i->local && i->can_change_signature)
5829 int local_regparm, globals = 0, regno;
5831 /* Make sure no regparm register is taken by a
5832 fixed register variable. */
5833 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5835 if (fixed_regs[local_regparm])
5838 /* We don't want to use regparm(3) for nested functions as
5839 these use a static chain pointer in the third argument. */
5840 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5843 /* Save a register for the split stack. */
5844 if (local_regparm == 3 && flag_split_stack)
5847 /* Each fixed register usage increases register pressure,
5848 so less registers should be used for argument passing.
5849 This functionality can be overridden by an explicit
5851 for (regno = AX_REG; regno <= DI_REG; regno++)
5852 if (fixed_regs[regno])
5856 = globals < local_regparm ? local_regparm - globals : 0;
5858 if (local_regparm > regparm)
5859 regparm = local_regparm;
5867 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5868 DFmode (2) arguments in SSE registers for a function with the
5869 indicated TYPE and DECL. DECL may be NULL when calling function
5870 indirectly or considering a libcall. Otherwise return 0. */
5873 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* See contract comment above: return 1 (SFmode) or 2 (SFmode+DFmode)
   when SSE registers may carry FP arguments, 0 otherwise.  WARN selects
   whether ABI-mismatch errors are emitted.  NOTE(review): interior
   lines are elided in this extract; the only code change is the
   diagnostic-string fix flagged below.  */
5875 gcc_assert (!TARGET_64BIT);
5877 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5878 by the sseregparm attribute. */
5879 if (TARGET_SSEREGPARM
5880 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5887 error ("calling %qD with attribute sseregparm without "
5888 "SSE/SSE2 enabled", decl);
5890 error ("calling %qT with attribute sseregparm without "
5891 "SSE/SSE2 enabled", type);
5902 cgraph_node *target = cgraph_node::get (decl);
5904 target = target->function_symbol ();
5906 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5907 (and DFmode for SSE2) arguments in SSE registers. */
5909 /* TARGET_SSE_MATH */
5910 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5911 && opt_for_fn (target->decl, optimize)
5912 && !(profile_flag && !flag_fentry))
5914 cgraph_local_info *i = &target->local;
5915 if (i && i->local && i->can_change_signature)
5917 /* Refuse to produce wrong code when local function with SSE enabled
5918 is called from SSE disabled function.
5919 We may work hard to work out these scenarios but hopefully
5920 it does not matter in practice. */
5921 if (!TARGET_SSE && warn)
/* Fix: the user-visible diagnostic previously misspelled "calling"
   as "caling".  */
5923 error ("calling %qD with SSE calling convention without "
5924 "SSE/SSE2 enabled", decl);
5927 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5928 ->x_ix86_isa_flags) ? 2 : 1;
5935 /* Return true if EAX is live at the start of the function. Used by
5936 ix86_expand_prologue to determine if we need special help before
5937 calling allocate_stack_worker. */
5940 ix86_eax_live_at_start_p (void)
5942 /* Cheat. Don't bother working forward from ix86_function_regparm
5943 to the function type to whether an actual argument is located in
5944 eax. Instead just look at cfg info, which is still close enough
5945 to correct at this point. This gives false positives for broken
5946 functions that might use uninitialized data that happens to be
5947 allocated in eax, but who cares? */
/* Register 0 is %eax on this target.  */
5948 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5952 ix86_keep_aggregate_return_pointer (tree fntype)
/* Decides whether the hidden aggregate-return pointer stays on the
   stack for the caller to pop: an explicit callee_pop_aggregate_return(N)
   attribute wins, then the 32-bit MS ABI default, then the target macro.
   NOTE(review): interior lines are elided in this extract.  */
5958 attr = lookup_attribute ("callee_pop_aggregate_return",
5959 TYPE_ATTRIBUTES (fntype));
5961 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5963 /* For 32-bit MS-ABI the default is to keep aggregate
5965 if (ix86_function_type_abi (fntype) == MS_ABI)
5968 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5971 /* Value is the number of bytes of arguments automatically
5972 popped when returning from a subroutine call.
5973 FUNDECL is the declaration node of the function (as a tree),
5974 FUNTYPE is the data type of the function (as a tree),
5975 or for a library call it is an identifier node for the subroutine name.
5976 SIZE is the number of bytes of arguments passed on the stack.
5978 On the 80386, the RTD insn may be used to pop them if the number
5979 of args is fixed, but if the number is variable then the caller
5980 must pop them all. RTD can't be used for library calls now
5981 because the library is compiled with the Unix compiler.
5982 Use of RTD is a selectable option, since it is incompatible with
5983 standard Unix calling sequences. If the option is not selected,
5984 the caller must always pop the args.
5986 The attribute stdcall is equivalent to RTD on a per module basis. */
5989 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* See contract comment above.  NOTE(review): interior lines are elided
   in this extract.  */
5993 /* None of the 64-bit ABIs pop arguments. */
5997 ccvt = ix86_get_callcvt (funtype);
/* Callee-pop conventions only apply to non-varargs functions.  */
5999 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6000 | IX86_CALLCVT_THISCALL)) != 0
6001 && ! stdarg_p (funtype))
6004 /* Lose any fake structure return argument if it is passed on the stack. */
6005 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6006 && !ix86_keep_aggregate_return_pointer (funtype))
6008 int nregs = ix86_function_regparm (funtype, fundecl);
6010 return GET_MODE_SIZE (Pmode);
6016 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6019 ix86_legitimate_combined_insn (rtx_insn *insn)
6021 /* Check operand constraints in case hard registers were propagated
6022 into insn pattern. This check prevents combine pass from
6023 generating insn patterns with invalid hard register operands.
6024 These invalid insns can eventually confuse reload to error out
6025 with a spill failure. See also PRs 46829 and 46843. */
/* The assignment inside the condition is intentional: it caches the
   recog result in INSN_CODE while testing that the insn matched.  */
6026 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6030 extract_insn (insn);
6031 preprocess_constraints (insn);
6033 int n_operands = recog_data.n_operands;
6034 int n_alternatives = recog_data.n_alternatives;
6035 for (i = 0; i < n_operands; i++)
6037 rtx op = recog_data.operand[i];
6038 machine_mode mode = GET_MODE (op);
6039 const operand_alternative *op_alt;
6044 /* For pre-AVX disallow unaligned loads/stores where the
6045 instructions don't support it. */
6047 && VECTOR_MODE_P (GET_MODE (op))
6048 && misaligned_operand (op, GET_MODE (op)))
6050 int min_align = get_attr_ssememalign (insn);
6055 /* A unary operator may be accepted by the predicate, but it
6056 is irrelevant for matching constraints. */
/* Strip SUBREGs of hard registers, tracking the byte offset so the
   class check below sees the right register.  */
6060 if (GET_CODE (op) == SUBREG)
6062 if (REG_P (SUBREG_REG (op))
6063 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6064 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6065 GET_MODE (SUBREG_REG (op)),
6068 op = SUBREG_REG (op);
/* Only propagated hard registers need constraint checking.  */
6071 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6074 op_alt = recog_op_alt;
6076 /* Operand has no constraints, anything is OK. */
6077 win = !n_alternatives;
6079 alternative_mask preferred = get_preferred_alternatives (insn);
6080 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6082 if (!TEST_BIT (preferred, j))
6084 if (op_alt[i].anything_ok
6085 || (op_alt[i].matches != -1
6087 (recog_data.operand[i],
6088 recog_data.operand[op_alt[i].matches]))
6089 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6104 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6106 static unsigned HOST_WIDE_INT
6107 ix86_asan_shadow_offset (void)
/* AddressSanitizer shadow-memory offset: differs for LP64 (with a
   separate Mach-O value) versus 32-bit/x32.  */
6109 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6110 : HOST_WIDE_INT_C (0x7fff8000))
6111 : (HOST_WIDE_INT_1 << 29);
6114 /* Argument support functions. */
6116 /* Return true when register may be used to pass function parameters. */
6118 ix86_function_arg_regno_p (int regno)
/* See comment above: true when REGNO may carry a function parameter.
   NOTE(review): interior lines are elided in this extract; the 32-bit
   and 64-bit branches below are separated by missing control flow.  */
6121 const int *parm_regs;
6123 if (TARGET_MPX && BND_REGNO_P (regno))
6129 return (regno < REGPARM_MAX
6130 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6132 return (regno < REGPARM_MAX
6133 || (TARGET_MMX && MMX_REGNO_P (regno)
6134 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6135 || (TARGET_SSE && SSE_REGNO_P (regno)
6136 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6139 if (TARGET_SSE && SSE_REGNO_P (regno)
6140 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6143 /* TODO: The function should depend on current function ABI but
6144 builtins.c would need updating then. Therefore we use the
6147 /* RAX is used as hidden argument to va_arg functions. */
6148 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Scan the integer parameter registers of the ABI in effect.  */
6151 if (ix86_abi == MS_ABI)
6152 parm_regs = x86_64_ms_abi_int_parameter_registers;
6154 parm_regs = x86_64_int_parameter_registers;
6155 for (i = 0; i < (ix86_abi == MS_ABI
6156 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6157 if (regno == parm_regs[i])
6162 /* Return if we do not know how to pass TYPE solely in registers. */
6165 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
/* Defers to the generic variable-size/padding test first, then adds
   the i386-specific TImode-aggregate rule below.  */
6167 if (must_pass_in_stack_var_size_or_pad (mode, type))
6170 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6171 The layout_type routine is crafty and tries to trick us into passing
6172 currently unsupported vector types on the stack by using TImode. */
6173 return (!TARGET_64BIT && mode == TImode
6174 && type && TREE_CODE (type) != VECTOR_TYPE);
6177 /* It returns the size, in bytes, of the area reserved for arguments passed
6178 in registers for the function represented by fndecl dependent to the used
6181 ix86_reg_parm_stack_space (const_tree fndecl)
/* Size of the register-save ("shadow") area; nonzero only for the
   64-bit MS ABI.  FNDECL may be a decl or a type — both paths below.
   NOTE(review): interior lines are elided in this extract.  */
6183 enum calling_abi call_abi = SYSV_ABI;
6184 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6185 call_abi = ix86_function_abi (fndecl);
6187 call_abi = ix86_function_type_abi (fndecl);
6188 if (TARGET_64BIT && call_abi == MS_ABI)
6193 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6196 ix86_function_type_abi (const_tree fntype)
/* Determines SYSV_ABI vs MS_ABI for FNTYPE: the default ix86_abi may
   be flipped by ms_abi/sysv_abi attributes.  NOTE(review): interior
   lines are elided in this extract.  */
6198 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6200 enum calling_abi abi = ix86_abi;
6201 if (abi == SYSV_ABI)
6203 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
/* The error below is emitted only once (static flag).  */
6207 static bool warned = false;
6210 error ("X32 does not support ms_abi attribute");
6217 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6224 /* We add this as a workaround in order to use libc_has_function
6227 ix86_libc_has_function (enum function_class fn_class)
/* Thin wrapper over the target hook; see the workaround note above.  */
6229 return targetm.libc_has_function (fn_class);
6233 ix86_function_ms_hook_prologue (const_tree fn)
/* Tests for the ms_hook_prologue attribute on FN; rejects nested
   functions with an error.  NOTE(review): interior lines are elided
   in this extract.  */
6235 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6237 if (decl_function_context (fn) != NULL_TREE)
6238 error_at (DECL_SOURCE_LOCATION (fn),
6239 "ms_hook_prologue is not compatible with nested function");
6246 static enum calling_abi
6247 ix86_function_abi (const_tree fndecl)
/* Decl-level convenience wrapper around ix86_function_type_abi.  */
6251 return ix86_function_type_abi (TREE_TYPE (fndecl));
6254 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6257 ix86_cfun_abi (void)
/* ABI of the function being compiled, cached in cfun->machine.  */
6261 return cfun->machine->call_abi;
6264 /* Write the extra assembler code needed to declare a function properly. */
6267 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
/* Emits the function label, preceded by 0xCC filler bytes and followed
   by hot-patch marker bytes when the ms_hook_prologue attribute is set.
   NOTE(review): interior lines are elided in this extract.  */
6270 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* 32 filler bytes for 64-bit, 16 for 32-bit, written as ASM_LONGs.  */
6274 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6275 unsigned int filler_cc = 0xcccccccc;
6277 for (i = 0; i < filler_count; i += 4)
6278 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6281 #ifdef SUBTARGET_ASM_UNWIND_INIT
6282 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6285 ASM_OUTPUT_LABEL (asm_out_file, fname);
6287 /* Output magic byte marker, if hot-patch attribute is set. */
6292 /* leaq [%rsp + 0], %rsp */
6293 asm_fprintf (asm_out_file, ASM_BYTE
6294 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6298 /* movl.s %edi, %edi
6300 movl.s %esp, %ebp */
6301 asm_fprintf (asm_out_file, ASM_BYTE
6302 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6308 extern void init_regs (void);
6310 /* Implementation of call abi switching target hook. Specific to FNDECL
6311 the specific call register sets are set. See also
6312 ix86_conditional_register_usage for more details. */
6314 ix86_call_abi_override (const_tree fndecl)
/* Records the call ABI for FNDECL (or the global default when NULL)
   into cfun->machine->call_abi.  */
6316 if (fndecl == NULL_TREE)
6317 cfun->machine->call_abi = ix86_abi;
6319 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6322 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
6323 expensive re-initialization of init_regs each time we switch function context
6324 since this is needed only during RTL expansion. */
6326 ix86_maybe_switch_abi (void)
/* NOTE(review): surrounding lines are elided here; the visible `==`
   compares the call-used state of %esi with whether the current ABI is
   MS_ABI — presumably to detect that the register sets already match
   and skip the expensive re-initialization described above.  Confirm
   against the full source.  */
6329 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6333 /* Return 1 if pseudo register should be created and used to hold
6334 GOT address for PIC code. */
6336 ix86_use_pseudo_pic_reg (void)
/* See comment above; the visible condition keys on the code model.
   NOTE(review): interior lines are elided in this extract.  */
6339 && (ix86_cmodel == CM_SMALL_PIC
6346 /* Initialize large model PIC register. */
6349 ix86_init_large_pic_reg (unsigned int tmp_regno)
/* Emits the large-model GOT setup: load RIP into the PIC register,
   the GOT offset into TMP_REGNO, then add them.  Requires 64-bit
   (Pmode == DImode is asserted).  NOTE(review): interior lines are
   elided in this extract.  */
6351 rtx_code_label *label;
6354 gcc_assert (Pmode == DImode);
6355 label = gen_label_rtx ();
6357 LABEL_PRESERVE_P (label) = 1;
6358 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
/* TMP_REGNO must not clobber the PIC register itself.  */
6359 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6360 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6362 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6363 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6364 pic_offset_table_rtx, tmp_reg));
6367 /* Create and initialize PIC register if required. */
6369 ix86_init_pic_reg (void)
/* Emits the PIC-register initialization sequence on the entry edge of
   the current function, when a pseudo PIC register is in use.
   NOTE(review): interior lines are elided in this extract.  */
6374 if (!ix86_use_pseudo_pic_reg ())
6381 if (ix86_cmodel == CM_LARGE_PIC)
6382 ix86_init_large_pic_reg (R11_REG);
6384 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6388 /* If there is future mcount call in the function it is more profitable
6389 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6390 rtx reg = crtl->profile
6391 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6392 : pic_offset_table_rtx;
6393 rtx insn = emit_insn (gen_set_got (reg));
6394 RTX_FRAME_RELATED_P (insn) = 1;
6396 emit_move_insn (pic_offset_table_rtx, reg);
6397 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* The generated sequence is inserted on the single successor edge of
   the entry block and committed immediately.  */
6403 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6404 insert_insn_on_edge (seq, entry_edge);
6405 commit_one_edge_insertion (entry_edge);
6408 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6409 for a call to a function whose data type is FNTYPE.
6410 For a library call, FNTYPE is 0. */
6413 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6414 tree fntype, /* tree ptr for function decl */
6415 rtx libname, /* SYMBOL_REF of library name or 0 */
/* NOTE(review): interior lines (including remaining parameters) are
   elided in this extract.  */
6419 struct cgraph_local_info *i = NULL;
6420 struct cgraph_node *target = NULL;
6422 memset (cum, 0, sizeof (*cum));
/* Resolve the ABI from the callee decl when available, else from the
   function type.  */
6426 target = cgraph_node::get (fndecl);
6429 target = target->function_symbol ();
6430 i = cgraph_node::local_info (target->decl);
6431 cum->call_abi = ix86_function_abi (target->decl);
6434 cum->call_abi = ix86_function_abi (fndecl);
6437 cum->call_abi = ix86_function_type_abi (fntype);
6439 cum->caller = caller;
6441 /* Set up the number of registers to use for passing arguments. */
6442 cum->nregs = ix86_regparm;
6445 cum->nregs = (cum->call_abi == SYSV_ABI
6446 ? X86_64_REGPARM_MAX
6447 : X86_64_MS_REGPARM_MAX);
6451 cum->sse_nregs = SSE_REGPARM_MAX;
6454 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6455 ? X86_64_SSE_REGPARM_MAX
6456 : X86_64_MS_SSE_REGPARM_MAX);
6460 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Start with all vector-ABI warnings enabled; they are disabled below
   for stdarg functions.  */
6461 cum->warn_avx512f = true;
6462 cum->warn_avx = true;
6463 cum->warn_sse = true;
6464 cum->warn_mmx = true;
6466 /* Because type might mismatch in between caller and callee, we need to
6467 use actual type of function for local calls.
6468 FIXME: cgraph_analyze can be told to actually record if function uses
6469 va_start so for local functions maybe_vaarg can be made aggressive
6471 FIXME: once typesystem is fixed, we won't need this code anymore. */
6472 if (i && i->local && i->can_change_signature)
6473 fntype = TREE_TYPE (target->decl);
6474 cum->stdarg = stdarg_p (fntype);
6475 cum->maybe_vaarg = (fntype
6476 ? (!prototype_p (fntype) || stdarg_p (fntype))
6479 cum->bnd_regno = FIRST_BND_REG;
6480 cum->bnds_in_bt = 0;
6481 cum->force_bnd_pass = 0;
6485 /* If there are variable arguments, then we won't pass anything
6486 in registers in 32-bit mode. */
6487 if (stdarg_p (fntype))
6492 cum->warn_avx512f = false;
6493 cum->warn_avx = false;
6494 cum->warn_sse = false;
6495 cum->warn_mmx = false;
6499 /* Use ecx and edx registers if function has fastcall attribute,
6500 else look for regparm information. */
6503 unsigned int ccvt = ix86_get_callcvt (fntype);
6504 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6507 cum->fastcall = 1; /* Same first register as in fastcall. */
6509 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6515 cum->nregs = ix86_function_regparm (fntype, fndecl);
6518 /* Set up the number of SSE registers used for passing SFmode
6519 and DFmode arguments. Warn for mismatching ABI. */
6520 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6524 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6525 But in the case of vector types, it is some vector mode.
6527 When we have only some of our vector isa extensions enabled, then there
6528 are some modes for which vector_mode_supported_p is false. For these
6529 modes, the generic vector support in gcc will choose some non-vector mode
6530 in order to implement the type. By computing the natural mode, we'll
6531 select the proper ABI location for the operand and not depend on whatever
6532 the middle-end decides to do with these vector types.
6534 The middle-end can't deal with the vector types > 16 bytes. In this
6535 case, we return the original mode and warn ABI change if CUM isn't
6538 If INT_RETURN is true, warn ABI change if the vector mode isn't
6539 available for function return value. */
6542 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
/* See contract comment above.  NOTE(review): interior lines are elided
   in this extract.  */
6545 machine_mode mode = TYPE_MODE (type);
6547 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6549 HOST_WIDE_INT size = int_size_in_bytes (type);
6550 if ((size == 8 || size == 16 || size == 32 || size == 64)
6551 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6552 && TYPE_VECTOR_SUBPARTS (type) > 1)
6554 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6556 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6557 mode = MIN_MODE_VECTOR_FLOAT;
6559 mode = MIN_MODE_VECTOR_INT;
6561 /* Get the mode which has this inner mode and number of units. */
6562 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6563 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6564 && GET_MODE_INNER (mode) == innermode)
/* Each ISA tier below warns at most once per (argument, return)
   for an ABI change when the ISA is unavailable, via the static
   warned* flags.  */
6566 if (size == 64 && !TARGET_AVX512F)
6568 static bool warnedavx512f;
6569 static bool warnedavx512f_ret;
6571 if (cum && cum->warn_avx512f && !warnedavx512f)
6573 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6574 "without AVX512F enabled changes the ABI"))
6575 warnedavx512f = true;
6577 else if (in_return && !warnedavx512f_ret)
6579 if (warning (OPT_Wpsabi, "AVX512F vector return "
6580 "without AVX512F enabled changes the ABI"))
6581 warnedavx512f_ret = true;
6584 return TYPE_MODE (type);
6586 else if (size == 32 && !TARGET_AVX)
6588 static bool warnedavx;
6589 static bool warnedavx_ret;
6591 if (cum && cum->warn_avx && !warnedavx)
6593 if (warning (OPT_Wpsabi, "AVX vector argument "
6594 "without AVX enabled changes the ABI"))
6597 else if (in_return && !warnedavx_ret)
6599 if (warning (OPT_Wpsabi, "AVX vector return "
6600 "without AVX enabled changes the ABI"))
6601 warnedavx_ret = true;
6604 return TYPE_MODE (type);
6606 else if (((size == 8 && TARGET_64BIT) || size == 16)
6609 static bool warnedsse;
6610 static bool warnedsse_ret;
6612 if (cum && cum->warn_sse && !warnedsse)
6614 if (warning (OPT_Wpsabi, "SSE vector argument "
6615 "without SSE enabled changes the ABI"))
6618 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6620 if (warning (OPT_Wpsabi, "SSE vector return "
6621 "without SSE enabled changes the ABI"))
6622 warnedsse_ret = true;
6625 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6627 static bool warnedmmx;
6628 static bool warnedmmx_ret;
6630 if (cum && cum->warn_mmx && !warnedmmx)
6632 if (warning (OPT_Wpsabi, "MMX vector argument "
6633 "without MMX enabled changes the ABI"))
6636 else if (in_return && !warnedmmx_ret)
6638 if (warning (OPT_Wpsabi, "MMX vector return "
6639 "without MMX enabled changes the ABI"))
6640 warnedmmx_ret = true;
6653 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6654 this may not agree with the mode that the type system has chosen for the
6655 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6656 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6659 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
/* See comment above: a plain REG in ORIG_MODE when that mode is known,
   otherwise a one-element PARALLEL wrapping a REG in MODE.  */
6664 if (orig_mode != BLKmode)
6665 tmp = gen_rtx_REG (orig_mode, regno);
6668 tmp = gen_rtx_REG (mode, regno);
6669 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6670 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6676 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6677 of this code is to classify each 8bytes of incoming argument by the register
6678 class and assign registers accordingly. */
6680 /* Return the union class of CLASS1 and CLASS2.
6681 See the x86-64 PS ABI for details. */
6683 static enum x86_64_reg_class
6684 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6686 /* Rule #1: If both classes are equal, this is the resulting class. */
6687 if (class1 == class2)
6690 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6692 if (class1 == X86_64_NO_CLASS)
6694 if (class2 == X86_64_NO_CLASS)
6697 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6698 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6699 return X86_64_MEMORY_CLASS;
6701 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6702 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6703 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6704 return X86_64_INTEGERSI_CLASS;
6705 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6706 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6707 return X86_64_INTEGER_CLASS;
6709 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6711 if (class1 == X86_64_X87_CLASS
6712 || class1 == X86_64_X87UP_CLASS
6713 || class1 == X86_64_COMPLEX_X87_CLASS
6714 || class2 == X86_64_X87_CLASS
6715 || class2 == X86_64_X87UP_CLASS
6716 || class2 == X86_64_COMPLEX_X87_CLASS)
6717 return X86_64_MEMORY_CLASS;
6719 /* Rule #6: Otherwise class SSE is used. */
6720 return X86_64_SSE_CLASS;
6723 /* Classify the argument of type TYPE and mode MODE.
6724 CLASSES will be filled by the register class used to pass each word
6725 of the operand. The number of words is returned. In case the parameter
6726 should be passed in memory, 0 is returned. As a special case for zero
6727 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6729 BIT_OFFSET is used internally for handling records and specifies offset
6730 of the offset in bits modulo 512 to avoid overflow cases.
6732 See the x86-64 PS ABI for details.
6736 classify_argument (machine_mode mode, const_tree type,
6737 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6739 HOST_WIDE_INT bytes =
6740 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6742 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6744 /* Variable sized entities are always passed/returned in memory. */
6748 if (mode != VOIDmode
6749 && targetm.calls.must_pass_in_stack (mode, type))
6752 if (type && AGGREGATE_TYPE_P (type))
6756 enum x86_64_reg_class subclasses[MAX_CLASSES];
6758 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6762 for (i = 0; i < words; i++)
6763 classes[i] = X86_64_NO_CLASS;
6765 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6766 signalize memory class, so handle it as special case. */
6769 classes[0] = X86_64_NO_CLASS;
6773 /* Classify each field of record and merge classes. */
6774 switch (TREE_CODE (type))
6777 /* And now merge the fields of structure. */
6778 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6780 if (TREE_CODE (field) == FIELD_DECL)
6784 if (TREE_TYPE (field) == error_mark_node)
6787 /* Bitfields are always classified as integer. Handle them
6788 early, since later code would consider them to be
6789 misaligned integers. */
6790 if (DECL_BIT_FIELD (field))
6792 for (i = (int_bit_position (field)
6793 + (bit_offset % 64)) / 8 / 8;
6794 i < ((int_bit_position (field) + (bit_offset % 64))
6795 + tree_to_shwi (DECL_SIZE (field))
6798 merge_classes (X86_64_INTEGER_CLASS,
6805 type = TREE_TYPE (field);
6807 /* Flexible array member is ignored. */
6808 if (TYPE_MODE (type) == BLKmode
6809 && TREE_CODE (type) == ARRAY_TYPE
6810 && TYPE_SIZE (type) == NULL_TREE
6811 && TYPE_DOMAIN (type) != NULL_TREE
6812 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6817 if (!warned && warn_psabi)
6820 inform (input_location,
6821 "the ABI of passing struct with"
6822 " a flexible array member has"
6823 " changed in GCC 4.4");
6827 num = classify_argument (TYPE_MODE (type), type,
6829 (int_bit_position (field)
6830 + bit_offset) % 512);
6833 pos = (int_bit_position (field)
6834 + (bit_offset % 64)) / 8 / 8;
6835 for (i = 0; i < num && (i + pos) < words; i++)
6837 merge_classes (subclasses[i], classes[i + pos]);
6844 /* Arrays are handled as small records. */
6847 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6848 TREE_TYPE (type), subclasses, bit_offset);
6852 /* The partial classes are now full classes. */
6853 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6854 subclasses[0] = X86_64_SSE_CLASS;
6855 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6856 && !((bit_offset % 64) == 0 && bytes == 4))
6857 subclasses[0] = X86_64_INTEGER_CLASS;
6859 for (i = 0; i < words; i++)
6860 classes[i] = subclasses[i % num];
6865 case QUAL_UNION_TYPE:
6866 /* Unions are similar to RECORD_TYPE but offset is always 0.
6868 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6870 if (TREE_CODE (field) == FIELD_DECL)
6874 if (TREE_TYPE (field) == error_mark_node)
6877 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6878 TREE_TYPE (field), subclasses,
6882 for (i = 0; i < num && i < words; i++)
6883 classes[i] = merge_classes (subclasses[i], classes[i]);
6894 /* When size > 16 bytes, if the first one isn't
6895 X86_64_SSE_CLASS or any other ones aren't
6896 X86_64_SSEUP_CLASS, everything should be passed in
6898 if (classes[0] != X86_64_SSE_CLASS)
6901 for (i = 1; i < words; i++)
6902 if (classes[i] != X86_64_SSEUP_CLASS)
6906 /* Final merger cleanup. */
6907 for (i = 0; i < words; i++)
6909 /* If one class is MEMORY, everything should be passed in
6911 if (classes[i] == X86_64_MEMORY_CLASS)
6914 /* The X86_64_SSEUP_CLASS should be always preceded by
6915 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6916 if (classes[i] == X86_64_SSEUP_CLASS
6917 && classes[i - 1] != X86_64_SSE_CLASS
6918 && classes[i - 1] != X86_64_SSEUP_CLASS)
6920 /* The first one should never be X86_64_SSEUP_CLASS. */
6921 gcc_assert (i != 0);
6922 classes[i] = X86_64_SSE_CLASS;
6925 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6926 everything should be passed in memory. */
6927 if (classes[i] == X86_64_X87UP_CLASS
6928 && (classes[i - 1] != X86_64_X87_CLASS))
6932 /* The first one should never be X86_64_X87UP_CLASS. */
6933 gcc_assert (i != 0);
6934 if (!warned && warn_psabi)
6937 inform (input_location,
6938 "the ABI of passing union with long double"
6939 " has changed in GCC 4.4");
6947 /* Compute alignment needed. We align all types to natural boundaries with
6948 exception of XFmode that is aligned to 64bits. */
6949 if (mode != VOIDmode && mode != BLKmode)
6951 int mode_alignment = GET_MODE_BITSIZE (mode);
6954 mode_alignment = 128;
6955 else if (mode == XCmode)
6956 mode_alignment = 256;
6957 if (COMPLEX_MODE_P (mode))
6958 mode_alignment /= 2;
6959 /* Misaligned fields are always returned in memory. */
6960 if (bit_offset % mode_alignment)
6964 /* for V1xx modes, just use the base mode */
6965 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6966 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6967 mode = GET_MODE_INNER (mode);
6969 /* Classification of atomic types. */
6974 classes[0] = X86_64_SSE_CLASS;
6977 classes[0] = X86_64_SSE_CLASS;
6978 classes[1] = X86_64_SSEUP_CLASS;
6988 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6990 /* Analyze last 128 bits only. */
6991 size = (size - 1) & 0x7f;
6995 classes[0] = X86_64_INTEGERSI_CLASS;
7000 classes[0] = X86_64_INTEGER_CLASS;
7003 else if (size < 64+32)
7005 classes[0] = X86_64_INTEGER_CLASS;
7006 classes[1] = X86_64_INTEGERSI_CLASS;
7009 else if (size < 64+64)
7011 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7019 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7023 /* OImode shouldn't be used directly. */
7028 if (!(bit_offset % 64))
7029 classes[0] = X86_64_SSESF_CLASS;
7031 classes[0] = X86_64_SSE_CLASS;
7034 classes[0] = X86_64_SSEDF_CLASS;
7037 classes[0] = X86_64_X87_CLASS;
7038 classes[1] = X86_64_X87UP_CLASS;
7041 classes[0] = X86_64_SSE_CLASS;
7042 classes[1] = X86_64_SSEUP_CLASS;
7045 classes[0] = X86_64_SSE_CLASS;
7046 if (!(bit_offset % 64))
7052 if (!warned && warn_psabi)
7055 inform (input_location,
7056 "the ABI of passing structure with complex float"
7057 " member has changed in GCC 4.4");
7059 classes[1] = X86_64_SSESF_CLASS;
7063 classes[0] = X86_64_SSEDF_CLASS;
7064 classes[1] = X86_64_SSEDF_CLASS;
7067 classes[0] = X86_64_COMPLEX_X87_CLASS;
7070 /* This modes is larger than 16 bytes. */
7078 classes[0] = X86_64_SSE_CLASS;
7079 classes[1] = X86_64_SSEUP_CLASS;
7080 classes[2] = X86_64_SSEUP_CLASS;
7081 classes[3] = X86_64_SSEUP_CLASS;
7089 classes[0] = X86_64_SSE_CLASS;
7090 classes[1] = X86_64_SSEUP_CLASS;
7091 classes[2] = X86_64_SSEUP_CLASS;
7092 classes[3] = X86_64_SSEUP_CLASS;
7093 classes[4] = X86_64_SSEUP_CLASS;
7094 classes[5] = X86_64_SSEUP_CLASS;
7095 classes[6] = X86_64_SSEUP_CLASS;
7096 classes[7] = X86_64_SSEUP_CLASS;
7104 classes[0] = X86_64_SSE_CLASS;
7105 classes[1] = X86_64_SSEUP_CLASS;
7113 classes[0] = X86_64_SSE_CLASS;
7119 gcc_assert (VECTOR_MODE_P (mode));
7124 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7126 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7127 classes[0] = X86_64_INTEGERSI_CLASS;
7129 classes[0] = X86_64_INTEGER_CLASS;
7130 classes[1] = X86_64_INTEGER_CLASS;
7131 return 1 + (bytes > 8);
7135 /* Examine the argument and return set number of register required in each
7136    class.  Return true iff parameter should be passed in memory.  */
/* NOTE(review): this chunk is a sampled listing; the return type, the
   per-class tally statements (*int_nregs / *sse_nregs updates) and the
   final returns are not visible here.  Do not edit logic from this view.  */
7139 examine_argument (machine_mode mode, const_tree type, int in_return,
7140 int *int_nregs, int *sse_nregs)
7142 enum x86_64_reg_class regclass[MAX_CLASSES];
7143 int n = classify_argument (mode, type, regclass, 0);
/* Walk the computed classes backwards, dispatching on each class to
   accumulate the integer vs. SSE register requirements.  */
7150 for (n--; n >= 0; n--)
7151 switch (regclass[n])
7153 case X86_64_INTEGER_CLASS:
7154 case X86_64_INTEGERSI_CLASS:
7157 case X86_64_SSE_CLASS:
7158 case X86_64_SSESF_CLASS:
7159 case X86_64_SSEDF_CLASS:
7162 case X86_64_NO_CLASS:
7163 case X86_64_SSEUP_CLASS:
7165 case X86_64_X87_CLASS:
7166 case X86_64_X87UP_CLASS:
7167 case X86_64_COMPLEX_X87_CLASS:
7171 case X86_64_MEMORY_CLASS:
7178 /* Construct container for the argument used by GCC interface.  See
7179    FUNCTION_ARG for the detailed description.  */
/* NOTE(review): sampled listing — many interior lines (control-flow
   braces, some returns and assignments) are missing; comments below only
   describe what the visible lines establish.  */
7182 construct_container (machine_mode mode, machine_mode orig_mode,
7183 const_tree type, int in_return, int nintregs, int nsseregs,
7184 const int *intreg, int sse_regno)
7186 /* The following variables hold the static issued_error state.  */
7187 static bool issued_sse_arg_error;
7188 static bool issued_sse_ret_error;
7189 static bool issued_x87_ret_error;
7191 machine_mode tmpmode;
7193 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7194 enum x86_64_reg_class regclass[MAX_CLASSES];
7198 int needed_sseregs, needed_intregs;
7199 rtx exp[MAX_CLASSES];
7202 n = classify_argument (mode, type, regclass, 0);
7205 if (examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kind: pass in memory.  */
7208 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7211 /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
7212    some less clueful developer tries to use floating-point anyway.  */
7213 if (needed_sseregs && !TARGET_SSE)
7217 if (!issued_sse_ret_error)
7219 error ("SSE register return with SSE disabled");
7220 issued_sse_ret_error = true;
7223 else if (!issued_sse_arg_error)
7225 error ("SSE register argument with SSE disabled");
7226 issued_sse_arg_error = true;
7231 /* Likewise, error if the ABI requires us to return values in the
7232    x87 registers and the user specified -mno-80387.  */
7233 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7234 for (i = 0; i < n; i++)
7235 if (regclass[i] == X86_64_X87_CLASS
7236 || regclass[i] == X86_64_X87UP_CLASS
7237 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7239 if (!issued_x87_ret_error)
7241 error ("x87 register return with x87 disabled");
7242 issued_x87_ret_error = true;
7247 /* First construct simple cases.  Avoid SCmode, since we want to use
7248    single register to pass this type.  */
7249 if (n == 1 && mode != SCmode)
7250 switch (regclass[0])
7252 case X86_64_INTEGER_CLASS:
7253 case X86_64_INTEGERSI_CLASS:
7254 return gen_rtx_REG (mode, intreg[0]);
7255 case X86_64_SSE_CLASS:
7256 case X86_64_SSESF_CLASS:
7257 case X86_64_SSEDF_CLASS:
7258 if (mode != BLKmode)
7259 return gen_reg_or_parallel (mode, orig_mode,
7260 SSE_REGNO (sse_regno));
7262 case X86_64_X87_CLASS:
7263 case X86_64_COMPLEX_X87_CLASS:
7264 return gen_rtx_REG (mode, FIRST_STACK_REG);
7265 case X86_64_NO_CLASS:
7266 /* Zero sized array, struct or class.  */
/* Multi-word SSE cases: 16/32/64-byte values whose first class is SSE
   and the remainder SSEUP go in a single (wide) SSE register.  */
7272 && regclass[0] == X86_64_SSE_CLASS
7273 && regclass[1] == X86_64_SSEUP_CLASS
7275 return gen_reg_or_parallel (mode, orig_mode,
7276 SSE_REGNO (sse_regno));
7278 && regclass[0] == X86_64_SSE_CLASS
7279 && regclass[1] == X86_64_SSEUP_CLASS
7280 && regclass[2] == X86_64_SSEUP_CLASS
7281 && regclass[3] == X86_64_SSEUP_CLASS
7283 return gen_reg_or_parallel (mode, orig_mode,
7284 SSE_REGNO (sse_regno));
7286 && regclass[0] == X86_64_SSE_CLASS
7287 && regclass[1] == X86_64_SSEUP_CLASS
7288 && regclass[2] == X86_64_SSEUP_CLASS
7289 && regclass[3] == X86_64_SSEUP_CLASS
7290 && regclass[4] == X86_64_SSEUP_CLASS
7291 && regclass[5] == X86_64_SSEUP_CLASS
7292 && regclass[6] == X86_64_SSEUP_CLASS
7293 && regclass[7] == X86_64_SSEUP_CLASS
7295 return gen_reg_or_parallel (mode, orig_mode,
7296 SSE_REGNO (sse_regno));
/* XFmode-style values: X87 + X87UP pair lives in %st(0).  */
7298 && regclass[0] == X86_64_X87_CLASS
7299 && regclass[1] == X86_64_X87UP_CLASS)
7300 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two consecutive integer registers holding a CDImode/TImode value.  */
7303 && regclass[0] == X86_64_INTEGER_CLASS
7304 && regclass[1] == X86_64_INTEGER_CLASS
7305 && (mode == CDImode || mode == TImode)
7306 && intreg[0] + 1 == intreg[1])
7307 return gen_rtx_REG (mode, intreg[0]);
7309 /* Otherwise figure out the entries of the PARALLEL.  */
7310 for (i = 0; i < n; i++)
7314 switch (regclass[i])
7316 case X86_64_NO_CLASS:
7318 case X86_64_INTEGER_CLASS:
7319 case X86_64_INTEGERSI_CLASS:
7320 /* Merge TImodes on aligned occasions here too.  */
7321 if (i * 8 + 8 > bytes)
7323 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7324 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7328 /* We've requested 24 bytes we
7329    don't have mode for.  Use DImode.  */
7330 if (tmpmode == BLKmode)
7333 = gen_rtx_EXPR_LIST (VOIDmode,
7334 gen_rtx_REG (tmpmode, *intreg),
7338 case X86_64_SSESF_CLASS:
7340 = gen_rtx_EXPR_LIST (VOIDmode,
7341 gen_rtx_REG (SFmode,
7342 SSE_REGNO (sse_regno)),
7346 case X86_64_SSEDF_CLASS:
7348 = gen_rtx_EXPR_LIST (VOIDmode,
7349 gen_rtx_REG (DFmode,
7350 SSE_REGNO (sse_regno)),
7354 case X86_64_SSE_CLASS:
/* When followed by SSEUP classes, assert the expected run length and
   pick a wider tmpmode (the selection lines are not in this sample).  */
7362 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7372 && regclass[1] == X86_64_SSEUP_CLASS
7373 && regclass[2] == X86_64_SSEUP_CLASS
7374 && regclass[3] == X86_64_SSEUP_CLASS);
7380 && regclass[1] == X86_64_SSEUP_CLASS
7381 && regclass[2] == X86_64_SSEUP_CLASS
7382 && regclass[3] == X86_64_SSEUP_CLASS
7383 && regclass[4] == X86_64_SSEUP_CLASS
7384 && regclass[5] == X86_64_SSEUP_CLASS
7385 && regclass[6] == X86_64_SSEUP_CLASS
7386 && regclass[7] == X86_64_SSEUP_CLASS);
7394 = gen_rtx_EXPR_LIST (VOIDmode,
7395 gen_rtx_REG (tmpmode,
7396 SSE_REGNO (sse_regno)),
7405 /* Empty aligned struct, union or class.  */
7409 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7410 for (i = 0; i < nexps; i++)
7411 XVECEXP (ret, 0, i) = exp [i];
7415 /* Update the data in CUM to advance over an argument of mode MODE
7416    and data type TYPE.  (TYPE is null for libcalls where that information
7417    may not be available.)
7419    Return a number of integer registers advanced over.  */
/* NOTE(review): sampled listing — the switch on MODE that selects among
   the integer/SSE/MMX branches below is not visible here.  */
7422 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7423 const_tree type, HOST_WIDE_INT bytes,
7424 HOST_WIDE_INT words)
/* Integer-register path: consume WORDS integer registers.  */
7442 cum->words += words;
7443 cum->nregs -= words;
7444 cum->regno += words;
7445 if (cum->nregs >= 0)
7447 if (cum->nregs <= 0)
7455 /* OImode shouldn't be used directly.  */
7459 if (cum->float_in_sse < 2)
7462 if (cum->float_in_sse < 1)
/* SSE path: aggregates never go in SSE registers here.  */
7485 if (!type || !AGGREGATE_TYPE_P (type))
7487 cum->sse_words += words;
7488 cum->sse_nregs -= 1;
7489 cum->sse_regno += 1;
7490 if (cum->sse_nregs <= 0)
/* MMX path: same aggregate restriction as the SSE path above.  */
7504 if (!type || !AGGREGATE_TYPE_P (type))
7506 cum->mmx_words += words;
7507 cum->mmx_nregs -= 1;
7508 cum->mmx_regno += 1;
7509 if (cum->mmx_nregs <= 0)
/* Advance CUM over one x86-64 SysV argument; charges integer and SSE
   registers as computed by examine_argument, else falls through to the
   aligned stack-word accounting at the end.  */
7522 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7523 const_tree type, HOST_WIDE_INT words, bool named)
7525 int int_nregs, sse_nregs;
7527 /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
7528 if (!named && (VALID_AVX512F_REG_MODE (mode)
7529 || VALID_AVX256_REG_MODE (mode)))
7532 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7533 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7535 cum->nregs -= int_nregs;
7536 cum->sse_nregs -= sse_nregs;
7537 cum->regno += int_nregs;
7538 cum->sse_regno += sse_nregs;
/* Stack case: round cum->words up to the argument's alignment first.  */
7543 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7544 cum->words = (cum->words + align - 1) & ~(align - 1);
7545 cum->words += words;
/* Advance CUM over one MS-ABI x86-64 argument.  The MS ABI passes every
   by-value argument in a single 1/2/4/8-byte slot; anything else must
   already have been converted to pass-by-reference.  */
7551 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7552 HOST_WIDE_INT words)
7554 /* Otherwise, this should be passed indirect.  */
7555 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7557 cum->words += words;
7567 /* Update the data in CUM to advance over an argument of mode MODE and
7568    data type TYPE.  (TYPE is null for libcalls where that information
7569    may not be available.)  */
/* Dispatcher for TARGET_FUNCTION_ARG_ADVANCE: handles MPX pointer-bounds
   bookkeeping, then delegates to the MS-ABI, 64-bit SysV, or 32-bit
   helper.  NOTE(review): sampled listing — some early-return and brace
   lines are missing from this view.  */
7572 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7573 const_tree type, bool named)
7575 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7576 HOST_WIDE_INT bytes, words;
7579 if (mode == BLKmode)
7580 bytes = int_size_in_bytes (type);
7582 bytes = GET_MODE_SIZE (mode);
7583 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7586 mode = type_natural_mode (type, NULL, false);
7588 if ((type && POINTER_BOUNDS_TYPE_P (type))
7589 || POINTER_BOUNDS_MODE_P (mode))
7591 /* If we pass bounds in BT then just update remained bounds count.  */
7592 if (cum->bnds_in_bt)
7598 /* Update remained number of bounds to force.  */
7599 if (cum->force_bnd_pass)
7600 cum->force_bnd_pass--;
7607 /* The first arg not going to Bounds Tables resets this counter.  */
7608 cum->bnds_in_bt = 0;
7609 /* For unnamed args we always pass bounds to avoid bounds mess when
7610    passed and received types do not match.  If bounds do not follow
7611    unnamed arg, still pretend required number of bounds were passed.  */
7612 if (cum->force_bnd_pass)
7614 cum->bnd_regno += cum->force_bnd_pass;
7615 cum->force_bnd_pass = 0;
7618 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7619 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7620 else if (TARGET_64BIT)
7621 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7623 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7625 /* For stdarg we expect bounds to be passed for each value passed
7628 cum->force_bnd_pass = nregs;
7629 /* For pointers passed in memory we expect bounds passed in Bounds
7632 cum->bnds_in_bt = chkp_type_bounds_count (type);
7635 /* Define where to put the arguments to a function.
7636    Value is zero to push the argument on the stack,
7637    or a hard register in which to store the argument.
7639    MODE is the argument's machine mode.
7640    TYPE is the data type of the argument (as a tree).
7641    This is null for libcalls where that information may
7643    CUM is a variable of type CUMULATIVE_ARGS which gives info about
7644    the preceding args and about the function being called.
7645    NAMED is nonzero if this argument is a named parameter
7646    (otherwise it is an extra parameter matching an ellipsis).  */
/* NOTE(review): sampled listing — the switch on MODE wrapping the
   branches below is not visible here.  */
7649 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7650 machine_mode orig_mode, const_tree type,
7651 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7653 /* Avoid the AL settings for the Unix64 ABI.  */
7654 if (mode == VOIDmode)
/* Integer-register path: only if the whole value fits in the remaining
   register budget.  */
7670 if (words <= cum->nregs)
7672 int regno = cum->regno;
7674 /* Fastcall allocates the first two DWORD (SImode) or
7675    smaller arguments to ECX and EDX if it isn't an
7681 || (type && AGGREGATE_TYPE_P (type)))
7684 /* ECX not EAX is the first allocated register.  */
7685 if (regno == AX_REG)
7688 return gen_rtx_REG (mode, regno);
7693 if (cum->float_in_sse < 2)
7696 if (cum->float_in_sse < 1)
7700 /* In 32bit, we pass TImode in xmm registers.  */
7707 if (!type || !AGGREGATE_TYPE_P (type))
7710 return gen_reg_or_parallel (mode, orig_mode,
7711 cum->sse_regno + FIRST_SSE_REG)ss;
7717 /* OImode and XImode shouldn't be used directly.  */
/* Wider SSE vector modes take the same SSE-register path.  */
7732 if (!type || !AGGREGATE_TYPE_P (type))
7735 return gen_reg_or_parallel (mode, orig_mode,
7736 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector modes go in MMX registers under the same restriction.  */
7746 if (!type || !AGGREGATE_TYPE_P (type))
7749 return gen_reg_or_parallel (mode, orig_mode,
7750 cum->mmx_regno + FIRST_MMX_REG);
/* Return the register (or PARALLEL) in which an x86-64 SysV argument is
   passed, or the hidden-AL varargs count when MODE is VOIDmode.
   NOTE(review): sampled listing — the in-between lines (including the
   non-varargs branch of the GEN_INT expression) are missing here.  */
7759 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7760 machine_mode orig_mode, const_tree type, bool named)
7762 /* Handle a hidden AL argument containing number of registers
7763    for varargs x86-64 functions.  */
7764 if (mode == VOIDmode)
7765 return GEN_INT (cum->maybe_vaarg
7766 ? (cum->sse_nregs < 0
7767 ? X86_64_SSE_REGPARM_MAX
7788 /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
7794 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7796 &x86_64_int_parameter_registers [cum->regno],
/* Return the register for one MS-ABI x86-64 argument, or NULL-equivalent
   for a stack-passed argument.  Every slot maps to one integer register;
   SFmode/DFmode use the SSE register of the same slot index.  */
7801 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7802 machine_mode orig_mode, bool named,
7803 HOST_WIDE_INT bytes)
7807 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7808    We use value of -2 to specify that current function call is MSABI.  */
7809 if (mode == VOIDmode)
7810 return GEN_INT (-2);
7812 /* If we've run out of registers, it goes on the stack.  */
7813 if (cum->nregs == 0)
7816 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7818 /* Only floating point modes are passed in anything but integer regs.  */
7819 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7822 regno = cum->regno + FIRST_SSE_REG;
7827 /* Unnamed floating parameters are passed in both the
7828    SSE and integer registers.  */
7829 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7830 t2 = gen_rtx_REG (mode, regno);
7831 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7832 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7833 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7836 /* Handle aggregated types passed in register.  */
7837 if (orig_mode == BLKmode)
7839 if (bytes > 0 && bytes <= 8)
7840 mode = (bytes > 4 ? DImode : SImode);
7841 if (mode == BLKmode)
7845 return gen_reg_or_parallel (mode, orig_mode, regno);
7848 /* Return where to put the arguments to a function.
7849    Return zero to push the argument on the stack, or a hard register in which to store the argument.
7851    MODE is the argument's machine mode.  TYPE is the data type of the
7852    argument.  It is null for libcalls where that information may not be
7853    available.  CUM gives information about the preceding args and about
7854    the function being called.  NAMED is nonzero if this argument is a
7855    named parameter (otherwise it is an extra parameter matching an
/* Dispatcher for TARGET_FUNCTION_ARG: handles MPX bounds arguments
   specially, then delegates by ABI.  */
7859 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7860 const_tree type, bool named)
7862 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7863 machine_mode mode = omode;
7864 HOST_WIDE_INT bytes, words;
7867 /* All pointer bounds arguments are handled separately here.  */
7868 if ((type && POINTER_BOUNDS_TYPE_P (type))
7869 || POINTER_BOUNDS_MODE_P (mode))
7871 /* Return NULL if bounds are forced to go in Bounds Table.  */
7872 if (cum->bnds_in_bt)
7874 /* Return the next available bound reg if any.  */
7875 else if (cum->bnd_regno <= LAST_BND_REG)
7876 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7877 /* Return the next special slot number otherwise.  */
7879 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7884 if (mode == BLKmode)
7885 bytes = int_size_in_bytes (type);
7887 bytes = GET_MODE_SIZE (mode);
7888 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7890 /* To simplify the code below, represent vector types with a vector mode
7891    even if MMX/SSE are not active.  */
7892 if (type && TREE_CODE (type) == VECTOR_TYPE)
7893 mode = type_natural_mode (type, cum, false);
7895 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7896 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7897 else if (TARGET_64BIT)
7898 arg = function_arg_64 (cum, mode, omode, type, named);
7900 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7905 /* A C expression that indicates when an argument must be passed by
7906    reference.  If nonzero for an argument, a copy of that argument is
7907    made in memory and a pointer to the argument is passed instead of
7908    the argument itself.  The pointer is passed in whatever way is
7909    appropriate for passing a pointer to that type.  */
7912 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7913 const_tree type, bool)
7915 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7917 /* Bounds are never passed by reference.  */
7918 if ((type && POINTER_BOUNDS_TYPE_P (type))
7919 || POINTER_BOUNDS_MODE_P (mode))
7922 /* See Windows x64 Software Convention.  */
7923 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7925 int msize = (int) GET_MODE_SIZE (mode);
7928 /* Arrays are passed by reference.  */
7929 if (TREE_CODE (type) == ARRAY_TYPE)
7932 if (AGGREGATE_TYPE_P (type))
7934 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7935    are passed by reference.  */
7936 msize = int_size_in_bytes (type);
7940 /* __m128 is passed by reference.  */
7942 case 1: case 2: case 4: case 8:
/* SysV 64-bit: variable-sized types (size == -1) go by reference.  */
7948 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7954 /* Return true when TYPE should be 128bit aligned for 32bit argument
7955    passing ABI.  XXX: This function is obsolete and is only used for
7956    checking psABI compatibility with previous versions of GCC.  */
7959 ix86_compat_aligned_value_p (const_tree type)
7961 machine_mode mode = TYPE_MODE (type);
7962 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
/* Scalar/vector modes that demand 128-bit alignment.  */
7966 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7968 if (TYPE_ALIGN (type) < 128)
7971 if (AGGREGATE_TYPE_P (type))
7973 /* Walk the aggregates recursively.  */
7974 switch (TREE_CODE (type))
7978 case QUAL_UNION_TYPE:
7982 /* Walk all the structure fields.  */
7983 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7985 if (TREE_CODE (field) == FIELD_DECL
7986 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7993 /* Just for use if some languages passes arrays by value.  */
7994 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8005 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8006    XXX: This function is obsolete and is only used for checking psABI
8007    compatibility with previous versions of GCC.  */
8010 ix86_compat_function_arg_boundary (machine_mode mode,
8011 const_tree type, unsigned int align)
8013 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8014    natural boundaries.  */
8015 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8017 /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
8018    make an exception for SSE modes since these require 128bit
8021    The handling here differs from field_alignment.  ICC aligns MMX
8022    arguments to 4 byte boundaries, while structure fields are aligned
8023    to 8 byte boundaries.  */
8026 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8027 align = PARM_BOUNDARY;
8031 if (!ix86_compat_aligned_value_p (type))
8032 align = PARM_BOUNDARY;
/* Never exceed the target's maximum supported alignment.  */
8035 if (align > BIGGEST_ALIGNMENT)
8036 align = BIGGEST_ALIGNMENT;
8040 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* (Continuation of the doc comment is missing from this sampled view.)  */
8044 ix86_contains_aligned_value_p (const_tree type)
8046 machine_mode mode = TYPE_MODE (type);
8048 if (mode == XFmode || mode == XCmode)
8051 if (TYPE_ALIGN (type) < 128)
8054 if (AGGREGATE_TYPE_P (type))
8056 /* Walk the aggregates recursively.  */
8057 switch (TREE_CODE (type))
8061 case QUAL_UNION_TYPE:
8065 /* Walk all the structure fields.  */
8066 for (field = TYPE_FIELDS (type);
8068 field = DECL_CHAIN (field))
8070 if (TREE_CODE (field) == FIELD_DECL
8071 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8078 /* Just for use if some languages passes arrays by value.  */
8079 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
/* Non-aggregate: decide purely from the declared type alignment.  */
8088 return TYPE_ALIGN (type) >= 128;
8093 /* Gives the alignment boundary, in bits, of an argument with the
8094    specified mode and type.  */
/* NOTE(review): sampled listing — the TARGET_64BIT branch and several
   conditions guarding the branches below are not visible here.  */
8097 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8102 /* Since the main variant type is used for call, we convert it to
8103    the main variant type.  */
8104 type = TYPE_MAIN_VARIANT (type);
8105 align = TYPE_ALIGN (type);
8108 align = GET_MODE_ALIGNMENT (mode);
8109 if (align < PARM_BOUNDARY)
8110 align = PARM_BOUNDARY;
8114 unsigned int saved_align = align;
8118 /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
8121 if (mode == XFmode || mode == XCmode)
8122 align = PARM_BOUNDARY;
8124 else if (!ix86_contains_aligned_value_p (type))
8125 align = PARM_BOUNDARY;
8128 align = PARM_BOUNDARY;
/* Warn (once) when the computed boundary differs from what older GCC
   versions would have used — psABI compatibility diagnostic.  */
8133 && align != ix86_compat_function_arg_boundary (mode, type,
8137 inform (input_location,
8138 "The ABI for passing parameters with %d-byte"
8139 " alignment has changed in GCC 4.6",
8140 align / BITS_PER_UNIT);
8147 /* Return true if N is a possible register number of function value.  */
/* NOTE(review): sampled listing — the switch on REGNO enclosing the
   returns below is not visible in this view.  */
8150 ix86_function_value_regno_p (const unsigned int regno)
8157 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8160 return TARGET_64BIT && ix86_abi != MS_ABI;
8163 return chkp_function_instrumented_p (current_function_decl);
8165 /* Complex values are returned in %st(0)/%st(1) pair.  */
8168 /* TODO: The function should depend on current function ABI but
8169    builtins.c would need updating then.  Therefore we use the
8171 if (TARGET_64BIT && ix86_abi == MS_ABI)
8173 return TARGET_FLOAT_RETURNS_IN_80387;
8175 /* Complex values are returned in %xmm0/%xmm1 pair.  */
8181 if (TARGET_MACHO || TARGET_64BIT)
8189 /* Define how to find the value returned by a function.
8190    VALTYPE is the data type of the value (as a tree).
8191    If the precise function being called is known, FUNC is its FUNCTION_DECL;
8192    otherwise, FUNC is 0.  */
8195 function_value_32 (machine_mode orig_mode, machine_mode mode,
8196 const_tree fntype, const_tree fn)
8200 /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
8201    we normally prevent this case when mmx is not available.  However
8202    some ABIs may require the result to be returned like DImode.  */
8203 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8204 regno = FIRST_MMX_REG;
8206 /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
8207    we prevent this case when sse is not available.  However some ABIs
8208    may require the result to be returned like integer TImode.  */
8209 else if (mode == TImode
8210 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8211 regno = FIRST_SSE_REG;
8213 /* 32-byte vector modes in %ymm0.   */
8214 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8215 regno = FIRST_SSE_REG;
8217 /* 64-byte vector modes in %zmm0.   */
8218 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8219 regno = FIRST_SSE_REG;
8221 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
8222 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8223 regno = FIRST_FLOAT_REG;
8225 /* Most things go in %eax.  */
8228 /* Override FP return register with %xmm0 for local functions when
8229    SSE math is enabled or for functions with sseregparm attribute.  */
8230 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8232 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8233 if ((sse_level >= 1 && mode == SFmode)
8234 || (sse_level == 2 && mode == DFmode))
8235 regno = FIRST_SSE_REG;
8238 /* OImode shouldn't be used directly.  */
8239 gcc_assert (mode != OImode);
8241 return gen_rtx_REG (orig_mode, regno);
/* Return the RTX for an x86-64 SysV return value.  NOTE(review):
   sampled listing — the libcall mode dispatch between lines 8251 and
   8277 is largely missing from this view.  */
8245 function_value_64 (machine_mode orig_mode, machine_mode mode,
8250 /* Handle libcalls, which don't provide a type node.  */
8251 if (valtype == NULL)
8265 regno = FIRST_SSE_REG;
8269 regno = FIRST_FLOAT_REG;
8277 return gen_rtx_REG (mode, regno);
8279 else if (POINTER_TYPE_P (valtype))
8281 /* Pointers are always returned in word_mode.  */
8285 ret = construct_container (mode, orig_mode, valtype, 1,
8286 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8287 x86_64_int_return_registers, 0);
8289 /* For zero sized structures, construct_container returns NULL, but we
8290    need to keep rest of compiler happy by returning meaningful value.  */
8292 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Return the RTX for an MS-ABI x86-64 return value; defaults to %rax,
   using %xmm0 for scalar/vector FP-class values.  */
8298 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8301 unsigned int regno = AX_REG;
8305 switch (GET_MODE_SIZE (mode))
8308 if (valtype != NULL_TREE
8309 && !VECTOR_INTEGER_TYPE_P (valtype)
/* NOTE(review): the next line duplicates the !VECTOR_INTEGER_TYPE_P test
   above — almost certainly a copy-paste slip (compare the mirrored
   condition in ix86_return_in_memory, line 8469ff, which tests
   VECTOR_INTEGER / INTEGRAL / VECTOR_FLOAT).  Redundant but harmless;
   fix at the source, not from this sampled view.  */
8310 && !VECTOR_INTEGER_TYPE_P (valtype)
8311 && !INTEGRAL_TYPE_P (valtype)
8312 && !VECTOR_FLOAT_TYPE_P (valtype))
8314 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8315 && !COMPLEX_MODE_P (mode))
8316 regno = FIRST_SSE_REG;
8320 if (mode == SFmode || mode == DFmode)
8321 regno = FIRST_SSE_REG;
8327 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   normalizes FNTYPE_OR_DECL (which may be a decl, a type, or NULL) and
   dispatches to the ABI-specific helper.  */
8331 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8332 machine_mode orig_mode, machine_mode mode)
8334 const_tree fn, fntype;
8337 if (fntype_or_decl && DECL_P (fntype_or_decl))
8338 fn = fntype_or_decl;
8339 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8341 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8342 || POINTER_BOUNDS_MODE_P (mode))
8343 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8344 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8345 return function_value_ms_64 (orig_mode, mode, valtype);
8346 else if (TARGET_64BIT)
8347 return function_value_64 (orig_mode, mode, valtype);
8349 return function_value_32 (orig_mode, mode, fntype, fn);
/* Implement TARGET_FUNCTION_VALUE: map VALTYPE to its natural mode and
   delegate to ix86_function_value_1.  */
8353 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8355 machine_mode mode, orig_mode;
8357 orig_mode = TYPE_MODE (valtype);
8358 mode = type_natural_mode (valtype, NULL, true);
8359 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8362 /* Return an RTX representing a place where a function returns
8363    or receives pointer bounds or NULL if no bounds are returned.
8365    VALTYPE is a data type of a value returned by the function.
8367    FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8368    or FUNCTION_TYPE of the function.
8370    If OUTGOING is false, return a place in which the caller will
8371    see the return value.  Otherwise, return a place where a
8372    function returns a value.  */
8375 ix86_function_value_bounds (const_tree valtype,
8376 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8377 bool outgoing ATTRIBUTE_UNUSED)
8381 if (BOUNDED_TYPE_P (valtype))
8382 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8383 else if (chkp_type_has_pointer (valtype))
8388 unsigned i, bnd_no = 0;
8390 bitmap_obstack_initialize (NULL);
8391 slots = BITMAP_ALLOC (NULL);
8392 chkp_find_bound_slots (valtype, slots);
/* Build one (bound-reg, byte-offset) pair per pointer slot; at most two
   bound registers are available for a return value.  */
8394 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8396 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8397 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8398 gcc_assert (bnd_no < 2);
8399 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8402 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8404 BITMAP_FREE (slots);
8405 bitmap_obstack_release (NULL);
8413 /* Pointer function arguments and return values are promoted to
/* (word_mode — remainder of the comment is missing from this view.)  */
8417 ix86_promote_function_mode (const_tree type, machine_mode mode,
8418 int *punsignedp, const_tree fntype,
8421 if (type != NULL_TREE && POINTER_TYPE_P (type))
8423 *punsignedp = POINTERS_EXTEND_UNSIGNED;
/* Non-pointer types use the generic promotion rules.  */
8426 return default_promote_function_mode (type, mode, punsignedp, fntype,
8430 /* Return true if a structure, union or array with MODE containing FIELD
8431    should be accessed using BLKmode.  */
8434 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8436 /* Union with XFmode must be in BLKmode.  */
8437 return (mode == XFmode
8438 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8439 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* Implement TARGET_LIBCALL_VALUE: libcalls have no type node, so pass
   NULL for both valtype and fntype_or_decl.  */
8443 ix86_libcall_value (machine_mode mode)
8445 return ix86_function_value_1 (NULL, NULL, mode, mode);
8448 /* Return true iff type is returned in memory.  */
/* NOTE(review): sampled listing — branch structure (the ABI dispatch and
   the 32-bit size checks near the end) is partially missing here.  */
8451 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8453 #ifdef SUBTARGET_RETURN_IN_MEMORY
8454 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8456 const machine_mode mode = type_natural_mode (type, NULL, true);
/* Bounds values are returned in bound registers, never memory.  */
8459 if (POINTER_BOUNDS_TYPE_P (type))
8464 if (ix86_function_type_abi (fntype) == MS_ABI)
8466 size = int_size_in_bytes (type);
8468 /* __m128 is returned in xmm0.  */
8469 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8470 || INTEGRAL_TYPE_P (type)
8471 || VECTOR_FLOAT_TYPE_P (type))
8472 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8473 && !COMPLEX_MODE_P (mode)
8474 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8477 /* Otherwise, the size must be exactly in [1248].  */
8478 return size != 1 && size != 2 && size != 4 && size != 8;
/* SysV 64-bit: in memory iff classification needs it.  */
8482 int needed_intregs, needed_sseregs;
8484 return examine_argument (mode, type, 1,
8485 &needed_intregs, &needed_sseregs);
/* 32-bit path below.  */
8490 if (mode == BLKmode)
8493 size = int_size_in_bytes (type);
8495 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8498 if (VECTOR_MODE_P (mode) || mode == TImode)
8500 /* User-created vectors small enough to fit in EAX.  */
8504 /* Unless ABI prescribes otherwise,
8505    MMX/3dNow values are returned in MM0 if available.  */
8508 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8510 /* SSE values are returned in XMM0 if available.  */
8514 /* AVX values are returned in YMM0 if available.  */
8518 /* AVX512F values are returned in ZMM0 if available.  */
8520 return !TARGET_AVX512F;
8529 /* OImode shouldn't be used directly.  */
8530 gcc_assert (mode != OImode);
8538 /* Create the va_list data type. */
8540 /* Returns the calling convention specific va_list date type.
8541 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* NOTE(review): interior lines are elided in this chunk (f_ovf/f_sav
   field types, braces); code kept byte-identical.  Builds the SysV
   4-field __va_list_tag record {gp_offset, fp_offset,
   overflow_arg_area, reg_save_area}; i386 and MS ABI use a plain
   char*.  */
8544 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8546 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8548 /* For i386 we use plain pointer to argument area. */
8549 if (!TARGET_64BIT || abi == MS_ABI)
8550 return build_pointer_type (char_type_node);
8552 record = lang_hooks.types.make_type (RECORD_TYPE);
8553 type_decl = build_decl (BUILTINS_LOCATION,
8554 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8556 f_gpr = build_decl (BUILTINS_LOCATION,
8557 FIELD_DECL, get_identifier ("gp_offset"),
8558 unsigned_type_node);
8559 f_fpr = build_decl (BUILTINS_LOCATION,
8560 FIELD_DECL, get_identifier ("fp_offset"),
8561 unsigned_type_node);
8562 f_ovf = build_decl (BUILTINS_LOCATION,
8563 FIELD_DECL, get_identifier ("overflow_arg_area"),
8565 f_sav = build_decl (BUILTINS_LOCATION,
8566 FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so pass_stdarg can track va_list use.  */
8569 va_list_gpr_counter_field = f_gpr;
8570 va_list_fpr_counter_field = f_fpr;
8572 DECL_FIELD_CONTEXT (f_gpr) = record;
8573 DECL_FIELD_CONTEXT (f_fpr) = record;
8574 DECL_FIELD_CONTEXT (f_ovf) = record;
8575 DECL_FIELD_CONTEXT (f_sav) = record;
8577 TYPE_STUB_DECL (record) = type_decl;
8578 TYPE_NAME (record) = type_decl;
8579 TYPE_FIELDS (record) = f_gpr;
8580 DECL_CHAIN (f_gpr) = f_fpr;
8581 DECL_CHAIN (f_fpr) = f_ovf;
8582 DECL_CHAIN (f_ovf) = f_sav;
8584 layout_type (record);
8586 /* The correct type is an array type of one element. */
8587 return build_array_type (record, build_index_type (size_zero_node));
8590 /* Setup the builtin va_list data type and for 64-bit the additional
8591 calling convention specific va_list data types. */
/* NOTE(review): elided lines hide the TARGET_64BIT guard and the
   "native ABI" assignments (the duplicated variant-copy sequences
   below are the two arms of if/else pairs whose conditions are
   missing).  Code kept byte-identical.  Populates
   sysv_va_list_type_node and ms_va_list_type_node alongside the
   default va_list.  */
8594 ix86_build_builtin_va_list (void)
8596 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8598 /* Initialize abi specific va_list builtin types. */
8602 if (ix86_abi == MS_ABI)
8604 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* Distinct variant so the SysV type compares unequal to va_list.  */
8605 if (TREE_CODE (t) != RECORD_TYPE)
8606 t = build_variant_type_copy (t);
8607 sysv_va_list_type_node = t;
8612 if (TREE_CODE (t) != RECORD_TYPE)
8613 t = build_variant_type_copy (t);
8614 sysv_va_list_type_node = t;
8616 if (ix86_abi != MS_ABI)
8618 t = ix86_build_builtin_va_list_abi (MS_ABI);
8619 if (TREE_CODE (t) != RECORD_TYPE)
8620 t = build_variant_type_copy (t);
8621 ms_va_list_type_node = t;
8626 if (TREE_CODE (t) != RECORD_TYPE)
8627 t = build_variant_type_copy (t);
8628 ms_va_list_type_node = t;
8635 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): elided lines hide local declarations, else-branches and
   braces; code kept byte-identical.  Emits prologue stores that dump
   unconsumed integer argument registers (and, guarded by a runtime
   test of AL, the SSE registers) into the register save area so
   va_arg can find them.  */
8638 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8644 /* GPR size of varargs save area. */
8645 if (cfun->va_list_gpr_size)
8646 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8648 ix86_varargs_gpr_size = 0;
8650 /* FPR size of varargs save area. We don't need it if we don't pass
8651 anything in SSE registers. */
8652 if (TARGET_SSE && cfun->va_list_fpr_size)
8653 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8655 ix86_varargs_fpr_size = 0;
/* Nothing to save -- presumably returns early here (elided).  */
8657 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8660 save_area = frame_pointer_rtx;
8661 set = get_varargs_alias_set ();
8663 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8664 if (max > X86_64_REGPARM_MAX)
8665 max = X86_64_REGPARM_MAX;
/* Spill the still-unused integer parameter registers.  */
8667 for (i = cum->regno; i < max; i++)
8669 mem = gen_rtx_MEM (word_mode,
8670 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8671 MEM_NOTRAP_P (mem) = 1;
8672 set_mem_alias_set (mem, set);
8673 emit_move_insn (mem,
8674 gen_rtx_REG (word_mode,
8675 x86_64_int_parameter_registers[i]));
8678 if (ix86_varargs_fpr_size)
8681 rtx_code_label *label;
8684 /* Now emit code to save SSE registers. The AX parameter contains number
8685 of SSE parameter registers used to call this function, though all we
8686 actually check here is the zero/non-zero status. */
8688 label = gen_label_rtx ();
8689 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8690 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8693 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8694 we used movdqa (i.e. TImode) instead? Perhaps even better would
8695 be if we could determine the real mode of the data, via a hook
8696 into pass_stdarg. Ignore all that for now. */
/* The save area must be aligned for the SSE store mode.  */
8698 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8699 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8701 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8702 if (max > X86_64_SSE_REGPARM_MAX)
8703 max = X86_64_SSE_REGPARM_MAX;
8705 for (i = cum->sse_regno; i < max; ++i)
8707 mem = plus_constant (Pmode, save_area,
8708 i * 16 + ix86_varargs_gpr_size);
8709 mem = gen_rtx_MEM (smode, mem);
8710 MEM_NOTRAP_P (mem) = 1;
8711 set_mem_alias_set (mem, set);
8712 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8714 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
/* MS-ABI worker for TARGET_SETUP_INCOMING_VARARGS: spill the remaining
   named-register parameters to their caller-allocated home slots above
   the return address, so va_arg sees a contiguous argument array.
   NOTE(review): some lines (braces, local decls) are elided; code kept
   byte-identical.  */
8722 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8724 alias_set_type set = get_varargs_alias_set ();
8727 /* Reset to zero, as there might be a sysv vaarg used
8729 ix86_varargs_gpr_size = 0;
8730 ix86_varargs_fpr_size = 0;
8732 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8736 mem = gen_rtx_MEM (Pmode,
8737 plus_constant (Pmode, virtual_incoming_args_rtx,
8738 i * UNITS_PER_WORD));
8739 MEM_NOTRAP_P (mem) = 1;
8740 set_mem_alias_set (mem, set);
8742 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8743 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: dispatch to the MS or SysV
   64-bit worker after advancing past the last named argument.
   NOTE(review): elided lines hide the 32-bit early return and the
   next_cum initialization; code kept byte-identical.  */
8748 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8749 tree type, int *, int no_rtl)
8751 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8752 CUMULATIVE_ARGS next_cum;
8755 /* This argument doesn't appear to be used anymore. Which is good,
8756 because the old code here didn't suppress rtl generation. */
8757 gcc_assert (!no_rtl);
8762 fntype = TREE_TYPE (current_function_decl);
8764 /* For varargs, we do not want to skip the dummy va_dcl argument.
8765 For stdargs, we do want to skip the last named argument. */
8767 if (stdarg_p (fntype))
8768 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8771 if (cum->call_abi == MS_ABI)
8772 setup_incoming_varargs_ms_64 (&next_cum);
8774 setup_incoming_varargs_64 (&next_cum);
/* MPX counterpart of ix86_setup_incoming_varargs: for instrumented
   functions, store the bounds of each spilled integer argument
   register next to the register save area (bndstx), loading them from
   bound registers or, once those run out, from the Bounds Table
   (bndldx).  NOTE(review): elided lines hide locals, braces and the
   else-arm structure; code kept byte-identical.  */
8778 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8779 enum machine_mode mode,
8781 int *pretend_size ATTRIBUTE_UNUSED,
8784 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8785 CUMULATIVE_ARGS next_cum;
8788 int bnd_reg, i, max;
8790 gcc_assert (!no_rtl);
8792 /* Do nothing if we use plain pointer to argument area. */
8793 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8796 fntype = TREE_TYPE (current_function_decl);
8798 /* For varargs, we do not want to skip the dummy va_dcl argument.
8799 For stdargs, we do want to skip the last named argument. */
8801 if (stdarg_p (fntype))
8802 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8804 save_area = frame_pointer_rtx;
8806 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8807 if (max > X86_64_REGPARM_MAX)
8808 max = X86_64_REGPARM_MAX;
8810 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8811 if (chkp_function_instrumented_p (current_function_decl))
8812 for (i = cum->regno; i < max; i++)
8814 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8815 rtx reg = gen_rtx_REG (DImode,
8816 x86_64_int_parameter_registers[i]);
/* Bounds still live in BND0..BND3 are used directly; later ones must
   be reloaded from the Bounds Table.  */
8820 if (bnd_reg <= LAST_BND_REG)
8821 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8825 plus_constant (Pmode, arg_pointer_rtx,
8826 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8827 bounds = gen_reg_rtx (BNDmode);
8828 emit_insn (BNDmode == BND64mode
8829 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8830 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8833 emit_insn (BNDmode == BND64mode
8834 ? gen_bnd64_stx (addr, ptr, bounds)
8835 : gen_bnd32_stx (addr, ptr, bounds));
8842 /* Checks if TYPE is of kind va_list char *. */
/* True when TYPE is the plain char* flavor of va_list (always on
   32-bit; on 64-bit, the MS va_list or -- under the MS default ABI --
   the generic va_list).  NOTE(review): elided lines hide locals and
   the 32-bit early return; code kept byte-identical.  */
8845 is_va_list_char_pointer (tree type)
8849 /* For 32-bit it is always true. */
8852 canonic = ix86_canonical_va_list_type (type);
8853 return (canonic == ms_va_list_type_node
8854 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8857 /* Implement va_start. */
/* NOTE(review): many interior lines are elided (locals, braces,
   sequence handling, field arguments); code kept byte-identical.
   For char*-style va_lists this defers to the standard expander
   (with a split-stack adjustment); for the SysV 64-bit record it
   initializes gp_offset/fp_offset from the registers already
   consumed and points overflow_arg_area / reg_save_area at the
   stack and the prologue-built save area.  MPX-instrumented
   functions additionally get zero bounds stored for each pointer
   field.  */
8860 ix86_va_start (tree valist, rtx nextarg)
8862 HOST_WIDE_INT words, n_gpr, n_fpr;
8863 tree f_gpr, f_fpr, f_ovf, f_sav;
8864 tree gpr, fpr, ovf, sav, t;
8868 if (flag_split_stack
8869 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8871 unsigned int scratch_regno;
8873 /* When we are splitting the stack, we can't refer to the stack
8874 arguments using internal_arg_pointer, because they may be on
8875 the old stack. The split stack prologue will arrange to
8876 leave a pointer to the old stack arguments in a scratch
8877 register, which we here copy to a pseudo-register. The split
8878 stack prologue can't set the pseudo-register directly because
8879 it (the prologue) runs before any registers have been saved. */
8881 scratch_regno = split_stack_prologue_scratch_regno ();
8882 if (scratch_regno != INVALID_REGNUM)
8887 reg = gen_reg_rtx (Pmode);
8888 cfun->machine->split_stack_varargs_pointer = reg;
8891 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
/* The copy must execute at function entry, before any other code.  */
8895 push_topmost_sequence ();
8896 emit_insn_after (seq, entry_of_function ());
8897 pop_topmost_sequence ();
8901 /* Only 64bit target needs something special. */
8902 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8904 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8905 std_expand_builtin_va_start (valist, nextarg);
8910 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8911 next = expand_binop (ptr_mode, add_optab,
8912 cfun->machine->split_stack_varargs_pointer,
8913 crtl->args.arg_offset_rtx,
8914 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8915 convert_move (va_r, next, 0);
8917 /* Store zero bounds for va_list. */
8918 if (chkp_function_instrumented_p (current_function_decl))
8919 chkp_expand_bounds_reset_for_mem (valist,
8920 make_tree (TREE_TYPE (valist),
8927 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8928 f_fpr = DECL_CHAIN (f_gpr);
8929 f_ovf = DECL_CHAIN (f_fpr);
8930 f_sav = DECL_CHAIN (f_ovf);
8932 valist = build_simple_mem_ref (valist);
8933 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8934 /* The following should be folded into the MEM_REF offset. */
8935 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8937 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8939 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8941 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8944 /* Count number of gp and fp argument registers used. */
8945 words = crtl->args.info.words;
8946 n_gpr = crtl->args.info.regno;
8947 n_fpr = crtl->args.info.sse_regno;
8949 if (cfun->va_list_gpr_size)
/* gp_offset = bytes of integer registers already consumed.  */
8951 type = TREE_TYPE (gpr);
8952 t = build2 (MODIFY_EXPR, type,
8953 gpr, build_int_cst (type, n_gpr * 8));
8954 TREE_SIDE_EFFECTS (t) = 1;
8955 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8958 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts past the whole GPR part of the save area.  */
8960 type = TREE_TYPE (fpr);
8961 t = build2 (MODIFY_EXPR, type, fpr,
8962 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8963 TREE_SIDE_EFFECTS (t) = 1;
8964 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8967 /* Find the overflow area. */
8968 type = TREE_TYPE (ovf);
8969 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8970 ovf_rtx = crtl->args.internal_arg_pointer;
8972 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8973 t = make_tree (type, ovf_rtx);
8975 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8977 /* Store zero bounds for overflow area pointer. */
8978 if (chkp_function_instrumented_p (current_function_decl))
8979 chkp_expand_bounds_reset_for_mem (ovf, t);
8981 t = build2 (MODIFY_EXPR, type, ovf, t);
8982 TREE_SIDE_EFFECTS (t) = 1;
8983 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8985 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8987 /* Find the register save area.
8988 Prologue of the function save it right above stack frame. */
8989 type = TREE_TYPE (sav);
8990 t = make_tree (type, frame_pointer_rtx);
8991 if (!ix86_varargs_gpr_size)
8992 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8994 /* Store zero bounds for save area pointer. */
8995 if (chkp_function_instrumented_p (current_function_decl))
8996 chkp_expand_bounds_reset_for_mem (sav, t);
8998 t = build2 (MODIFY_EXPR, type, sav, t);
8999 TREE_SIDE_EFFECTS (t) = 1;
9000 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9004 /* Implement va_arg. */
/* NOTE(review): this is the largest and most heavily elided block in
   the chunk -- many locals, braces, else-arms and whole statements are
   missing; code kept byte-identical.  Overall shape: classify the
   argument (construct_container); if it fits in registers, emit a
   runtime gp_offset/fp_offset range check that either copies the
   pieces out of the register save area (directly, or via a temporary
   when alignment/layout forces memcpy-style assembly) and bumps the
   offsets, or falls through to the aligned overflow-area path.  */
9007 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9010 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9011 tree f_gpr, f_fpr, f_ovf, f_sav;
9012 tree gpr, fpr, ovf, sav, t;
9014 tree lab_false, lab_over = NULL_TREE;
9019 machine_mode nat_mode;
9020 unsigned int arg_boundary;
9022 /* Only 64bit target needs something special. */
9023 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9024 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9026 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9027 f_fpr = DECL_CHAIN (f_gpr);
9028 f_ovf = DECL_CHAIN (f_fpr);
9029 f_sav = DECL_CHAIN (f_ovf);
9031 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9032 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9033 valist = build_va_arg_indirect_ref (valist);
9034 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9035 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9036 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer instead.  */
9038 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9040 type = build_pointer_type (type);
9041 size = int_size_in_bytes (type);
9042 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9044 nat_mode = type_natural_mode (type, NULL, false);
9059 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9060 if (!TARGET_64BIT_MS_ABI)
9067 container = construct_container (nat_mode, TYPE_MODE (type),
9068 type, 0, X86_64_REGPARM_MAX,
9069 X86_64_SSE_REGPARM_MAX, intreg,
9074 /* Pull the value out of the saved registers. */
9076 addr = create_tmp_var (ptr_type_node, "addr");
9080 int needed_intregs, needed_sseregs;
9082 tree int_addr, sse_addr;
9084 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9085 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9087 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9089 need_temp = (!REG_P (container)
9090 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9091 || TYPE_ALIGN (type) > 128));
9093 /* In case we are passing structure, verify that it is consecutive block
9094 on the register save area. If not we need to do moves. */
9095 if (!need_temp && !REG_P (container))
9097 /* Verify that all registers are strictly consecutive */
9098 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9102 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9104 rtx slot = XVECEXP (container, 0, i);
9105 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9106 || INTVAL (XEXP (slot, 1)) != i * 16)
9114 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9116 rtx slot = XVECEXP (container, 0, i);
9117 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9118 || INTVAL (XEXP (slot, 1)) != i * 8)
9130 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9131 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9134 /* First ensure that we fit completely in registers. */
9137 t = build_int_cst (TREE_TYPE (gpr),
9138 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9139 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9140 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9141 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9142 gimplify_and_add (t, pre_p);
9146 t = build_int_cst (TREE_TYPE (fpr),
9147 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9148 + X86_64_REGPARM_MAX * 8);
9149 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9150 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9151 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9152 gimplify_and_add (t, pre_p);
9155 /* Compute index to start of area used for integer regs. */
9158 /* int_addr = gpr + sav; */
9159 t = fold_build_pointer_plus (sav, gpr);
9160 gimplify_assign (int_addr, t, pre_p);
9164 /* sse_addr = fpr + sav; */
9165 t = fold_build_pointer_plus (sav, fpr);
9166 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: assemble the value piecewise into a temporary.  */
9170 int i, prev_size = 0;
9171 tree temp = create_tmp_var (type, "va_arg_tmp");
9174 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9175 gimplify_assign (addr, t, pre_p);
9177 for (i = 0; i < XVECLEN (container, 0); i++)
9179 rtx slot = XVECEXP (container, 0, i);
9180 rtx reg = XEXP (slot, 0);
9181 machine_mode mode = GET_MODE (reg);
9187 tree dest_addr, dest;
9188 int cur_size = GET_MODE_SIZE (mode);
9190 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9191 prev_size = INTVAL (XEXP (slot, 1));
/* Last piece may be narrower than the register mode.  */
9192 if (prev_size + cur_size > size)
9194 cur_size = size - prev_size;
9195 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9196 if (mode == BLKmode)
9199 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9200 if (mode == GET_MODE (reg))
9201 addr_type = build_pointer_type (piece_type);
9203 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9205 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9208 if (SSE_REGNO_P (REGNO (reg)))
9210 src_addr = sse_addr;
9211 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9215 src_addr = int_addr;
9216 src_offset = REGNO (reg) * 8;
9218 src_addr = fold_convert (addr_type, src_addr);
9219 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9221 dest_addr = fold_convert (daddr_type, addr);
9222 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9223 if (cur_size == GET_MODE_SIZE (mode))
9225 src = build_va_arg_indirect_ref (src_addr);
9226 dest = build_va_arg_indirect_ref (dest_addr);
9228 gimplify_assign (dest, src, pre_p);
9233 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9234 3, dest_addr, src_addr,
9235 size_int (cur_size));
9236 gimplify_and_add (copy, pre_p);
9238 prev_size += cur_size;
/* Consume the registers we used.  */
9244 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9245 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9246 gimplify_assign (gpr, t, pre_p);
9251 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9252 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9253 gimplify_assign (fpr, t, pre_p);
9256 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9258 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9261 /* ... otherwise out of the overflow area. */
9263 /* When we align parameter on stack for caller, if the parameter
9264 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9265 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
9266 here with caller. */
9267 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9268 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9269 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9271 /* Care for on-stack alignment if needed. */
9272 if (arg_boundary <= 64 || size == 0)
9276 HOST_WIDE_INT align = arg_boundary / 8;
/* Round the overflow pointer up to the argument's alignment.  */
9277 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9278 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9279 build_int_cst (TREE_TYPE (t), -align));
9282 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9283 gimplify_assign (addr, t, pre_p);
9285 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9286 gimplify_assign (unshare_expr (ovf), t, pre_p);
9289 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9291 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9292 addr = fold_convert (ptrtype, addr);
/* Extra dereference for by-reference arguments.  */
9295 addr = build_va_arg_indirect_ref (addr);
9296 return build_va_arg_indirect_ref (addr);
9299 /* Return true if OPNUM's MEM should be matched
9300 in movabs* patterns. */
/* Digs the MEM out of INSN's (possibly PARALLEL-wrapped) SET operand
   OPNUM, stripping SUBREGs, and rejects volatile MEMs unless
   volatile_ok.  NOTE(review): braces/locals elided; code kept
   byte-identical.  */
9303 ix86_check_movabs (rtx insn, int opnum)
9307 set = PATTERN (insn);
9308 if (GET_CODE (set) == PARALLEL)
9309 set = XVECEXP (set, 0, 0);
9310 gcc_assert (GET_CODE (set) == SET);
9311 mem = XEXP (set, opnum);
9312 while (GET_CODE (mem) == SUBREG)
9313 mem = SUBREG_REG (mem);
9314 gcc_assert (MEM_P (mem));
9315 return volatile_ok || !MEM_VOLATILE_P (mem);
9318 /* Initialize the table of extra 80387 mathematical constants. */
/* Lazily fills ext_80387_constants_table with the five constants the
   x87 can load with a single instruction (fldlg2, fldln2, fldl2e,
   fldl2t, fldpi), rounded to XFmode, and sets the init flag.  */
9321 init_ext_80387_constants (void)
9323 static const char * cst[5] =
9325 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9326 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9327 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9328 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9329 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9333 for (i = 0; i < 5; i++)
9335 real_from_string (&ext_80387_constants_table[i], cst[i]);
9336 /* Ensure each constant is rounded to XFmode precision. */
9337 real_convert (&ext_80387_constants_table[i],
9338 XFmode, &ext_80387_constants_table[i]);
9341 ext_80387_constants_init = 1;
9344 /* Return non-zero if the constant is something that
9345 can be loaded with a special instruction. */
/* Classifies CONST_DOUBLE X for single-instruction x87 loads.
   NOTE(review): the return statements that map matches to their codes
   are elided here; visibly, 0.0/1.0 are handled first, then the five
   extended constants, then -0.0 and -1.0 (split as fldz/fld1 + fchs).
   Code kept byte-identical.  */
9348 standard_80387_constant_p (rtx x)
9350 machine_mode mode = GET_MODE (x);
9354 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9357 if (x == CONST0_RTX (mode))
9359 if (x == CONST1_RTX (mode))
9362 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9364 /* For XFmode constants, try to find a special 80387 instruction when
9365 optimizing for size or on those CPUs that benefit from them. */
9367 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
9371 if (! ext_80387_constants_init)
9372 init_ext_80387_constants ();
9374 for (i = 0; i < 5; i++)
9375 if (real_identical (&r, &ext_80387_constants_table[i]))
9379 /* Load of the constant -0.0 or -1.0 will be split as
9380 fldz;fchs or fld1;fchs sequence. */
9381 if (real_isnegzero (&r))
9383 if (real_identical (&r, &dconstm1))
9389 /* Return the opcode of the special instruction to be used to load
/* Maps the classification from standard_80387_constant_p to the
   corresponding x87 mnemonic.  NOTE(review): the entire switch body
   (the case arms returning "fldz", "fld1", etc.) is elided in this
   chunk; code kept byte-identical.  */
9393 standard_80387_constant_opcode (rtx x)
9395 switch (standard_80387_constant_p (x))
9419 /* Return the CONST_DOUBLE representing the 80387 constant that is
9420 loaded by the specified special instruction. The argument IDX
9421 matches the return value from standard_80387_constant_p. */
/* NOTE(review): the switch mapping IDX to a table index `i` is elided;
   only the lazy-init and the final CONST_DOUBLE construction are
   visible.  Code kept byte-identical.  */
9424 standard_80387_constant_rtx (int idx)
9428 if (! ext_80387_constants_init)
9429 init_ext_80387_constants ();
9445 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9449 /* Return 1 if X is all 0s and 2 if x is all 1s
9450 in supported SSE/AVX vector mode. */
/* NOTE(review): the return statements and the mode checks guarding the
   all-ones case are elided; code kept byte-identical.  */
9453 standard_sse_constant_p (rtx x)
9455 machine_mode mode = GET_MODE (x);
9457 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9459 if (vector_all_ones_operand (x, mode))
9487 /* Return the opcode of the special instruction to be used to load
/* Chooses the xor/pcmpeq/vpternlog template that materializes an
   all-zeros or all-ones vector constant, keyed on the insn's mode
   attribute and the available ISA (AVX512F/DQ/VL, AVX, SSE).
   NOTE(review): several case labels and the all-ones branch header
   are elided; code kept byte-identical.  */
9491 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9493 switch (standard_sse_constant_p (x))
9496 switch (get_attr_mode (insn))
9499 return "vpxord\t%g0, %g0, %g0";
9501 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9502 : "vpxord\t%g0, %g0, %g0";
9504 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9505 : "vpxorq\t%g0, %g0, %g0";
9507 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9508 : "%vpxor\t%0, %d0";
9510 return "%vxorpd\t%0, %d0";
9512 return "%vxorps\t%0, %d0";
9515 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9516 : "vpxor\t%x0, %x0, %x0";
9518 return "vxorpd\t%x0, %x0, %x0";
9520 return "vxorps\t%x0, %x0, %x0";
9528 || get_attr_mode (insn) == MODE_XI
9529 || get_attr_mode (insn) == MODE_V8DF
9530 || get_attr_mode (insn) == MODE_V16SF
9531 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9533 return "vpcmpeqd\t%0, %0, %0";
9535 return "pcmpeqd\t%0, %0";
9543 /* Returns true if OP contains a symbol reference */
/* Recursive walk over OP's rtx format string: true if any SYMBOL_REF
   or LABEL_REF appears anywhere in the expression tree.
   NOTE(review): braces and some locals are elided; code kept
   byte-identical.  */
9546 symbolic_reference_mentioned_p (rtx op)
9551 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9554 fmt = GET_RTX_FORMAT (GET_CODE (op));
9555 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9561 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9562 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9566 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9573 /* Return true if it is appropriate to emit `ret' instructions in the
9574 body of a function. Do this only if the epilogue is simple, needing a
9575 couple of insns. Prior to reloading, we can't tell how many registers
9576 must be saved, so return false then. Return false if there is no frame
9577 marker to de-allocate. */
/* NOTE(review): braces/early-return bodies elided; code kept
   byte-identical.  A bare `ret' is usable only when the frame is one
   word (just the return address) and no registers need restoring.  */
9580 ix86_can_use_return_insn_p (void)
9582 struct ix86_frame frame;
9584 if (! reload_completed || frame_pointer_needed)
9587 /* Don't allow more than 32k pop, since that's all we can do
9588 with one instruction. */
9589 if (crtl->args.pops_args && crtl->args.size >= 32768)
9592 ix86_compute_frame_layout (&frame);
9593 return (frame.stack_pointer_offset == UNITS_PER_WORD
9594 && (frame.nregs + frame.nsseregs) == 0);
9597 /* Value should be nonzero if functions must have frame pointers.
9598 Zero means the frame pointer need not be set up (and parms may
9599 be accessed via the stack pointer) in functions that seem suitable. */
/* Implement TARGET_FRAME_POINTER_REQUIRED.  Each visible `if` returns
   true on its (elided) next line; the final fall-through presumably
   returns false.  Code kept byte-identical.  */
9602 ix86_frame_pointer_required (void)
9604 /* If we accessed previous frames, then the generated code expects
9605 to be able to access the saved ebp value in our frame. */
9606 if (cfun->machine->accesses_prev_frame)
9609 /* Several x86 os'es need a frame pointer for other reasons,
9610 usually pertaining to setjmp. */
9611 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9614 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9615 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9618 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
9619 allocation is 4GB. */
9620 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9623 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9624 turns off the frame pointer by default. Turn it back on now if
9625 we've not got a leaf function. */
9626 if (TARGET_OMIT_LEAF_FRAME_POINTER
9628 || ix86_current_function_calls_tls_descriptor)
9631 if (crtl->profile && !flag_fentry)
9637 /* Record that the current function accesses previous call frames. */
/* Called when __builtin_frame_address/__builtin_return_address walk up
   the stack; forces ix86_frame_pointer_required to return true.  */
9640 ix86_setup_frame_addresses (void)
9642 cfun->machine->accesses_prev_frame = 1;
9645 #ifndef USE_HIDDEN_LINKONCE
9646 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9647 # define USE_HIDDEN_LINKONCE 1
9649 # define USE_HIDDEN_LINKONCE 0
9653 static int pic_labels_used;
9655 /* Fills in the label name that should be used for a pc thunk for
9656 the given register. */
/* 32-bit only: "__x86.get_pc_thunk.<reg>" when hidden link-once
   sections are available, otherwise a compiler-local "LPR" label
   keyed by register number.  NAME must hold at least 32 bytes.  */
9659 get_pc_thunk_name (char name[32], unsigned int regno)
9661 gcc_assert (!TARGET_64BIT);
9663 if (USE_HIDDEN_LINKONCE)
9664 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9666 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9670 /* This function generates code for -fpic that loads %ebx with
9671 the return address of the caller and then returns. */
/* Implement TARGET_ASM_CODE_END: emit one get-pc thunk per register
   recorded in pic_labels_used (mov (%esp),%reg; ret), choosing
   section/visibility per platform (Darwin weak, hidden COMDAT, or
   plain text section).  NOTE(review): elided lines hide locals,
   braces and the #if TARGET_MACHO conditionals; code kept
   byte-identical.  */
9674 ix86_code_end (void)
9679 for (regno = AX_REG; regno <= SP_REG; regno++)
9684 if (!(pic_labels_used & (1 << regno)))
9687 get_pc_thunk_name (name, regno);
9689 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9690 get_identifier (name),
9691 build_function_type_list (void_type_node, NULL_TREE));
9692 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9693 NULL_TREE, void_type_node);
9694 TREE_PUBLIC (decl) = 1;
9695 TREE_STATIC (decl) = 1;
9696 DECL_IGNORED_P (decl) = 1;
/* Darwin: coalesced text section with weak/private-extern marking.  */
9701 switch_to_section (darwin_sections[text_coal_section]);
9702 fputs ("\t.weak_definition\t", asm_out_file);
9703 assemble_name (asm_out_file, name);
9704 fputs ("\n\t.private_extern\t", asm_out_file);
9705 assemble_name (asm_out_file, name);
9706 putc ('\n', asm_out_file);
9707 ASM_OUTPUT_LABEL (asm_out_file, name);
9708 DECL_WEAK (decl) = 1;
9712 if (USE_HIDDEN_LINKONCE)
9714 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9716 targetm.asm_out.unique_section (decl, 0);
9717 switch_to_section (get_named_section (decl, NULL, 0));
9719 targetm.asm_out.globalize_label (asm_out_file, name);
9720 fputs ("\t.hidden\t", asm_out_file);
9721 assemble_name (asm_out_file, name);
9722 putc ('\n', asm_out_file);
9723 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9727 switch_to_section (text_section);
9728 ASM_OUTPUT_LABEL (asm_out_file, name);
9731 DECL_INITIAL (decl) = make_node (BLOCK);
9732 current_function_decl = decl;
9733 init_function_start (decl);
9734 first_function_block_is_cold = false;
9735 /* Make sure unwind info is emitted for the thunk if needed. */
9736 final_start_function (emit_barrier (), asm_out_file, 1);
9738 /* Pad stack IP move with 4 instructions (two NOPs count
9739 as one instruction). */
9740 if (TARGET_PAD_SHORT_FUNCTION)
9745 fputs ("\tnop\n", asm_out_file);
9748 xops[0] = gen_rtx_REG (Pmode, regno);
9749 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9750 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9751 output_asm_insn ("%!ret", NULL);
9752 final_end_function ();
9753 init_insn_lengths ();
9754 free_after_compilation (cfun);
9756 current_function_decl = NULL;
9759 if (flag_split_stack)
9760 file_end_indicate_split_stack ();
9763 /* Emit code for the SET_GOT patterns. */
/* Emits assembly that loads the GOT base into DEST: VxWorks RTP reads
   it from GOTT_BASE/GOTT_INDEX; otherwise either an inline mov of a
   local label (non-PIC / Darwin pic-base path) or a call to the
   per-register get-pc thunk, followed by the _GLOBAL_OFFSET_TABLE_
   add.  NOTE(review): elided lines hide locals, braces and the
   TARGET_MACHO conditionals; code kept byte-identical.  */
9766 output_set_got (rtx dest, rtx label)
9772 if (TARGET_VXWORKS_RTP && flag_pic)
9774 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9775 xops[2] = gen_rtx_MEM (Pmode,
9776 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9777 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9779 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9780 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9781 an unadorned address. */
9782 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9783 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9784 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9788 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9793 /* We don't need a pic base, we're not producing pic. */
9796 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9797 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9798 targetm.asm_out.internal_label (asm_out_file, "L",
9799 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* PIC path: route through the get-pc thunk for DEST's register.  */
9804 get_pc_thunk_name (name, REGNO (dest));
9805 pic_labels_used |= 1 << REGNO (dest);
9807 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9808 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9809 output_asm_insn ("%!call\t%X2", xops);
9812 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9813 This is what will be referenced by the Mach-O PIC subsystem. */
9814 if (machopic_should_output_picbase_label () || !label)
9815 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9817 /* When we are restoring the pic base at the site of a nonlocal label,
9818 and we decided to emit the pic base above, we will still output a
9819 local label used for calculating the correction offset (even though
9820 the offset will be 0 in that case). */
9822 targetm.asm_out.internal_label (asm_out_file, "L",
9823 CODE_LABEL_NUMBER (label));
9828 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9833 /* Generate a "push" pattern for input ARG. */
/* Builds (set (mem:word (pre_dec sp)) arg) and updates the tracked
   frame-state (CFA offset if the CFA is currently the stack pointer,
   and the running sp_offset) to account for the push.
   NOTE(review): the function header and the final argument line of the
   return expression are elided in this excerpt.  */
9838 struct machine_function *m = cfun->machine;
9840 if (m->fs.cfa_reg == stack_pointer_rtx)
9841 m->fs.cfa_offset += UNITS_PER_WORD;
9842 m->fs.sp_offset += UNITS_PER_WORD;
/* Pushes always operate in word_mode; widen a narrower hard reg.  */
9844 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9845 arg = gen_rtx_REG (word_mode, REGNO (arg));
9847 return gen_rtx_SET (VOIDmode,
9848 gen_rtx_MEM (word_mode,
9849 gen_rtx_PRE_DEC (Pmode,
9850 stack_pointer_rtx)),
9854 /* Generate a "pop" pattern for input ARG. */
/* Builds (set arg (mem:word (post_inc sp))).  Unlike gen_push above,
   no frame-state bookkeeping is visible here — presumably done by the
   callers; NOTE(review): the function header is elided in this
   excerpt, so that cannot be confirmed from this fragment.  */
9859 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9860 arg = gen_rtx_REG (word_mode, REGNO (arg));
9862 return gen_rtx_SET (VOIDmode,
9864 gen_rtx_MEM (word_mode,
9865 gen_rtx_POST_INC (Pmode,
9866 stack_pointer_rtx)));
9869 /* Return >= 0 if there is an unused call-clobbered register available
9870 for the entire function. */
/* Used to pick an alternate PIC register (eax/ecx/edx, scanned from
   regno 2 down to 0 below) when the real PIC register need not be
   preserved.  Returns INVALID_REGNUM when no candidate exists or when
   a pseudo PIC register is in use.  NOTE(review): the enclosing
   condition for the scan and the DRAP initialization are partly elided
   in this excerpt.  */
9873 ix86_select_alt_pic_regnum (void)
9875 if (ix86_use_pseudo_pic_reg ())
9876 return INVALID_REGNUM;
9880 && !ix86_current_function_calls_tls_descriptor)
9883 /* Can't use the same register for both PIC and DRAP. */
9885 drap = REGNO (crtl->drap_reg);
9888 for (i = 2; i >= 0; --i)
9889 if (i != drap && !df_regs_ever_live_p (i))
9893 return INVALID_REGNUM;
9896 /* Return TRUE if we need to save REGNO. */
/* MAYBE_EH_RETURN: when true, the EH return data registers are also
   considered in need of saving (checked in the elided loop following
   the crtl->calls_eh_return test below).  NOTE(review): several lines,
   including the function header's return type and the EH/DRAP branch
   bodies, are elided in this excerpt.  */
9899 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
/* Special-case the PIC register: it must be saved when it is live as a
   hard register, i.e. when no pseudo PIC register is used.  */
9901 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9902 && pic_offset_table_rtx)
9904 if (ix86_use_pseudo_pic_reg ())
9906 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9907 _mcount in prologue. */
9908 if (!TARGET_64BIT && flag_pic && crtl->profile)
9911 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9913 || crtl->calls_eh_return
9914 || crtl->uses_const_pool
9915 || cfun->has_nonlocal_label)
9916 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9919 if (crtl->calls_eh_return && maybe_eh_return)
9924 unsigned test = EH_RETURN_DATA_REGNO (i);
9925 if (test == INVALID_REGNUM)
/* DRAP register must survive unless its save/restore was suppressed.  */
9933 && regno == REGNO (crtl->drap_reg)
9934 && !cfun->machine->no_drap_save_restore)
/* Generic rule: save call-saved, non-fixed registers that are ever
   live; the hard frame pointer only when it is not in use as FP.  */
9937 return (df_regs_ever_live_p (regno)
9938 && !call_used_regs[regno]
9939 && !fixed_regs[regno]
9940 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9943 /* Return number of saved general purpose registers. */
/* Counts every non-SSE hard register for which ix86_save_reg says a
   save is needed.  NOTE(review): the function header, the counter
   declaration/increment and the return are elided in this excerpt.  */
9946 ix86_nsaved_regs (void)
9951 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9952 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9957 /* Return number of saved SSE registers. */
/* Only the 64-bit MS ABI has callee-saved SSE registers; for every
   other ABI the (elided) early return presumably yields 0 — the
   !TARGET_64BIT_MS_ABI guard below supports that reading.
   NOTE(review): header, counter and return lines are elided.  */
9960 ix86_nsaved_sseregs (void)
9965 if (!TARGET_64BIT_MS_ABI)
9967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9968 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9973 /* Given FROM and TO register numbers, say whether this elimination is
9974 allowed. If stack alignment is needed, we can only replace argument
9975 pointer with hard frame pointer, or replace frame pointer with stack
9976 pointer. Otherwise, frame pointer elimination is automatically
9977 handled and all other eliminations are valid. */
/* Implements TARGET_CAN_ELIMINATE.  NOTE(review): return type line,
   braces and the else keyword are elided in this excerpt.  */
9980 ix86_can_eliminate (const int from, const int to)
9982 if (stack_realign_fp)
9983 return ((from == ARG_POINTER_REGNUM
9984 && to == HARD_FRAME_POINTER_REGNUM)
9985 || (from == FRAME_POINTER_REGNUM
9986 && to == STACK_POINTER_REGNUM));
/* Eliminating to SP is only valid when no frame pointer is needed.  */
9988 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9991 /* Return the offset between two registers, one to be eliminated, and the other
9992 its replacement, at the start of a routine. */
/* Recomputes the frame layout and answers from the cached offsets.
   Only the four valid FROM/TO pairs (see ix86_can_eliminate) are
   expected; anything else trips the asserts.  */
9995 ix86_initial_elimination_offset (int from, int to)
9997 struct ix86_frame frame;
9998 ix86_compute_frame_layout (&frame);
10000 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10001 return frame.hard_frame_pointer_offset;
10002 else if (from == FRAME_POINTER_REGNUM
10003 && to == HARD_FRAME_POINTER_REGNUM)
10004 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases all eliminate to the stack pointer.  */
10007 gcc_assert (to == STACK_POINTER_REGNUM);
10009 if (from == ARG_POINTER_REGNUM)
10010 return frame.stack_pointer_offset;
10012 gcc_assert (from == FRAME_POINTER_REGNUM);
10013 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10017 /* In a dynamically-aligned function, we can't know the offset from
10018 stack pointer to frame pointer, so we must ensure that setjmp
10019 eliminates fp against the hard fp (%ebp) rather than trying to
10020 index from %esp up to the top of the frame across a gap that is
10021 of unknown (at compile-time) size. */
/* Implements TARGET_BUILTIN_SETJMP_FRAME_VALUE; the return type line
   is elided in this excerpt.  */
10023 ix86_builtin_setjmp_frame_value (void)
10025 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10028 /* When using -fsplit-stack, the allocation routines set a field in
10029 the TCB to the bottom of the stack plus this much space, measured
10032 #define SPLIT_STACK_AVAILABLE 256
10034 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes every offset in *FRAME (register save area, SSE save area,
   frame pointer, stack pointer, red zone) walking downward from the
   return address.  All offsets are relative to the CFA.
   NOTE(review): this excerpt is a sampled fragment — braces, else
   keywords and assorted statements (e.g. the SEH guard around the
   fast-prologue decision, loop bodies, closing braces) are elided;
   read the control flow below with that in mind.  */
10037 ix86_compute_frame_layout (struct ix86_frame *frame)
10039 unsigned HOST_WIDE_INT stack_alignment_needed;
10040 HOST_WIDE_INT offset;
10041 unsigned HOST_WIDE_INT preferred_alignment;
10042 HOST_WIDE_INT size = get_frame_size ();
10043 HOST_WIDE_INT to_allocate;
10045 frame->nregs = ix86_nsaved_regs ();
10046 frame->nsseregs = ix86_nsaved_sseregs ();
10048 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
10049 function prologues and leaf. */
10050 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10051 && (!crtl->is_leaf || cfun->calls_alloca != 0
10052 || ix86_current_function_calls_tls_descriptor))
10054 crtl->preferred_stack_boundary = 128;
10055 crtl->stack_alignment_needed = 128;
10057 /* preferred_stack_boundary is never updated for call
10058 expanded from tls descriptor. Update it here. We don't update it in
10059 expand stage because according to the comments before
10060 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10062 else if (ix86_current_function_calls_tls_descriptor
10063 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10065 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10066 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10067 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
/* Convert both alignments from bits to bytes for the arithmetic
   below, and sanity-check their relationship.  */
10070 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10071 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10073 gcc_assert (!size || stack_alignment_needed);
10074 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10075 gcc_assert (preferred_alignment <= stack_alignment_needed);
10077 /* For SEH we have to limit the amount of code movement into the prologue.
10078 At present we do this via a BLOCKAGE, at which point there's very little
10079 scheduling that can be done, which means that there's very little point
10080 in doing anything except PUSHs. */
10082 cfun->machine->use_fast_prologue_epilogue = false;
10084 /* During reload iteration the amount of registers saved can change.
10085 Recompute the value as needed. Do not recompute when amount of registers
10086 didn't change as reload does multiple calls to the function and does not
10087 expect the decision to change within single iteration. */
10088 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10089 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10091 int count = frame->nregs;
10092 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10094 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10096 /* The fast prologue uses move instead of push to save registers. This
10097 is significantly longer, but also executes faster as modern hardware
10098 can execute the moves in parallel, but can't do that for push/pop.
10100 Be careful about choosing what prologue to emit: When function takes
10101 many instructions to execute we may use slow version as well as in
10102 case function is known to be outside hot spot (this is known with
10103 feedback only). Weight the size of function by number of registers
10104 to save as it is cheap to use one or two push instructions but very
10105 slow to use many of them. */
10107 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10108 if (node->frequency < NODE_FREQUENCY_NORMAL
10109 || (flag_branch_probabilities
10110 && node->frequency < NODE_FREQUENCY_HOT))
10111 cfun->machine->use_fast_prologue_epilogue = false;
10113 cfun->machine->use_fast_prologue_epilogue
10114 = !expensive_function_p (count);
10117 frame->save_regs_using_mov
10118 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10119 /* If static stack checking is enabled and done with probes,
10120 the registers need to be saved before allocating the frame. */
10121 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
/* From here on, OFFSET walks down the frame starting at the CFA.  */
10123 /* Skip return address. */
10124 offset = UNITS_PER_WORD;
10126 /* Skip pushed static chain. */
10127 if (ix86_static_chain_on_stack)
10128 offset += UNITS_PER_WORD;
10130 /* Skip saved base pointer. */
10131 if (frame_pointer_needed)
10132 offset += UNITS_PER_WORD;
10133 frame->hfp_save_offset = offset;
10135 /* The traditional frame pointer location is at the top of the frame. */
10136 frame->hard_frame_pointer_offset = offset;
10138 /* Register save area */
10139 offset += frame->nregs * UNITS_PER_WORD;
10140 frame->reg_save_offset = offset;
10142 /* On SEH target, registers are pushed just before the frame pointer
10145 frame->hard_frame_pointer_offset = offset;
10147 /* Align and set SSE register save area. */
10148 if (frame->nsseregs)
10150 /* The only ABI that has saved SSE registers (Win64) also has a
10151 16-byte aligned default stack, and thus we don't need to be
10152 within the re-aligned local stack frame to save them. */
10153 gcc_assert (INCOMING_STACK_BOUNDARY >= 128)
10154 offset = (offset + 16 - 1) & -16;
10155 offset += frame->nsseregs * 16;
10157 frame->sse_reg_save_offset = offset;
10159 /* The re-aligned stack starts here. Values before this point are not
10160 directly comparable with values below this point. In order to make
10161 sure that no value happens to be the same before and after, force
10162 the alignment computation below to add a non-zero value. */
10163 if (stack_realign_fp)
10164 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
/* Variadic register save area (amd64 varargs GPR/FPR spill).  */
10167 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10168 offset += frame->va_arg_size;
10170 /* Align start of frame for local function. */
10171 if (stack_realign_fp
10172 || offset != frame->sse_reg_save_offset
10175 || cfun->calls_alloca
10176 || ix86_current_function_calls_tls_descriptor)
10177 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10179 /* Frame pointer points here. */
10180 frame->frame_pointer_offset = offset;
10184 /* Add outgoing arguments area. Can be skipped if we eliminated
10185 all the function calls as dead code.
10186 Skipping is however impossible when function calls alloca. Alloca
10187 expander assumes that last crtl->outgoing_args_size
10188 of stack frame are unused. */
10189 if (ACCUMULATE_OUTGOING_ARGS
10190 && (!crtl->is_leaf || cfun->calls_alloca
10191 || ix86_current_function_calls_tls_descriptor))
10193 offset += crtl->outgoing_args_size;
10194 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10197 frame->outgoing_arguments_size = 0;
10199 /* Align stack boundary. Only needed if we're calling another function
10200 or using alloca. */
10201 if (!crtl->is_leaf || cfun->calls_alloca
10202 || ix86_current_function_calls_tls_descriptor)
10203 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10205 /* We've reached end of stack frame. */
10206 frame->stack_pointer_offset = offset;
10208 /* Size prologue needs to allocate. */
10209 to_allocate = offset - frame->sse_reg_save_offset;
/* Mov-based saves are pointless for tiny frames and impossible for
   allocations that don't fit a 32-bit signed displacement.  */
10211 if ((!to_allocate && frame->nregs <= 1)
10212 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10213 frame->save_regs_using_mov = false;
10215 if (ix86_using_red_zone ()
10216 && crtl->sp_is_unchanging
10218 && !ix86_current_function_calls_tls_descriptor)
10220 frame->red_zone_size = to_allocate;
10221 if (frame->save_regs_using_mov)
10222 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10223 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10224 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10227 frame->red_zone_size = 0;
10228 frame->stack_pointer_offset -= frame->red_zone_size;
10230 /* The SEH frame pointer location is near the bottom of the frame.
10231 This is enforced by the fact that the difference between the
10232 stack pointer and the frame pointer is limited to 240 bytes in
10233 the unwind data structure. */
10236 HOST_WIDE_INT diff;
10238 /* If we can leave the frame pointer where it is, do so. Also, returns
10239 the establisher frame for __builtin_frame_address (0). */
10240 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10241 if (diff <= SEH_MAX_FRAME_SIZE
10242 && (diff > 240 || (diff & 15) != 0)
10243 && !crtl->accesses_prior_frames)
10245 /* Ideally we'd determine what portion of the local stack frame
10246 (within the constraint of the lowest 240) is most heavily used.
10247 But without that complication, simply bias the frame pointer
10248 by 128 bytes so as to maximize the amount of the local stack
10249 frame that is addressable with 8-bit offsets. */
10250 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10255 /* This is semi-inlined memory_address_length, but simplified
10256 since we know that we're always dealing with reg+offset, and
10257 to avoid having to create and discard all that rtl. */
/* Returns the encoding-length cost (in bytes beyond the base
   instruction) of addressing REGNO+OFFSET: disp8 vs disp32, the
   mandatory disp for %ebp/%r13 and the SIB byte for %esp/%r12.
   NOTE(review): function header, LEN declaration, the offset==0
   branch and the return are elided in this excerpt.  */
10260 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10266 /* EBP and R13 cannot be encoded without an offset. */
10267 len = (regno == BP_REG || regno == R13_REG);
10269 else if (IN_RANGE (offset, -128, 127))
10272 /* ESP and R12 must be encoded with a SIB byte. */
10273 if (regno == SP_REG || regno == R12_REG)
10279 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10280 The valid base registers are taken from CFUN->MACHINE->FS. */
/* Two strategies: with the fast prologue/epilogue, prefer whichever
   base maximizes scheduling freedom (FP > DRAP > SP); otherwise pick
   the base with the smallest address-encoding length as measured by
   choose_baseaddr_len.  NOTE(review): the else introducing the second
   strategy and the tlen<len comparison lines are elided here.  */
10283 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10285 const struct machine_function *m = cfun->machine;
10286 rtx base_reg = NULL;
10287 HOST_WIDE_INT base_offset = 0;
10289 if (m->use_fast_prologue_epilogue)
10291 /* Choose the base register most likely to allow the most scheduling
10292 opportunities. Generally FP is valid throughout the function,
10293 while DRAP must be reloaded within the epilogue. But choose either
10294 over the SP due to increased encoding size. */
10296 if (m->fs.fp_valid)
10298 base_reg = hard_frame_pointer_rtx;
10299 base_offset = m->fs.fp_offset - cfa_offset;
10301 else if (m->fs.drap_valid)
10303 base_reg = crtl->drap_reg;
/* The DRAP register itself holds the CFA.  */
10304 base_offset = 0 - cfa_offset;
10306 else if (m->fs.sp_valid)
10308 base_reg = stack_pointer_rtx;
10309 base_offset = m->fs.sp_offset - cfa_offset;
10314 HOST_WIDE_INT toffset;
10315 int len = 16, tlen;
10317 /* Choose the base register with the smallest address encoding.
10318 With a tie, choose FP > DRAP > SP. */
10319 if (m->fs.sp_valid)
10321 base_reg = stack_pointer_rtx;
10322 base_offset = m->fs.sp_offset - cfa_offset;
10323 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10325 if (m->fs.drap_valid)
10327 toffset = 0 - cfa_offset;
10328 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10331 base_reg = crtl->drap_reg;
10332 base_offset = toffset;
10336 if (m->fs.fp_valid)
10338 toffset = m->fs.fp_offset - cfa_offset;
10339 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10342 base_reg = hard_frame_pointer_rtx;
10343 base_offset = toffset;
/* Some base must have been valid, or the frame state is corrupt.  */
10348 gcc_assert (base_reg != NULL);
10350 return plus_constant (Pmode, base_reg, base_offset);
10353 /* Emit code to save registers in the prologue. */
/* Push-based save: one push per callee-saved GPR, highest regno first,
   each insn marked frame-related for the unwinder.  */
10356 ix86_emit_save_regs (void)
10358 unsigned int regno;
/* Note the loop decrements before the body, so FIRST_PSEUDO_REGISTER-1
   down to 0 inclusive are visited.  */
10361 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10362 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10364 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10365 RTX_FRAME_RELATED_P (insn) = 1;
10369 /* Emit a single register save at CFA - CFA_OFFSET. */
/* Stores register REGNO (in MODE) to the frame slot at CFA-CFA_OFFSET,
   choosing the cheapest base register, then attaches whatever CFI note
   dwarf2out needs when the slot is not expressible relative to the
   current CFA register.  NOTE(review): the `base = addr` assignment
   and some braces/else lines are elided in this excerpt.  */
10372 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10373 HOST_WIDE_INT cfa_offset)
10375 struct machine_function *m = cfun->machine;
10376 rtx reg = gen_rtx_REG (mode, regno);
10377 rtx mem, addr, base, insn;
10379 addr = choose_baseaddr (cfa_offset);
10380 mem = gen_frame_mem (mode, addr);
10382 /* For SSE saves, we need to indicate the 128-bit alignment. */
10383 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10385 insn = emit_move_insn (mem, reg);
10386 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip the displacement to recover the bare base register.  */
10389 if (GET_CODE (base) == PLUS)
10390 base = XEXP (base, 0);
10391 gcc_checking_assert (REG_P (base));
10393 /* When saving registers into a re-aligned local stack frame, avoid
10394 any tricky guessing by dwarf2out. */
10395 if (m->fs.realigned)
10397 gcc_checking_assert (stack_realign_drap);
10399 if (regno == REGNO (crtl->drap_reg))
10401 /* A bit of a hack. We force the DRAP register to be saved in
10402 the re-aligned stack frame, which provides us with a copy
10403 of the CFA that will last past the prologue. Install it. */
10404 gcc_checking_assert (cfun->machine->fs.fp_valid);
10405 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10406 cfun->machine->fs.fp_offset - cfa_offset);
10407 mem = gen_rtx_MEM (mode, addr);
10408 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10412 /* The frame pointer is a stable reference within the
10413 aligned frame. Use it. */
10414 gcc_checking_assert (cfun->machine->fs.fp_valid);
10415 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10416 cfun->machine->fs.fp_offset - cfa_offset);
10417 mem = gen_rtx_MEM (mode, addr);
10418 add_reg_note (insn, REG_CFA_EXPRESSION,
10419 gen_rtx_SET (VOIDmode, mem, reg));
10423 /* The memory may not be relative to the current CFA register,
10424 which means that we may need to generate a new pattern for
10425 use by the unwind info. */
10426 else if (base != m->fs.cfa_reg)
10428 addr = plus_constant (Pmode, m->fs.cfa_reg,
10429 m->fs.cfa_offset - cfa_offset);
10430 mem = gen_rtx_MEM (mode, addr);
10431 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10435 /* Emit code to save registers using MOV insns.
10436 First register is stored at CFA - CFA_OFFSET. */
/* Mov-based GPR save: slots descend by one word per saved register,
   lowest regno stored highest.  Return type line is elided.  */
10438 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10440 unsigned int regno;
10442 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10443 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10445 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10446 cfa_offset -= UNITS_PER_WORD;
10450 /* Emit code to save SSE registers using MOV insns.
10451 First register is stored at CFA - CFA_OFFSET. */
/* SSE analogue of ix86_emit_save_regs_using_mov: V4SFmode stores into
   16-byte slots.  NOTE(review): the cfa_offset decrement line inside
   the loop is elided in this excerpt.  */
10453 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10455 unsigned int regno;
10457 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10458 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10460 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
/* GC-rooted chain of REG_CFA_RESTORE notes awaiting attachment to the
   next stack-manipulation insn.  */
10465 static GTY(()) rtx queued_cfa_restores;
10467 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10468 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10469 Don't add the note if the previously saved value will be left untouched
10470 within stack red-zone till return, as unwinders can find the same value
10471 in the register and on the stack. */
/* NOTE(review): an early return for the red-zone case and the
   if (insn)/else structure are elided in this excerpt.  */
10474 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10476 if (!crtl->shrink_wrapped
10477 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10482 add_reg_note (insn, REG_CFA_RESTORE, reg);
10483 RTX_FRAME_RELATED_P (insn) = 1;
/* No insn to hang the note on yet: queue it for later.  */
10486 queued_cfa_restores
10487 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10490 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
/* Splices the whole queued_cfa_restores chain onto INSN's note list
   and clears the queue.  */
10493 ix86_add_queued_cfa_restore_notes (rtx insn)
10496 if (!queued_cfa_restores)
/* Find the tail of the queued chain so it can link to INSN's notes.  */
10498 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10500 XEXP (last, 1) = REG_NOTES (insn);
10501 REG_NOTES (insn) = queued_cfa_restores;
10502 queued_cfa_restores = NULL_RTX;
10503 RTX_FRAME_RELATED_P (insn) = 1;
10506 /* Expand prologue or epilogue stack adjustment.
10507 The pattern exist to put a dependency on all ebp-based memory accesses.
10508 STYLE should be negative if instructions should be marked as frame related,
10509 zero if %r11 register is live and cannot be freely used and positive
/* ... (continuation elided in this excerpt; presumably: positive
   otherwise).  SET_CFA requests that the CFA be moved to DEST.
   Adjusts DEST = SRC + OFFSET, using %r11 or the hard frame pointer as
   a scratch when OFFSET is not a valid 64-bit immediate, then updates
   cfun->machine->fs bookkeeping.  NOTE(review): multiple lines (braces,
   else arms, the style==0 test) are elided below.  */
10513 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10514 int style, bool set_cfa)
10516 struct machine_function *m = cfun->machine;
10518 bool add_frame_related_expr = false;
10520 if (Pmode == SImode)
10521 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10522 else if (x86_64_immediate_operand (offset, DImode))
10523 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* Offset does not fit a sign-extended 32-bit immediate: move it into
   a scratch register first.  */
10527 /* r11 is used by indirect sibcall return as well, set before the
10528 epilogue and used after the epilogue. */
10530 tmp = gen_rtx_REG (DImode, R11_REG);
10533 gcc_assert (src != hard_frame_pointer_rtx
10534 && dest != hard_frame_pointer_rtx);
10535 tmp = hard_frame_pointer_rtx;
10537 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10539 add_frame_related_expr = true;
10541 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10544 insn = emit_insn (insn);
10546 ix86_add_queued_cfa_restore_notes (insn);
/* set_cfa path: record the CFA's move from SRC to DEST.  */
10552 gcc_assert (m->fs.cfa_reg == src);
10553 m->fs.cfa_offset += INTVAL (offset);
10554 m->fs.cfa_reg = dest;
10556 r = gen_rtx_PLUS (Pmode, src, offset);
10557 r = gen_rtx_SET (VOIDmode, dest, r);
10558 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10559 RTX_FRAME_RELATED_P (insn) = 1;
10561 else if (style < 0)
10563 RTX_FRAME_RELATED_P (insn) = 1;
10564 if (add_frame_related_expr)
10566 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10567 r = gen_rtx_SET (VOIDmode, dest, r);
10568 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Track the stack pointer's new offset/validity when it is DEST.  */
10572 if (dest == stack_pointer_rtx)
10574 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10575 bool valid = m->fs.sp_valid;
10577 if (src == hard_frame_pointer_rtx)
10579 valid = m->fs.fp_valid;
10580 ooffset = m->fs.fp_offset;
10582 else if (src == crtl->drap_reg)
10584 valid = m->fs.drap_valid;
10589 /* Else there are two possibilities: SP itself, which we set
10590 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10591 taken care of this by hand along the eh_return path. */
10592 gcc_checking_assert (src == stack_pointer_rtx
10593 || offset == const0_rtx);
10596 m->fs.sp_offset = ooffset - INTVAL (offset);
10597 m->fs.sp_valid = valid;
10601 /* Find an available register to be used as dynamic realign argument
10602 pointer register. Such a register will be written in prologue and
10603 used in begin of body, so it must not be
10604 1. parameter passing register.
10606 We reuse static-chain register if it is available. Otherwise, we
10607 use DI for i386 and R13 for x86-64. We chose R13 since it has
10610 Return: the regno of chosen register. */
/* NOTE(review): item 2 of the constraint list, the TARGET_64BIT split
   and the return statements are elided in this excerpt.  */
10612 static unsigned int
10613 find_drap_reg (void)
10615 tree decl = cfun->decl;
/* 64-bit branch (presumably; the TARGET_64BIT test is elided).  */
10619 /* Use R13 for nested function or function need static chain.
10620 Since function with tail call may use any caller-saved
10621 registers in epilogue, DRAP must not use caller-saved
10622 register in such case. */
10623 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10630 /* Use DI for nested function or function need static chain.
10631 Since function with tail call may use any caller-saved
10632 registers in epilogue, DRAP must not use caller-saved
10633 register in such case. */
10634 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10637 /* Reuse static chain register if it isn't used for parameter
10639 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10641 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10642 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10651 /* Return minimum incoming stack alignment. */
/* SIBCALL distinguishes the sibcall-check use of this function from
   the normal one (referenced by the 32-bit -mstackrealign comment
   below; the line testing it is elided in this excerpt).  */
10652 static unsigned int
10653 ix86_minimum_incoming_stack_boundary (bool sibcall)
10654 unsigned int incoming_stack_boundary;
10656 /* Prefer the one specified at command line. */
10657 if (ix86_user_incoming_stack_boundary)
10658 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10659 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
10660 if -mstackrealign is used, it isn't used for sibcall check and
10661 estimated stack alignment is 128bit. */
10664 && ix86_force_align_arg_pointer
10665 && crtl->stack_alignment_estimated == 128)
10666 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10668 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10670 /* Incoming stack alignment can be changed on individual functions
10671 via force_align_arg_pointer attribute. We use the smallest
10672 incoming stack boundary. */
10673 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10674 && lookup_attribute (ix86_force_align_arg_pointer_string,
10675 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10676 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10678 /* The incoming stack frame has to be aligned at least at
10679 parm_stack_boundary. */
10680 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10681 incoming_stack_boundary = crtl->parm_stack_boundary;
10683 /* Stack at entrance of main is aligned by runtime. We use the
10684 smallest incoming stack boundary. */
10685 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10686 && DECL_NAME (current_function_decl)
10687 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10688 && DECL_FILE_SCOPE_P (current_function_decl))
10689 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10691 return incoming_stack_boundary;
10694 /* Update incoming stack boundary and estimated stack alignment. */
/* Implements TARGET_UPDATE_STACK_BOUNDARY; the TARGET_64BIT/stdarg
   guard for the varargs case is partially elided in this excerpt.  */
10697 ix86_update_stack_boundary (void)
10699 ix86_incoming_stack_boundary
10700 = ix86_minimum_incoming_stack_boundary (false);
10702 /* x86_64 vararg needs 16byte stack alignment for register save
10706 && crtl->stack_alignment_estimated < 128)
10707 crtl->stack_alignment_estimated = 128;
10710 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10711 needed or an rtx for DRAP otherwise. */
/* When dynamic realignment via DRAP is active, picks a hard register
   for the DRAP, copies it into a pseudo (vDRAP) at function entry, and
   returns the pseudo.  NOTE(review): the declarations of arg_ptr and
   drap_vreg, start_sequence/end_sequence and the final returns are
   elided in this excerpt.  */
10714 ix86_get_drap_rtx (void)
10716 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10717 crtl->need_drap = true;
10719 if (stack_realign_drap)
10721 /* Assign DRAP to vDRAP and returns vDRAP */
10722 unsigned int regno = find_drap_reg ();
10725 rtx_insn *seq, *insn;
10727 arg_ptr = gen_rtx_REG (Pmode, regno);
10728 crtl->drap_reg = arg_ptr;
10731 drap_vreg = copy_to_reg (arg_ptr);
10732 seq = get_insns ();
/* Insert the copy right after the function-entry insn so vDRAP is
   valid throughout the body.  */
10735 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10738 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10739 RTX_FRAME_RELATED_P (insn) = 1;
10747 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* The default: incoming arguments are addressed off the virtual
   incoming-args pointer.  Return type line elided in this excerpt.  */
10750 ix86_internal_arg_pointer (void)
10752 return virtual_incoming_args_rtx;
/* Descriptor for a temporary register handed out at function entry;
   NOTE(review): the members (reg and a saved flag, presumably) are
   elided in this excerpt.  */
10755 struct scratch_reg {
10760 /* Return a short-lived scratch register for use on function entry.
10761 In 32-bit mode, it is valid only after the registers are saved
10762 in the prologue. This register must be released by means of
10763 release_scratch_register_on_entry once it is dead. */
/* NOTE(review): the regno declaration, the TARGET_64BIT split and the
   sr->saved assignments are elided; the 32-bit cascade below picks the
   first caller-saved register not claimed by the calling convention,
   the static chain, or the DRAP, falling back to pushing one.  */
10766 get_scratch_register_on_entry (struct scratch_reg *sr)
10774 /* We always use R11 in 64-bit mode. */
10779 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10781 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10783 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10784 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10785 int regparm = ix86_function_regparm (fntype, decl);
10787 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10789 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10790 for the static chain register. */
10791 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10792 && drap_regno != AX_REG)
10794 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10795 for the static chain register. */
10796 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10798 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10800 /* ecx is the static chain register. */
10801 else if (regparm < 3 && !fastcall_p && !thiscall_p
10803 && drap_regno != CX_REG)
/* Last resorts: reuse a register that the prologue already saves.  */
10805 else if (ix86_save_reg (BX_REG, true))
10807 /* esi is the static chain register. */
10808 else if (!(regparm == 3 && static_chain_p)
10809 && ix86_save_reg (SI_REG, true))
10811 else if (ix86_save_reg (DI_REG, true))
10815 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10820 sr->reg = gen_rtx_REG (Pmode, regno);
/* Spill path: push the chosen register so it can be restored later.  */
10823 rtx insn = emit_insn (gen_push (sr->reg));
10824 RTX_FRAME_RELATED_P (insn) = 1;
10828 /* Release a scratch register obtained from the preceding function. */
/* Pops the register back if get_scratch_register_on_entry had to push
   it, rebuilding the sp adjustment note by hand because pop insns are
   not understood by the RTX_FRAME_RELATED_P machinery.  NOTE(review):
   the sr->saved guard around this body is elided in this excerpt.  */
10831 release_scratch_register_on_entry (struct scratch_reg *sr)
10835 struct machine_function *m = cfun->machine;
10836 rtx x, insn = emit_insn (gen_pop (sr->reg));
10838 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10839 RTX_FRAME_RELATED_P (insn) = 1;
10840 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10841 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10842 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10843 m->fs.sp_offset -= UNITS_PER_WORD;
10847 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10849 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
/* Emit RTL that decrements the stack pointer by SIZE bytes while probing
   each PROBE_INTERVAL-sized page, for -fstack-check with a moving SP.
   NOTE(review): this excerpt is elided (original line numbers embedded,
   some lines missing); code kept byte-identical.  */
10852 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10854 /* We skip the probe for the first interval + a small dope of 4 words and
10855 probe that many bytes past the specified size to maintain a protection
10856 area at the bottom of the stack. */
10857 const int dope = 4 * UNITS_PER_WORD;
10858 rtx size_rtx = GEN_INT (size), last;
10860 /* See if we have a constant small number of probes to generate. If so,
10861 that's the easy case. The run-time loop is made up of 11 insns in the
10862 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10863 for n # of intervals. */
10864 if (size <= 5 * PROBE_INTERVAL)
10866 HOST_WIDE_INT i, adjust;
10867 bool first_probe = true;
10869 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10870 values of N from 1 until it exceeds SIZE. If only one probe is
10871 needed, this will not generate any code. Then adjust and probe
10872 to PROBE_INTERVAL + SIZE. */
10873 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* First iteration also covers the initial skipped interval + dope. */
10877 adjust = 2 * PROBE_INTERVAL + dope;
10878 first_probe = false;
10881 adjust = PROBE_INTERVAL;
10883 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10884 plus_constant (Pmode, stack_pointer_rtx,
10886 emit_stack_probe (stack_pointer_rtx);
10890 adjust = size + PROBE_INTERVAL + dope;
10892 adjust = size + PROBE_INTERVAL - i;
10894 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10895 plus_constant (Pmode, stack_pointer_rtx,
10897 emit_stack_probe (stack_pointer_rtx);
10899 /* Adjust back to account for the additional first interval. */
10900 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10901 plus_constant (Pmode, stack_pointer_rtx,
10902 PROBE_INTERVAL + dope)));
10905 /* Otherwise, do the same as above, but in a loop. Note that we must be
10906 extra careful with variables wrapping around because we might be at
10907 the very top (or the very bottom) of the address space and we have
10908 to be able to handle this case properly; in particular, we use an
10909 equality test for the loop condition. */
10912 HOST_WIDE_INT rounded_size;
10913 struct scratch_reg sr;
10915 get_scratch_register_on_entry (&sr);
10918 /* Step 1: round SIZE to the previous multiple of the interval. */
10920 rounded_size = size & -PROBE_INTERVAL;
10923 /* Step 2: compute initial and final value of the loop counter. */
10925 /* SP = SP_0 + PROBE_INTERVAL. */
10926 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10927 plus_constant (Pmode, stack_pointer_rtx,
10928 - (PROBE_INTERVAL + dope))));
10930 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10931 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10932 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10933 gen_rtx_PLUS (Pmode, sr.reg,
10934 stack_pointer_rtx)));
10937 /* Step 3: the loop
10939 while (SP != LAST_ADDR)
10941 SP = SP + PROBE_INTERVAL
10945 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10946 values of N from 1 until it is equal to ROUNDED_SIZE. */
10948 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10951 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10952 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10954 if (size != rounded_size)
10956 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10957 plus_constant (Pmode, stack_pointer_rtx,
10958 rounded_size - size)));
10959 emit_stack_probe (stack_pointer_rtx);
10962 /* Adjust back to account for the additional first interval. */
10963 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10964 plus_constant (Pmode, stack_pointer_rtx,
10965 PROBE_INTERVAL + dope)));
10967 release_scratch_register_on_entry (&sr);
10970 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10972 /* Even if the stack pointer isn't the CFA register, we need to correctly
10973 describe the adjustments made to it, in particular differentiate the
10974 frame-related ones from the frame-unrelated ones. */
10977 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10978 XVECEXP (expr, 0, 0)
10979 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10980 plus_constant (Pmode, stack_pointer_rtx, -size));
10981 XVECEXP (expr, 0, 1)
10982 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10983 plus_constant (Pmode, stack_pointer_rtx,
10984 PROBE_INTERVAL + dope + size))
/* Attach the paired net-allocation / readjustment to the final insn so
   the unwinder sees only the frame-related SP change.  */;
10985 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10986 RTX_FRAME_RELATED_P (last) = 1;
10988 cfun->machine->fs.sp_offset += size;
10991 /* Make sure nothing is scheduled before we are done. */
10992 emit_insn (gen_blockage ());
10995 /* Adjust the stack pointer up to REG while probing it. */
/* Emits the textual asm for the run-time probe loop: decrement SP one
   PROBE_INTERVAL at a time, OR a zero byte into each new page, and stop
   when SP reaches the last address held in REG.
   NOTE(review): excerpt is elided; code kept byte-identical.  */
10998 output_adjust_stack_and_probe (rtx reg)
11000 static int labelno = 0;
11001 char loop_lab[32], end_lab[32];
11004 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11005 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11007 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11009 /* Jump to END_LAB if SP == LAST_ADDR. */
11010 xops[0] = stack_pointer_rtx;
11012 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11013 fputs ("\tje\t", asm_out_file);
11014 assemble_name_raw (asm_out_file, end_lab);
11015 fputc ('\n', asm_out_file);
11017 /* SP = SP + PROBE_INTERVAL. */
11018 xops[1] = GEN_INT (PROBE_INTERVAL);
11019 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe at SP by OR-ing zero into the freshly exposed page.  */
11022 xops[1] = const0_rtx;
11023 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11025 fprintf (asm_out_file, "\tjmp\t");
11026 assemble_name_raw (asm_out_file, loop_lab);
11027 fputc ('\n', asm_out_file);
11029 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11034 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11035 inclusive. These are offsets from the current stack pointer.
   Unlike ix86_adjust_stack_and_probe, SP itself is not moved here.
   NOTE(review): excerpt is elided; code kept byte-identical.  */
11038 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11040 /* See if we have a constant small number of probes to generate. If so,
11041 that's the easy case. The run-time loop is made up of 7 insns in the
11042 generic case while the compile-time loop is made up of n insns for n #
11044 if (size <= 7 * PROBE_INTERVAL)
11048 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11049 it exceeds SIZE. If only one probe is needed, this will not
11050 generate any code. Then probe at FIRST + SIZE. */
11051 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11052 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11055 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11059 /* Otherwise, do the same as above, but in a loop. Note that we must be
11060 extra careful with variables wrapping around because we might be at
11061 the very top (or the very bottom) of the address space and we have
11062 to be able to handle this case properly; in particular, we use an
11063 equality test for the loop condition. */
11066 HOST_WIDE_INT rounded_size, last;
11067 struct scratch_reg sr;
11069 get_scratch_register_on_entry (&sr);
11072 /* Step 1: round SIZE to the previous multiple of the interval. */
11074 rounded_size = size & -PROBE_INTERVAL;
11077 /* Step 2: compute initial and final value of the loop counter. */
11079 /* TEST_OFFSET = FIRST. */
11080 emit_move_insn (sr.reg, GEN_INT (-first));
11082 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11083 last = first + rounded_size;
11086 /* Step 3: the loop
11088 while (TEST_ADDR != LAST_ADDR)
11090 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11094 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11095 until it is equal to ROUNDED_SIZE. */
11097 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11100 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11101 that SIZE is equal to ROUNDED_SIZE. */
11103 if (size != rounded_size)
11104 emit_stack_probe (plus_constant (Pmode,
11105 gen_rtx_PLUS (Pmode,
11108 rounded_size - size));
11110 release_scratch_register_on_entry (&sr);
11113 /* Make sure nothing is scheduled before we are done. */
11114 emit_insn (gen_blockage ());
11117 /* Probe a range of stack addresses from REG to END, inclusive. These are
11118 offsets from the current stack pointer.
   Textual asm counterpart of ix86_emit_probe_stack_range's loop: REG is
   the (negated) test offset, advanced by PROBE_INTERVAL per iteration,
   and each probe is an OR of zero at SP+REG.
   NOTE(review): excerpt is elided; code kept byte-identical.  */
11121 output_probe_stack_range (rtx reg, rtx end)
11123 static int labelno = 0;
11124 char loop_lab[32], end_lab[32];
11127 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11128 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11130 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11132 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11135 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11136 fputs ("\tje\t", asm_out_file);
11137 assemble_name_raw (asm_out_file, end_lab);
11138 fputc ('\n', asm_out_file);
11140 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11141 xops[1] = GEN_INT (PROBE_INTERVAL);
11142 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11144 /* Probe at TEST_ADDR. */
11145 xops[0] = stack_pointer_rtx;
11147 xops[2] = const0_rtx;
11148 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11150 fprintf (asm_out_file, "\tjmp\t");
11151 assemble_name_raw (asm_out_file, loop_lab);
11152 fputc ('\n', asm_out_file);
11154 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11159 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11160 to be generated in correct form.
   Also clears frame_pointer_needed / DRAP when, after reload, nothing
   actually required the conservative realignment assumption.
   NOTE(review): excerpt is elided (several conditions and braces not
   visible); code kept byte-identical.  */
11162 ix86_finalize_stack_realign_flags (void)
11164 /* Check if stack realign is really needed after reload, and
11165 stores result in cfun */
11166 unsigned int incoming_stack_boundary
11167 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11168 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11169 unsigned int stack_realign = (incoming_stack_boundary
11171 ? crtl->max_used_stack_slot_alignment
11172 : crtl->stack_alignment_needed));
11174 if (crtl->stack_realign_finalized)
11176 /* After stack_realign_needed is finalized, we can no longer
11178 gcc_assert (crtl->stack_realign_needed == stack_realign);
11182 /* If the only reason for frame_pointer_needed is that we conservatively
11183 assumed stack realignment might be needed, but in the end nothing that
11184 needed the stack alignment had been spilled, clear frame_pointer_needed
11185 and say we don't need stack realignment. */
11187 && frame_pointer_needed
11189 && flag_omit_frame_pointer
11190 && crtl->sp_is_unchanging
11191 && !ix86_current_function_calls_tls_descriptor
11192 && !crtl->accesses_prior_frames
11193 && !cfun->calls_alloca
11194 && !crtl->calls_eh_return
11195 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11196 && !ix86_frame_pointer_required ()
11197 && get_frame_size () == 0
11198 && ix86_nsaved_sseregs () == 0
11199 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11201 HARD_REG_SET set_up_by_prologue, prologue_used;
11204 CLEAR_HARD_REG_SET (prologue_used);
11205 CLEAR_HARD_REG_SET (set_up_by_prologue);
11206 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11207 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11208 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11209 HARD_FRAME_POINTER_REGNUM);
/* Scan every insn: if any still needs a stack frame, keep the
   conservative realignment decision.  */
11210 FOR_EACH_BB_FN (bb, cfun)
11213 FOR_BB_INSNS (bb, insn)
11214 if (NONDEBUG_INSN_P (insn)
11215 && requires_stack_frame_p (insn, prologue_used,
11216 set_up_by_prologue))
11218 crtl->stack_realign_needed = stack_realign;
11219 crtl->stack_realign_finalized = true;
11224 /* If drap has been set, but it actually isn't live at the start
11225 of the function, there is no reason to set it up. */
11226 if (crtl->drap_reg)
11228 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11229 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11231 crtl->drap_reg = NULL_RTX;
11232 crtl->need_drap = false;
11236 cfun->machine->no_drap_save_restore = true;
11238 frame_pointer_needed = false;
11239 stack_realign = false;
11240 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11241 crtl->stack_alignment_needed = incoming_stack_boundary;
11242 crtl->stack_alignment_estimated = incoming_stack_boundary;
11243 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11244 crtl->preferred_stack_boundary = incoming_stack_boundary;
/* Recompute dataflow so the relaxed decisions are reflected.  */
11245 df_finish_pass (true);
11246 df_scan_alloc (NULL);
11248 df_compute_regs_ever_live (true);
11252 crtl->stack_realign_needed = stack_realign;
11253 crtl->stack_realign_finalized = true;
11256 /* Delete SET_GOT right after entry block if it is allocated to reg.
   Looks at the first real insn of the function and removes a redundant
   UNSPEC_SET_GOT that targets the same register REG.
   NOTE(review): excerpt is elided; code kept byte-identical.  */
11259 ix86_elim_entry_set_got (rtx reg)
11261 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11262 rtx_insn *c_insn = BB_HEAD (bb);
11263 if (!NONDEBUG_INSN_P (c_insn))
11264 c_insn = next_nonnote_nondebug_insn (c_insn);
11265 if (c_insn && NONJUMP_INSN_P (c_insn))
11267 rtx pat = PATTERN (c_insn);
11268 if (GET_CODE (pat) == PARALLEL)
11270 rtx vec = XVECEXP (pat, 0, 0);
11271 if (GET_CODE (vec) == SET
11272 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11273 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11274 delete_insn (c_insn);
11279 /* Expand the prologue into a bunch of separate insns.
   Handles ms_hook prologues, static chain on stack, DRAP setup, register
   saves (push or mov), FP-based realignment, stack allocation with
   optional probing, and the associated CFA/unwind bookkeeping in m->fs.
   NOTE(review): this excerpt is heavily elided (original line numbers
   embedded, many braces/branches not visible); code kept byte-identical.  */
11282 ix86_expand_prologue (void)
11284 struct machine_function *m = cfun->machine;
11286 struct ix86_frame frame;
11287 HOST_WIDE_INT allocate;
11288 bool int_registers_saved;
11289 bool sse_registers_saved;
11291 ix86_finalize_stack_realign_flags ();
11293 /* DRAP should not coexist with stack_realign_fp */
11294 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11296 memset (&m->fs, 0, sizeof (m->fs));
11298 /* Initialize CFA state for before the prologue. */
11299 m->fs.cfa_reg = stack_pointer_rtx;
11300 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11302 /* Track SP offset to the CFA. We continue tracking this after we've
11303 swapped the CFA register away from SP. In the case of re-alignment
11304 this is fudged; we're interested to offsets within the local frame. */
11305 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11306 m->fs.sp_valid = true;
11308 ix86_compute_frame_layout (&frame);
11310 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11312 /* We should have already generated an error for any use of
11313 ms_hook on a nested function. */
11314 gcc_checking_assert (!ix86_static_chain_on_stack);
11316 /* Check if profiling is active and we shall use profiling before
11317 prologue variant. If so sorry. */
11318 if (crtl->profile && flag_fentry != 0)
11319 sorry ("ms_hook_prologue attribute isn%'t compatible "
11320 "with -mfentry for 32-bit");
11322 /* In ix86_asm_output_function_label we emitted:
11323 8b ff movl.s %edi,%edi
11325 8b ec movl.s %esp,%ebp
11327 This matches the hookable function prologue in Win32 API
11328 functions in Microsoft Windows XP Service Pack 2 and newer.
11329 Wine uses this to enable Windows apps to hook the Win32 API
11330 functions provided by Wine.
11332 What that means is that we've already set up the frame pointer. */
11334 if (frame_pointer_needed
11335 && !(crtl->drap_reg && crtl->stack_realign_needed))
11339 /* We've decided to use the frame pointer already set up.
11340 Describe this to the unwinder by pretending that both
11341 push and mov insns happen right here.
11343 Putting the unwind info here at the end of the ms_hook
11344 is done so that we can make absolutely certain we get
11345 the required byte sequence at the start of the function,
11346 rather than relying on an assembler that can produce
11347 the exact encoding required.
11349 However it does mean (in the unpatched case) that we have
11350 a 1 insn window where the asynchronous unwind info is
11351 incorrect. However, if we placed the unwind info at
11352 its correct location we would have incorrect unwind info
11353 in the patched case. Which is probably all moot since
11354 I don't expect Wine generates dwarf2 unwind info for the
11355 system libraries that use this feature. */
11357 insn = emit_insn (gen_blockage ());
11359 push = gen_push (hard_frame_pointer_rtx);
11360 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11361 stack_pointer_rtx);
11362 RTX_FRAME_RELATED_P (push) = 1;
11363 RTX_FRAME_RELATED_P (mov) = 1;
11365 RTX_FRAME_RELATED_P (insn) = 1;
11366 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11367 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11369 /* Note that gen_push incremented m->fs.cfa_offset, even
11370 though we didn't emit the push insn here. */
11371 m->fs.cfa_reg = hard_frame_pointer_rtx;
11372 m->fs.fp_offset = m->fs.cfa_offset;
11373 m->fs.fp_valid = true;
11377 /* The frame pointer is not needed so pop %ebp again.
11378 This leaves us with a pristine state. */
11379 emit_insn (gen_pop (hard_frame_pointer_rtx));
11383 /* The first insn of a function that accepts its static chain on the
11384 stack is to push the register that would be filled in by a direct
11385 call. This insn will be skipped by the trampoline. */
11386 else if (ix86_static_chain_on_stack)
11388 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11389 emit_insn (gen_blockage ());
11391 /* We don't want to interpret this push insn as a register save,
11392 only as a stack adjustment. The real copy of the register as
11393 a save will be done later, if needed. */
11394 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11395 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11396 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11397 RTX_FRAME_RELATED_P (insn) = 1;
11400 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
11401 of DRAP is needed and stack realignment is really needed after reload */
11402 if (stack_realign_drap)
11404 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11406 /* Only need to push parameter pointer reg if it is caller saved. */
11407 if (!call_used_regs[REGNO (crtl->drap_reg)])
11409 /* Push arg pointer reg */
11410 insn = emit_insn (gen_push (crtl->drap_reg));
11411 RTX_FRAME_RELATED_P (insn) = 1;
11414 /* Grab the argument pointer. */
11415 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11416 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11417 RTX_FRAME_RELATED_P (insn) = 1;
11418 m->fs.cfa_reg = crtl->drap_reg;
11419 m->fs.cfa_offset = 0;
11421 /* Align the stack. */
11422 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11424 GEN_INT (-align_bytes)));
11425 RTX_FRAME_RELATED_P (insn) = 1;
11427 /* Replicate the return address on the stack so that return
11428 address can be reached via (argp - 1) slot. This is needed
11429 to implement macro RETURN_ADDR_RTX and intrinsic function
11430 expand_builtin_return_addr etc. */
11431 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11432 t = gen_frame_mem (word_mode, t);
11433 insn = emit_insn (gen_push (t));
11434 RTX_FRAME_RELATED_P (insn) = 1;
11436 /* For the purposes of frame and register save area addressing,
11437 we've started over with a new frame. */
11438 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11439 m->fs.realigned = true;
11442 int_registers_saved = (frame.nregs == 0);
11443 sse_registers_saved = (frame.nsseregs == 0);
11445 if (frame_pointer_needed && !m->fs.fp_valid)
11447 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11448 slower on all targets. Also sdb doesn't like it. */
11449 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11450 RTX_FRAME_RELATED_P (insn) = 1;
11452 /* Push registers now, before setting the frame pointer
11454 if (!int_registers_saved
11456 && !frame.save_regs_using_mov)
11458 ix86_emit_save_regs ();
11459 int_registers_saved = true;
11460 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11463 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11465 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11466 RTX_FRAME_RELATED_P (insn) = 1;
11468 if (m->fs.cfa_reg == stack_pointer_rtx)
11469 m->fs.cfa_reg = hard_frame_pointer_rtx;
11470 m->fs.fp_offset = m->fs.sp_offset;
11471 m->fs.fp_valid = true;
11475 if (!int_registers_saved)
11477 /* If saving registers via PUSH, do so now. */
11478 if (!frame.save_regs_using_mov)
11480 ix86_emit_save_regs ();
11481 int_registers_saved = true;
11482 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11485 /* When using red zone we may start register saving before allocating
11486 the stack frame saving one cycle of the prologue. However, avoid
11487 doing this if we have to probe the stack; at least on x86_64 the
11488 stack probe can turn into a call that clobbers a red zone location. */
11489 else if (ix86_using_red_zone ()
11490 && (! TARGET_STACK_PROBE
11491 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11493 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11494 int_registers_saved = true;
11498 if (stack_realign_fp)
11500 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11501 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11503 /* The computation of the size of the re-aligned stack frame means
11504 that we must allocate the size of the register save area before
11505 performing the actual alignment. Otherwise we cannot guarantee
11506 that there's enough storage above the realignment point. */
11507 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11508 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11509 GEN_INT (m->fs.sp_offset
11510 - frame.sse_reg_save_offset),
11513 /* Align the stack. */
11514 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11516 GEN_INT (-align_bytes)));
11518 /* For the purposes of register save area addressing, the stack
11519 pointer is no longer valid. As for the value of sp_offset,
11520 see ix86_compute_frame_layout, which we need to match in order
11521 to pass verification of stack_pointer_offset at the end. */
11522 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11523 m->fs.sp_valid = false;
11526 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11528 if (flag_stack_usage_info)
11530 /* We start to count from ARG_POINTER. */
11531 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11533 /* If it was realigned, take into account the fake frame. */
11534 if (stack_realign_drap)
11536 if (ix86_static_chain_on_stack)
11537 stack_size += UNITS_PER_WORD;
11539 if (!call_used_regs[REGNO (crtl->drap_reg)])
11540 stack_size += UNITS_PER_WORD;
11542 /* This over-estimates by 1 minimal-stack-alignment-unit but
11543 mitigates that by counting in the new return address slot. */
11544 current_function_dynamic_stack_size
11545 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11548 current_function_static_stack_size = stack_size;
11551 /* On SEH target with very large frame size, allocate an area to save
11552 SSE registers (as the very large allocation won't be described). */
11554 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11555 && !sse_registers_saved)
11557 HOST_WIDE_INT sse_size =
11558 frame.sse_reg_save_offset - frame.reg_save_offset;
11560 gcc_assert (int_registers_saved);
11562 /* No need to do stack checking as the area will be immediately
11564 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11565 GEN_INT (-sse_size), -1,
11566 m->fs.cfa_reg == stack_pointer_rtx);
11567 allocate -= sse_size;
11568 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11569 sse_registers_saved = true;
11572 /* The stack has already been decremented by the instruction calling us
11573 so probe if the size is non-negative to preserve the protection area. */
11574 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11576 /* We expect the registers to be saved when probes are used. */
11577 gcc_assert (int_registers_saved);
11579 if (STACK_CHECK_MOVING_SP)
11581 if (!(crtl->is_leaf && !cfun->calls_alloca
11582 && allocate <= PROBE_INTERVAL))
11584 ix86_adjust_stack_and_probe (allocate);
11590 HOST_WIDE_INT size = allocate;
/* Cap the probed size on 64-bit so the probe offsets stay
   representable; see STACK_CHECK_PROTECT handling below.  */
11592 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11593 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11595 if (TARGET_STACK_PROBE)
11597 if (crtl->is_leaf && !cfun->calls_alloca)
11599 if (size > PROBE_INTERVAL)
11600 ix86_emit_probe_stack_range (0, size);
11603 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11607 if (crtl->is_leaf && !cfun->calls_alloca)
11609 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11610 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11611 size - STACK_CHECK_PROTECT);
11614 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11621 else if (!ix86_target_stack_probe ()
11622 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11624 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11625 GEN_INT (-allocate), -1,
11626 m->fs.cfa_reg == stack_pointer_rtx);
11630 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11632 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11633 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11634 bool eax_live = ix86_eax_live_at_start_p ();
11635 bool r10_live = false;
11638 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
/* Preserve a live incoming EAX around the allocation call.  */
11642 insn = emit_insn (gen_push (eax));
11643 allocate -= UNITS_PER_WORD;
11644 /* Note that SEH directives need to continue tracking the stack
11645 pointer even after the frame pointer has been set up. */
11646 if (sp_is_cfa_reg || TARGET_SEH)
11649 m->fs.cfa_offset += UNITS_PER_WORD;
11650 RTX_FRAME_RELATED_P (insn) = 1;
11651 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11652 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11653 plus_constant (Pmode, stack_pointer_rtx,
11654 -UNITS_PER_WORD)));
11660 r10 = gen_rtx_REG (Pmode, R10_REG);
11661 insn = emit_insn (gen_push (r10));
11662 allocate -= UNITS_PER_WORD;
11663 if (sp_is_cfa_reg || TARGET_SEH)
11666 m->fs.cfa_offset += UNITS_PER_WORD;
11667 RTX_FRAME_RELATED_P (insn) = 1;
11668 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11669 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11670 plus_constant (Pmode, stack_pointer_rtx,
11671 -UNITS_PER_WORD)));
11675 emit_move_insn (eax, GEN_INT (allocate));
11676 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11678 /* Use the fact that AX still contains ALLOCATE. */
11679 adjust_stack_insn = (Pmode == DImode
11680 ? gen_pro_epilogue_adjust_stack_di_sub
11681 : gen_pro_epilogue_adjust_stack_si_sub);
11683 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11684 stack_pointer_rtx, eax));
11686 if (sp_is_cfa_reg || TARGET_SEH)
11689 m->fs.cfa_offset += allocate;
11690 RTX_FRAME_RELATED_P (insn) = 1;
11691 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11692 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11693 plus_constant (Pmode, stack_pointer_rtx,
11696 m->fs.sp_offset += allocate;
11698 /* Use stack_pointer_rtx for relative addressing so that code
11699 works for realigned stack, too. */
11700 if (r10_live && eax_live)
11702 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11703 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11704 gen_frame_mem (word_mode, t));
11705 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11706 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11707 gen_frame_mem (word_mode, t));
11709 else if (eax_live || r10_live)
11711 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11712 emit_move_insn (gen_rtx_REG (word_mode,
11713 (eax_live ? AX_REG : R10_REG)),
11714 gen_frame_mem (word_mode, t));
11717 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11719 /* If we haven't already set up the frame pointer, do so now. */
11720 if (frame_pointer_needed && !m->fs.fp_valid)
11722 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11723 GEN_INT (frame.stack_pointer_offset
11724 - frame.hard_frame_pointer_offset));
11725 insn = emit_insn (insn);
11726 RTX_FRAME_RELATED_P (insn) = 1;
11727 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11729 if (m->fs.cfa_reg == stack_pointer_rtx)
11730 m->fs.cfa_reg = hard_frame_pointer_rtx;
11731 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11732 m->fs.fp_valid = true;
11735 if (!int_registers_saved)
11736 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11737 if (!sse_registers_saved)
11738 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11740 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
11742 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11744 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11745 insn = emit_insn (gen_set_got (pic));
11746 RTX_FRAME_RELATED_P (insn) = 1;
11747 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11748 emit_insn (gen_prologue_use (pic));
11749 /* Deleting already emitted SET_GOT if exist and allocated to
11750 REAL_PIC_OFFSET_TABLE_REGNUM. */
11751 ix86_elim_entry_set_got (pic);
11754 if (crtl->drap_reg && !crtl->stack_realign_needed)
11756 /* vDRAP is setup but after reload it turns out stack realign
11757 isn't necessary, here we will emit prologue to setup DRAP
11758 without stack realign adjustment */
11759 t = choose_baseaddr (0);
11760 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11763 /* Prevent instructions from being scheduled into register save push
11764 sequence when access to the redzone area is done through frame pointer.
11765 The offset between the frame pointer and the stack pointer is calculated
11766 relative to the value of the stack pointer at the end of the function
11767 prologue, and moving instructions that access redzone area via frame
11768 pointer inside push sequence violates this assumption. */
11769 if (frame_pointer_needed && frame.red_zone_size)
11770 emit_insn (gen_memory_blockage ());
11772 /* Emit cld instruction if stringops are used in the function. */
11773 if (TARGET_CLD && ix86_current_function_needs_cld)
11774 emit_insn (gen_cld ());
11776 /* SEH requires that the prologue end within 256 bytes of the start of
11777 the function. Prevent instruction schedules that would extend that.
11778 Further, prevent alloca modifications to the stack pointer from being
11779 combined with prologue modifications. */
11781 emit_insn (gen_prologue_use (stack_pointer_rtx));
11784 /* Emit code to restore REG using a POP insn.
   Also updates the tracked CFA state: popping the DRAP register makes
   DRAP valid for addressing; popping through an SP-based CFA adjusts
   the CFA offset; popping the frame pointer switches the CFA back to SP.
   NOTE(review): excerpt is elided; code kept byte-identical.  */
11787 ix86_emit_restore_reg_using_pop (rtx reg)
11789 struct machine_function *m = cfun->machine;
11790 rtx insn = emit_insn (gen_pop (reg));
11792 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11793 m->fs.sp_offset -= UNITS_PER_WORD;
11795 if (m->fs.cfa_reg == crtl->drap_reg
11796 && REGNO (reg) == REGNO (crtl->drap_reg))
11798 /* Previously we'd represented the CFA as an expression
11799 like *(%ebp - 8). We've just popped that value from
11800 the stack, which means we need to reset the CFA to
11801 the drap register. This will remain until we restore
11802 the stack pointer. */
11803 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11804 RTX_FRAME_RELATED_P (insn) = 1;
11806 /* This means that the DRAP register is valid for addressing too. */
11807 m->fs.drap_valid = true;
11811 if (m->fs.cfa_reg == stack_pointer_rtx)
11813 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11814 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11815 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11816 RTX_FRAME_RELATED_P (insn) = 1;
11818 m->fs.cfa_offset -= UNITS_PER_WORD;
11821 /* When the frame pointer is the CFA, and we pop it, we are
11822 swapping back to the stack pointer as the CFA. This happens
11823 for stack frames that don't allocate other data, so we assume
11824 the stack pointer is now pointing at the return address, i.e.
11825 the function entry state, which makes the offset be 1 word. */
11826 if (reg == hard_frame_pointer_rtx)
11828 m->fs.fp_valid = false;
11829 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11831 m->fs.cfa_reg = stack_pointer_rtx;
11832 m->fs.cfa_offset -= UNITS_PER_WORD;
11834 add_reg_note (insn, REG_CFA_DEF_CFA,
11835 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11836 GEN_INT (m->fs.cfa_offset)));
11837 RTX_FRAME_RELATED_P (insn) = 1;
11842 /* Emit code to restore saved registers using POP insns.
   Iterates over all hard registers and pops each saved non-SSE one.  */
11845 ix86_emit_restore_regs_using_pop (void)
11847 unsigned int regno;
11849 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11850 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11851 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11854 /* Emit code and notes for the LEAVE instruction.
   LEAVE restores SP from the frame pointer and pops it, so afterwards
   SP is valid again, FP is not, and (if FP was the CFA) the CFA moves
   back to SP.  Requires a valid frame pointer on entry.
   NOTE(review): excerpt is elided; code kept byte-identical.  */
11857 ix86_emit_leave (void)
11859 struct machine_function *m = cfun->machine;
11860 rtx insn = emit_insn (ix86_gen_leave ());
11862 ix86_add_queued_cfa_restore_notes (insn);
11864 gcc_assert (m->fs.fp_valid);
11865 m->fs.sp_valid = true;
11866 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11867 m->fs.fp_valid = false;
11869 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11871 m->fs.cfa_reg = stack_pointer_rtx;
11872 m->fs.cfa_offset = m->fs.sp_offset;
11874 add_reg_note (insn, REG_CFA_DEF_CFA,
11875 plus_constant (Pmode, stack_pointer_rtx,
11877 RTX_FRAME_RELATED_P (insn) = 1;
11879 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11883 /* Emit code to restore saved registers using MOV insns.
11884 First register is restored from CFA - CFA_OFFSET.
   MAYBE_EH_RETURN selects which saved-register set applies (see
   ix86_save_reg).  NOTE(review): excerpt is elided; code kept
   byte-identical.  */
11886 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11887 bool maybe_eh_return)
11889 struct machine_function *m = cfun->machine;
11890 unsigned int regno;
11892 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11893 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11895 rtx reg = gen_rtx_REG (word_mode, regno);
11898 mem = choose_baseaddr (cfa_offset);
11899 mem = gen_frame_mem (word_mode, mem);
11900 insn = emit_move_insn (reg, mem);
11902 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11904 /* Previously we'd represented the CFA as an expression
11905 like *(%ebp - 8). We've just popped that value from
11906 the stack, which means we need to reset the CFA to
11907 the drap register. This will remain until we restore
11908 the stack pointer. */
11909 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11910 RTX_FRAME_RELATED_P (insn) = 1;
11912 /* This means that the DRAP register is valid for addressing. */
11913 m->fs.drap_valid = true;
11916 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11918 cfa_offset -= UNITS_PER_WORD;
11922 /* Emit code to restore saved SSE registers using MOV insns.
11923 First register is restored from CFA - CFA_OFFSET.  SSE registers
   are reloaded in V4SFmode through 128-bit-aligned frame slots. */
11925 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11926 bool maybe_eh_return)
11928 unsigned int regno;
11930 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11931 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11933 rtx reg = gen_rtx_REG (V4SFmode, regno);
11936 mem = choose_baseaddr (cfa_offset);
11937 mem = gen_rtx_MEM (V4SFmode, mem);
/* The prologue stores SSE registers 16-byte aligned; record that so the
   aligned move pattern can be used.  */
11938 set_mem_align (mem, 128);
11939 emit_move_insn (reg, mem);
11941 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11947 /* Restore function stack, frame, and registers.  STYLE selects the
   kind of epilogue being expanded (style 2 appears to be the eh_return
   path, judging from the "style != 2" / "style == 2" tests below —
   NOTE(review): confirm against the prologue's conventions). */
/* NOTE(review): many original lines are elided in this excerpt; visible
   tokens are reproduced byte-identical.  */
11950 ix86_expand_epilogue (int style)
11952 struct machine_function *m = cfun->machine;
11953 struct machine_frame_state frame_state_save = m->fs;
11954 struct ix86_frame frame;
11955 bool restore_regs_via_mov;
11958 ix86_finalize_stack_realign_flags ();
11959 ix86_compute_frame_layout (&frame);
/* Sanity-check the frame state left behind by the prologue against the
   recomputed frame layout.  */
11961 m->fs.sp_valid = (!frame_pointer_needed
11962 || (crtl->sp_is_unchanging
11963 && !stack_realign_fp));
11964 gcc_assert (!m->fs.sp_valid
11965 || m->fs.sp_offset == frame.stack_pointer_offset);
11967 /* The FP must be valid if the frame pointer is present. */
11968 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11969 gcc_assert (!m->fs.fp_valid
11970 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11972 /* We must have *some* valid pointer to the stack frame. */
11973 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11975 /* The DRAP is never valid at this point. */
11976 gcc_assert (!m->fs.drap_valid);
11978 /* See the comment about red zone and frame
11979 pointer usage in ix86_expand_prologue. */
11980 if (frame_pointer_needed && frame.red_zone_size)
11981 emit_insn (gen_memory_blockage ());
11983 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11984 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11986 /* Determine the CFA offset of the end of the red-zone. */
11987 m->fs.red_zone_offset = 0;
11988 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11990 /* The red-zone begins below the return address. */
11991 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11993 /* When the register save area is in the aligned portion of
11994 the stack, determine the maximum runtime displacement that
11995 matches up with the aligned frame. */
11996 if (stack_realign_drap)
11997 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12001 /* Special care must be taken for the normal return case of a function
12002 using eh_return: the eax and edx registers are marked as saved, but
12003 not restored along this path. Adjust the save location to match. */
12004 if (crtl->calls_eh_return && style != 2)
12005 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between restoring registers with MOV loads or with POPs.  The
   cases below are ordered: mandatory requirements first (EH, SEH), then
   heuristics for code size/speed.  */
12007 /* EH_RETURN requires the use of moves to function properly. */
12008 if (crtl->calls_eh_return)
12009 restore_regs_via_mov = true;
12010 /* SEH requires the use of pops to identify the epilogue. */
12011 else if (TARGET_SEH)
12012 restore_regs_via_mov = false;
12013 /* If we're only restoring one register and sp is not valid then
12014 using a move instruction to restore the register since it's
12015 less work than reloading sp and popping the register. */
12016 else if (!m->fs.sp_valid && frame.nregs <= 1)
12017 restore_regs_via_mov = true;
12018 else if (TARGET_EPILOGUE_USING_MOVE
12019 && cfun->machine->use_fast_prologue_epilogue
12020 && (frame.nregs > 1
12021 || m->fs.sp_offset != frame.reg_save_offset))
12022 restore_regs_via_mov = true;
12023 else if (frame_pointer_needed
12025 && m->fs.sp_offset != frame.reg_save_offset)
12026 restore_regs_via_mov = true;
12027 else if (frame_pointer_needed
12028 && TARGET_USE_LEAVE
12029 && cfun->machine->use_fast_prologue_epilogue
12030 && frame.nregs == 1)
12031 restore_regs_via_mov = true;
12033 restore_regs_via_mov = false;
12035 if (restore_regs_via_mov || frame.nsseregs)
12037 /* Ensure that the entire register save area is addressable via
12038 the stack pointer, if we will restore via sp. */
12040 && m->fs.sp_offset > 0x7fffffff
12041 && !(m->fs.fp_valid || m->fs.drap_valid)
12042 && (frame.nsseregs + frame.nregs) != 0)
12044 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12045 GEN_INT (m->fs.sp_offset
12046 - frame.sse_reg_save_offset),
12048 m->fs.cfa_reg == stack_pointer_rtx);
12052 /* If there are any SSE registers to restore, then we have to do it
12053 via moves, since there's obviously no pop for SSE regs. */
12054 if (frame.nsseregs)
12055 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12058 if (restore_regs_via_mov)
12063 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12065 /* eh_return epilogues need %ecx added to the stack pointer. */
12068 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12070 /* Stack align doesn't work with eh_return. */
12071 gcc_assert (!stack_realign_drap);
12072 /* Neither do regparm nested functions. */
12073 gcc_assert (!ix86_static_chain_on_stack);
12075 if (frame_pointer_needed)
12077 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12078 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12079 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12081 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12082 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12084 /* Note that we use SA as a temporary CFA, as the return
12085 address is at the proper place relative to it. We
12086 pretend this happens at the FP restore insn because
12087 prior to this insn the FP would be stored at the wrong
12088 offset relative to SA, and after this insn we have no
12089 other reasonable register to use for the CFA. We don't
12090 bother resetting the CFA to the SP for the duration of
12091 the return insn. */
12092 add_reg_note (insn, REG_CFA_DEF_CFA,
12093 plus_constant (Pmode, sa, UNITS_PER_WORD));
12094 ix86_add_queued_cfa_restore_notes (insn);
12095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12096 RTX_FRAME_RELATED_P (insn) = 1;
12098 m->fs.cfa_reg = sa;
12099 m->fs.cfa_offset = UNITS_PER_WORD;
12100 m->fs.fp_valid = false;
12102 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12103 const0_rtx, style, false);
12107 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12108 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12109 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12110 ix86_add_queued_cfa_restore_notes (insn);
12112 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12113 if (m->fs.cfa_offset != UNITS_PER_WORD)
12115 m->fs.cfa_offset = UNITS_PER_WORD;
12116 add_reg_note (insn, REG_CFA_DEF_CFA,
12117 plus_constant (Pmode, stack_pointer_rtx,
12119 RTX_FRAME_RELATED_P (insn) = 1;
12122 m->fs.sp_offset = UNITS_PER_WORD;
12123 m->fs.sp_valid = true;
12128 /* SEH requires that the function end with (1) a stack adjustment
12129 if necessary, (2) a sequence of pops, and (3) a return or
12130 jump instruction. Prevent insns from the function body from
12131 being scheduled into this sequence. */
12134 /* Prevent a catch region from being adjacent to the standard
12135 epilogue sequence. Unfortunately crtl->uses_eh_lsda nor
12136 several other flags that would be interesting to test are
12138 if (flag_non_call_exceptions)
12139 emit_insn (gen_nops (const1_rtx));
12141 emit_insn (gen_blockage ());
12144 /* First step is to deallocate the stack frame so that we can
12145 pop the registers. Also do it on SEH target for very large
12146 frame as the emitted instructions aren't allowed by the ABI in
12148 if (!m->fs.sp_valid
12150 && (m->fs.sp_offset - frame.reg_save_offset
12151 >= SEH_MAX_FRAME_SIZE)))
12153 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12154 GEN_INT (m->fs.fp_offset
12155 - frame.reg_save_offset),
12158 else if (m->fs.sp_offset != frame.reg_save_offset)
12160 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12161 GEN_INT (m->fs.sp_offset
12162 - frame.reg_save_offset),
12164 m->fs.cfa_reg == stack_pointer_rtx);
12167 ix86_emit_restore_regs_using_pop ();
12170 /* If we used a stack pointer and haven't already got rid of it,
12172 if (m->fs.fp_valid)
12174 /* If the stack pointer is valid and pointing at the frame
12175 pointer store address, then we only need a pop. */
12176 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12177 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx)
12178 /* Leave results in shorter dependency chains on CPUs that are
12179 able to grok it fast. */
12180 else if (TARGET_USE_LEAVE
12181 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12182 || !cfun->machine->use_fast_prologue_epilogue)
12183 ix86_emit_leave ();
12186 pro_epilogue_adjust_stack (stack_pointer_rtx,
12187 hard_frame_pointer_rtx,
12188 const0_rtx, style, !using_drap);
12189 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown: recompute where the caller's stack pointer lives
   relative to the DRAP register and pop any saved DRAP.  */
12195 int param_ptr_offset = UNITS_PER_WORD;
12198 gcc_assert (stack_realign_drap);
12200 if (ix86_static_chain_on_stack)
12201 param_ptr_offset += UNITS_PER_WORD;
12202 if (!call_used_regs[REGNO (crtl->drap_reg)])
12203 param_ptr_offset += UNITS_PER_WORD;
12205 insn = emit_insn (gen_rtx_SET
12206 (VOIDmode, stack_pointer_rtx,
12207 gen_rtx_PLUS (Pmode,
12209 GEN_INT (-param_ptr_offset))));
12210 m->fs.cfa_reg = stack_pointer_rtx;
12211 m->fs.cfa_offset = param_ptr_offset;
12212 m->fs.sp_offset = param_ptr_offset;
12213 m->fs.realigned = false;
12215 add_reg_note (insn, REG_CFA_DEF_CFA,
12216 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12217 GEN_INT (param_ptr_offset)));
12218 RTX_FRAME_RELATED_P (insn) = 1;
12220 if (!call_used_regs[REGNO (crtl->drap_reg)])
12221 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12224 /* At this point the stack pointer must be valid, and we must have
12225 restored all of the registers. We may not have deallocated the
12226 entire stack frame. We've delayed this until now because it may
12227 be possible to merge the local stack deallocation with the
12228 deallocation forced by ix86_static_chain_on_stack. */
12229 gcc_assert (m->fs.sp_valid);
12230 gcc_assert (!m->fs.fp_valid);
12231 gcc_assert (!m->fs.realigned);
12232 if (m->fs.sp_offset != UNITS_PER_WORD)
12234 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12235 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12239 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12241 /* Sibcall epilogues don't want a return instruction. */
12244 m->fs = frame_state_save;
12248 if (crtl->args.pops_args && crtl->args.size)
12250 rtx popc = GEN_INT (crtl->args.pops_args);
12252 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12253 address, do explicit add, and jump indirectly to the caller. */
12255 if (crtl->args.pops_args >= 65536)
12257 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12260 /* There is no "pascal" calling convention in any 64bit ABI. */
12261 gcc_assert (!TARGET_64BIT);
12263 insn = emit_insn (gen_pop (ecx));
12264 m->fs.cfa_offset -= UNITS_PER_WORD;
12265 m->fs.sp_offset -= UNITS_PER_WORD;
12267 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12268 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12269 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12270 add_reg_note (insn, REG_CFA_REGISTER,
12271 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12272 RTX_FRAME_RELATED_P (insn) = 1;
12274 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12276 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12279 emit_jump_insn (gen_simple_return_pop_internal (popc));
12282 emit_jump_insn (gen_simple_return_internal ());
12284 /* Restore the state back to the state from the prologue,
12285 so that it's correct for the next epilogue. */
12286 m->fs = frame_state_save;
12289 /* Reset from the function's potential modifications.  Called at final
   assembly output time; restores the hard PIC register number and, on
   Mach-O, pads the function end with a NOP where a trailing label would
   otherwise fall at the end of the object. */
12292 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12294 if (pic_offset_table_rtx
12295 && !ix86_use_pseudo_pic_reg ())
12296 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12298 /* Mach-O doesn't support labels at the end of objects, so if
12299 it looks like we might want one, insert a NOP. */
12301 rtx_insn *insn = get_last_insn ();
12302 rtx_insn *deleted_debug_label = NULL;
/* Walk backwards over trailing notes to see whether a deleted label
   (or deleted debug label) ends the function.  */
12305 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12307 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12308 notes only, instead set their CODE_LABEL_NUMBER to -1,
12309 otherwise there would be code generation differences
12310 in between -g and -g0. */
12311 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12312 deleted_debug_label = insn;
12313 insn = PREV_INSN (insn);
12318 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12319 fputs ("\tnop\n", file);
12320 else if (deleted_debug_label)
12321 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12322 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12323 CODE_LABEL_NUMBER (insn) = -1;
12329 /* Return a scratch register to use in the split stack prologue. The
12330 split stack prologue is used for -fsplit-stack. It is the first
12331 instructions in the function, even before the regular prologue.
12332 The scratch register can be any caller-saved register which is not
12333 used for parameters or for the static chain.  Returns INVALID_REGNUM
   (after issuing a "sorry" diagnostic) when no such register exists
   for the current calling convention. */
12335 static unsigned int
12336 split_stack_prologue_scratch_regno (void)
12342 bool is_fastcall, is_thiscall;
/* fastcall/thiscall and regparm each consume caller-saved registers for
   parameter passing, which constrains the available scratch choices.  */
12345 is_fastcall = (lookup_attribute ("fastcall",
12346 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12348 is_thiscall = (lookup_attribute ("thiscall",
12349 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12351 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12355 if (DECL_STATIC_CHAIN (cfun->decl))
12357 sorry ("-fsplit-stack does not support fastcall with "
12358 "nested function");
12359 return INVALID_REGNUM;
12363 else if (is_thiscall)
12365 if (!DECL_STATIC_CHAIN (cfun->decl))
12369 else if (regparm < 3)
12371 if (!DECL_STATIC_CHAIN (cfun->decl))
12377 sorry ("-fsplit-stack does not support 2 register "
12378 "parameters for a nested function");
12379 return INVALID_REGNUM;
12386 /* FIXME: We could make this work by pushing a register
12387 around the addition and comparison. */
12388 sorry ("-fsplit-stack does not support 3 register parameters");
12389 return INVALID_REGNUM;
12394 /* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack (__morestack); lazily created and GC-rooted via GTY. */
12397 static GTY(()) rtx split_stack_fn;
12399 /* A SYMBOL_REF for the more stack function when using the large
   code model (__morestack_large_model); lazily created, GC-rooted. */
12402 static GTY(()) rtx split_stack_fn_large;
12404 /* Handle -fsplit-stack. These are the first instructions in the
12405 function, even before the regular prologue.  Compares the stack
   pointer (minus the frame size) against the per-thread stack limit
   and calls __morestack when more stack is needed. */
/* NOTE(review): many original lines are elided in this excerpt; visible
   tokens are reproduced byte-identical.  */
12408 ix86_expand_split_stack_prologue (void)
12410 struct ix86_frame frame;
12411 HOST_WIDE_INT allocate;
12412 unsigned HOST_WIDE_INT args_size;
12413 rtx_code_label *label;
12414 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12415 rtx scratch_reg = NULL_RTX;
12416 rtx_code_label *varargs_label = NULL;
12419 gcc_assert (flag_split_stack && reload_completed);
12421 ix86_finalize_stack_realign_flags ();
12422 ix86_compute_frame_layout (&frame);
12423 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12425 /* This is the label we will branch to if we have enough stack
12426 space. We expect the basic block reordering pass to reverse this
12427 branch if optimizing, so that we branch in the unlikely case. */
12428 label = gen_label_rtx ();
12430 /* We need to compare the stack pointer minus the frame size with
12431 the stack boundary in the TCB. The stack boundary always gives
12432 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12433 can compare directly. Otherwise we need to do an addition. */
12435 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12436 UNSPEC_STACK_CHECK);
12437 limit = gen_rtx_CONST (Pmode, limit);
12438 limit = gen_rtx_MEM (Pmode, limit);
12439 if (allocate < SPLIT_STACK_AVAILABLE)
12440 current = stack_pointer_rtx;
12443 unsigned int scratch_regno;
12446 /* We need a scratch register to hold the stack pointer minus
12447 the required frame size. Since this is the very start of the
12448 function, the scratch register can be any caller-saved
12449 register which is not used for parameters. */
12450 offset = GEN_INT (- allocate);
12451 scratch_regno = split_stack_prologue_scratch_regno ();
12452 if (scratch_regno == INVALID_REGNUM)
12454 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12455 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12457 /* We don't use ix86_gen_add3 in this case because it will
12458 want to split to lea, but when not optimizing the insn
12459 will not be split after this point. */
12460 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12461 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12466 emit_move_insn (scratch_reg, offset);
12467 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12468 stack_pointer_rtx));
12470 current = scratch_reg;
12473 ix86_expand_branch (GEU, current, limit, label);
12474 jump_insn = get_last_insn ();
12475 JUMP_LABEL (jump_insn) = label;
12477 /* Mark the jump as very likely to be taken. */
12478 add_int_reg_note (jump_insn, REG_BR_PROB,
12479 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12481 if (split_stack_fn == NULL_RTX)
12483 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12484 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12486 fn = split_stack_fn;
12488 /* Get more stack space. We pass in the desired stack space and the
12489 size of the arguments to copy to the new stack. In 32-bit mode
12490 we push the parameters; __morestack will return on a new stack
12491 anyhow. In 64-bit mode we pass the parameters in r10 and
12493 allocate_rtx = GEN_INT (allocate);
12494 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12495 call_fusage = NULL_RTX;
12500 reg10 = gen_rtx_REG (Pmode, R10_REG);
12501 reg11 = gen_rtx_REG (Pmode, R11_REG);
12503 /* If this function uses a static chain, it will be in %r10.
12504 Preserve it across the call to __morestack. */
12505 if (DECL_STATIC_CHAIN (cfun->decl))
12509 rax = gen_rtx_REG (word_mode, AX_REG);
12510 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12511 use_reg (&call_fusage, rax);
12514 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12517 HOST_WIDE_INT argval;
12519 gcc_assert (Pmode == DImode);
12520 /* When using the large model we need to load the address
12521 into a register, and we've run out of registers. So we
12522 switch to a different calling convention, and we call a
12523 different function: __morestack_large. We pass the
12524 argument size in the upper 32 bits of r10 and pass the
12525 frame size in the lower 32 bits. */
12526 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12527 gcc_assert ((args_size & 0xffffffff) == args_size);
12529 if (split_stack_fn_large == NULL_RTX)
12531 split_stack_fn_large =
12532 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12533 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12535 if (ix86_cmodel == CM_LARGE_PIC)
12537 rtx_code_label *label;
12540 label = gen_label_rtx ();
12541 emit_label (label);
12542 LABEL_PRESERVE_P (label) = 1;
12543 emit_insn (gen_set_rip_rex64 (reg10, label));
12544 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12545 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12546 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12548 x = gen_rtx_CONST (Pmode, x);
12549 emit_move_insn (reg11, x);
12550 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12551 x = gen_const_mem (Pmode, x);
12552 emit_move_insn (reg11, x);
12555 emit_move_insn (reg11, split_stack_fn_large);
/* Double shift avoids undefined behavior when HOST_WIDE_INT shifts by
   32 would be out of range on a 32-bit host type.  */
12559 argval = ((args_size << 16) << 16) + allocate;
12560 emit_move_insn (reg10, GEN_INT (argval));
12564 emit_move_insn (reg10, allocate_rtx);
12565 emit_move_insn (reg11, GEN_INT (args_size));
12566 use_reg (&call_fusage, reg11);
12569 use_reg (&call_fusage, reg10);
12573 emit_insn (gen_push (GEN_INT (args_size)));
12574 emit_insn (gen_push (allocate_rtx));
12576 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12577 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12579 add_function_usage_to (call_insn, call_fusage);
12581 /* In order to make call/return prediction work right, we now need
12582 to execute a return instruction. See
12583 libgcc/config/i386/morestack.S for the details on how this works.
12585 For flow purposes gcc must not see this as a return
12586 instruction--we need control flow to continue at the subsequent
12587 label. Therefore, we use an unspec. */
12588 gcc_assert (crtl->args.pops_args < 65536);
12589 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12591 /* If we are in 64-bit mode and this function uses a static chain,
12592 we saved %r10 in %rax before calling __morestack. */
12593 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12594 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12595 gen_rtx_REG (word_mode, AX_REG));
12597 /* If this function calls va_start, we need to store a pointer to
12598 the arguments on the old stack, because they may not have been
12599 all copied to the new stack. At this point the old stack can be
12600 found at the frame pointer value used by __morestack, because
12601 __morestack has set that up before calling back to us. Here we
12602 store that pointer in a scratch register, and in
12603 ix86_expand_prologue we store the scratch register in a stack
12605 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12607 unsigned int scratch_regno;
12611 scratch_regno = split_stack_prologue_scratch_regno ();
12612 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12613 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12617 return address within this function
12618 return address of caller of this function
12620 So we add three words to get to the stack arguments.
12624 return address within this function
12625 first argument to __morestack
12626 second argument to __morestack
12627 return address of caller of this function
12629 So we add five words to get to the stack arguments.
12631 words = TARGET_64BIT ? 3 : 5;
12632 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12633 gen_rtx_PLUS (Pmode, frame_reg,
12634 GEN_INT (words * UNITS_PER_WORD))));
12636 varargs_label = gen_label_rtx ();
12637 emit_jump_insn (gen_jump (varargs_label));
12638 JUMP_LABEL (get_last_insn ()) = varargs_label;
12643 emit_label (label);
12644 LABEL_NUSES (label) = 1;
12646 /* If this function calls va_start, we now have to set the scratch
12647 register for the case where we do not call __morestack. In this
12648 case we need to set it based on the stack pointer. */
12649 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12651 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12652 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12653 GEN_INT (UNITS_PER_WORD))));
12655 emit_label (varargs_label);
12656 LABEL_NUSES (varargs_label) = 1;
12660 /* We may have to tell the dataflow pass that the split stack prologue
12661 is initializing a scratch register.  REGS is the live-on-entry
   bitmap; the scratch register chosen by
   split_stack_prologue_scratch_regno is marked live so dataflow does
   not treat its initialization as dead. */
12664 ix86_live_on_entry (bitmap regs)
12666 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12668 gcc_assert (flag_split_stack);
12669 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12673 /* Extract the parts of an RTL expression that is a valid memory address
12674 for an instruction. Return 0 if the structure of the address is
12675 grossly off. Return -1 if the address contains ASHIFT, so it is not
12676 strictly valid, but still used for computing length of lea instruction.
   On success fills OUT with base, index, scale, displacement and
   segment parts.  */
/* NOTE(review): many original lines are elided in this excerpt; visible
   tokens are reproduced byte-identical.  */
12679 ix86_decompose_address (rtx addr, struct ix86_address *out)
12681 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12682 rtx base_reg, index_reg;
12683 HOST_WIDE_INT scale = 1;
12684 rtx scale_rtx = NULL_RTX;
12687 enum ix86_address_seg seg = SEG_DEFAULT;
12689 /* Allow zero-extended SImode addresses,
12690 they will be emitted with addr32 prefix. */
12691 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12693 if (GET_CODE (addr) == ZERO_EXTEND
12694 && GET_MODE (XEXP (addr, 0)) == SImode)
12696 addr = XEXP (addr, 0);
12697 if (CONST_INT_P (addr))
12700 else if (GET_CODE (addr) == AND
12701 && const_32bit_mask (XEXP (addr, 1), DImode))
12703 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12704 if (addr == NULL_RTX)
12707 if (CONST_INT_P (addr))
12712 /* Allow SImode subregs of DImode addresses,
12713 they will be emitted with addr32 prefix. */
12714 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12716 if (GET_CODE (addr) == SUBREG
12717 && GET_MODE (SUBREG_REG (addr)) == DImode)
12719 addr = SUBREG_REG (addr);
12720 if (CONST_INT_P (addr))
12727 else if (GET_CODE (addr) == SUBREG)
12729 if (REG_P (SUBREG_REG (addr)))
12734 else if (GET_CODE (addr) == PLUS)
12736 rtx addends[4], op;
/* Flatten the nested PLUS chain into up to four addends, then classify
   each addend as base, index*scale, segment unspec or displacement.  */
12744 addends[n++] = XEXP (op, 1);
12747 while (GET_CODE (op) == PLUS);
12752 for (i = n; i >= 0; --i)
12755 switch (GET_CODE (op))
12760 index = XEXP (op, 0);
12761 scale_rtx = XEXP (op, 1);
12767 index = XEXP (op, 0);
12768 tmp = XEXP (op, 1);
12769 if (!CONST_INT_P (tmp))
12771 scale = INTVAL (tmp);
/* Only shift counts 0..3 (scale 1,2,4,8) are encodable.  */
12772 if ((unsigned HOST_WIDE_INT) scale > 3)
12774 scale = 1 << scale;
12779 if (GET_CODE (op) != UNSPEC)
12784 if (XINT (op, 1) == UNSPEC_TP
12785 && TARGET_TLS_DIRECT_SEG_REFS
12786 && seg == SEG_DEFAULT)
12787 seg = DEFAULT_TLS_SEG_REG;
12793 if (!REG_P (SUBREG_REG (op)))
12820 else if (GET_CODE (addr) == MULT)
12822 index = XEXP (addr, 0); /* index*scale */
12823 scale_rtx = XEXP (addr, 1);
12825 else if (GET_CODE (addr) == ASHIFT)
12827 /* We're called for lea too, which implements ashift on occasion. */
12828 index = XEXP (addr, 0);
12829 tmp = XEXP (addr, 1);
12830 if (!CONST_INT_P (tmp))
12832 scale = INTVAL (tmp);
12833 if ((unsigned HOST_WIDE_INT) scale > 3)
12835 scale = 1 << scale;
12839 disp = addr; /* displacement */
12845 else if (GET_CODE (index) == SUBREG
12846 && REG_P (SUBREG_REG (index)))
12852 /* Extract the integral value of scale. */
12855 if (!CONST_INT_P (scale_rtx))
12857 scale = INTVAL (scale_rtx);
12860 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12861 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12863 /* Avoid useless 0 displacement. */
12864 if (disp == const0_rtx && (base || index))
12867 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12868 if (base_reg && index_reg && scale == 1
12869 && (index_reg == arg_pointer_rtx
12870 || index_reg == frame_pointer_rtx
12871 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12873 std::swap (base, index);
12874 std::swap (base_reg, index_reg);
12877 /* Special case: %ebp cannot be encoded as a base without a displacement.
12881 && (base_reg == hard_frame_pointer_rtx
12882 || base_reg == frame_pointer_rtx
12883 || base_reg == arg_pointer_rtx
12884 || (REG_P (base_reg)
12885 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12886 || REGNO (base_reg) == R13_REG))))
12889 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12890 Avoid this by transforming to [%esi+0].
12891 Reload calls address legitimization without cfun defined, so we need
12892 to test cfun for being non-NULL. */
12893 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12894 && base_reg && !index_reg && !disp
12895 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12898 /* Special case: encode reg+reg instead of reg*2. */
12899 if (!base && index && scale == 2)
12900 base = index, base_reg = index_reg, scale = 1;
12902 /* Special case: scaling cannot be encoded without base or displacement. */
12903 if (!base && !disp && index && scale != 1)
12907 out->index = index;
12909 out->scale = scale;
12915 /* Return cost of the memory address x.
12916 For i386, it is better to use a complex address than let gcc copy
12917 the address into a reg and make a new pseudo. But not if the address
12918 requires two regs - that would mean more pseudos with longer
   lifetimes.  */
12921 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12923 struct ix86_address parts;
12925 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so register-number tests below see the inner regs.  */
12929 if (parts.base && GET_CODE (parts.base) == SUBREG)
12930 parts.base = SUBREG_REG (parts.base);
12931 if (parts.index && GET_CODE (parts.index) == SUBREG)
12932 parts.index = SUBREG_REG (parts.index);
12934 /* Attempt to minimize number of registers in the address by increasing
12935 address cost for each used register. We don't increase address cost
12936 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
12937 is not invariant itself it most likely means that base or index is not
12938 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
12939 which is not profitable for x86. */
12941 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12942 && (current_pass->type == GIMPLE_PASS
12943 || !pic_offset_table_rtx
12944 || !REG_P (parts.base)
12945 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
12949 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12950 && (current_pass->type == GIMPLE_PASS
12951 || !pic_offset_table_rtx
12952 || !REG_P (parts.index)
12953 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
12956 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12957 since its predecode logic can't detect the length of instructions
12958 and it degenerates to vector decoded. Increase cost of such
12959 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12960 to split such addresses or even refuse such addresses at all.
12962 Following addressing modes are affected:
12967 The first and last case may be avoidable by explicitly coding the zero in
12968 memory address, but I don't have AMD-K6 machine handy to check this
12972 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12973 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12974 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12980 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12981 this is used to form addresses to local data when -fPIC is in
   effect.  Returns true when DISP is the Mach-O pic-offset unspec. */
12985 darwin_local_data_pic (rtx disp)
12987 return (GET_CODE (disp) == UNSPEC
12988 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12991 /* Determine if a given RTX is a valid constant. We already know this
12992 satisfies CONSTANT_P.  Implements TARGET_LEGITIMATE_CONSTANT_P. */
/* NOTE(review): some original lines are elided in this excerpt.  */
12995 ix86_legitimate_constant_p (machine_mode, rtx x)
12997 /* Pointer bounds constants are not valid. */
12998 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13001 switch (GET_CODE (x))
13006 if (GET_CODE (x) == PLUS)
13008 if (!CONST_INT_P (XEXP (x, 1)))
13013 if (TARGET_MACHO && darwin_local_data_pic (x))
13016 /* Only some unspecs are valid as "constants". */
13017 if (GET_CODE (x) == UNSPEC)
13018 switch (XINT (x, 1))
13021 case UNSPEC_GOTOFF:
13022 case UNSPEC_PLTOFF:
13023 return TARGET_64BIT;
13025 case UNSPEC_NTPOFF:
/* For TLS offset unspecs, validity depends on the TLS model of the
   underlying symbol.  */
13026 x = XVECEXP (x, 0, 0);
13027 return (GET_CODE (x) == SYMBOL_REF
13028 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13029 case UNSPEC_DTPOFF:
13030 x = XVECEXP (x, 0, 0);
13031 return (GET_CODE (x) == SYMBOL_REF
13032 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13037 /* We must have drilled down to a symbol. */
13038 if (GET_CODE (x) == LABEL_REF)
13040 if (GET_CODE (x) != SYMBOL_REF)
13045 /* TLS symbols are never valid. */
13046 if (SYMBOL_REF_TLS_MODEL (x))
13049 /* DLLIMPORT symbols are never valid. */
13050 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13051 && SYMBOL_REF_DLLIMPORT_P (x))
13055 /* mdynamic-no-pic */
13056 if (MACHO_DYNAMIC_NO_PIC_P)
13057 return machopic_symbol_defined_p (x);
13062 if (GET_MODE (x) == TImode
13063 && x != CONST0_RTX (TImode)
13069 if (!standard_sse_constant_p (x))
13076 /* Otherwise we handle everything else in the move patterns. */
13080 /* Determine if it's legal to put X into the constant pool. This
13081 is not possible for the address of thread-local symbols, which
13082 is checked above.  Implements TARGET_CANNOT_FORCE_CONST_MEM;
   returns true when X must NOT be forced into memory. */
13085 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13087 /* We can always put integral constants and vectors in memory. */
13088 switch (GET_CODE (x))
/* Anything else is poolable exactly when it is a legitimate constant.  */
13098 return !ix86_legitimate_constant_p (mode, x);
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Only meaningful when dllimport
   decl attributes are in use; non-SYMBOL_REF operands are not "imported".  */
13101 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
13105 is_imported_p (rtx x)
13107 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13108 || GET_CODE (x) != SYMBOL_REF)
13111 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Unwraps a CONST (symbol + offset)
   wrapper, validates known unspecs, and otherwise defers to
   legitimate_pic_address_disp_p.  */
13115 /* Nonzero if the constant value X is a legitimate general operand
13116 when generating PIC code.  It is given that flag_pic is on and
13117 that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
13120 legitimate_pic_operand_p (rtx x)
13124 switch (GET_CODE (x))
13127 inner = XEXP (x, 0);
13128 if (GET_CODE (inner) == PLUS
13129 && CONST_INT_P (XEXP (inner, 1)))
13130 inner = XEXP (inner, 0);
13132 /* Only some unspecs are valid as "constants".  */
13133 if (GET_CODE (inner) == UNSPEC)
13134 switch (XINT (inner, 1))
13137 case UNSPEC_GOTOFF:
13138 case UNSPEC_PLTOFF:
13139 return TARGET_64BIT;
13141 x = XVECEXP (inner, 0, 0);
13142 return (GET_CODE (x) == SYMBOL_REF
13143 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13144 case UNSPEC_MACHOPIC_OFFSET:
13145 return legitimate_pic_address_disp_p (x);
13153 return legitimate_pic_address_disp_p (x);
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Validates a PIC memory
   displacement DISP: in 64-bit mode it accepts direct (possibly offset)
   symbol/label/PCREL addresses within +/-16MB; otherwise it requires one of
   the known GOT/GOTOFF/TLS unspecs wrapping a symbol of the matching TLS
   model.  */
13160 /* Determine if a given CONST RTX is a valid memory displacement
13164 legitimate_pic_address_disp_p (rtx disp)
13168 /* In 64bit mode we can allow direct addresses of symbols and labels
13169 when they are not dynamic symbols.  */
13172 rtx op0 = disp, op1;
13174 switch (GET_CODE (disp))
13180 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13182 op0 = XEXP (XEXP (disp, 0), 0);
13183 op1 = XEXP (XEXP (disp, 0), 1);
13184 if (!CONST_INT_P (op1)
13185 || INTVAL (op1) >= 16*1024*1024
13186 || INTVAL (op1) < -16*1024*1024)
13188 if (GET_CODE (op0) == LABEL_REF)
13190 if (GET_CODE (op0) == CONST
13191 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13192 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13194 if (GET_CODE (op0) == UNSPEC
13195 && XINT (op0, 1) == UNSPEC_PCREL)
13197 if (GET_CODE (op0) != SYMBOL_REF)
13202 /* TLS references should always be enclosed in UNSPEC.
13203 The dllimported symbol needs always to be resolved.  */
13204 if (SYMBOL_REF_TLS_MODEL (op0)
13205 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13210 if (is_imported_p (op0))
13213 if (SYMBOL_REF_FAR_ADDR_P (op0)
13214 || !SYMBOL_REF_LOCAL_P (op0))
13217 /* Function-symbols need to be resolved only for
13219 For the small-model we don't need to resolve anything
13221 if ((ix86_cmodel != CM_LARGE_PIC
13222 && SYMBOL_REF_FUNCTION_P (op0))
13223 || ix86_cmodel == CM_SMALL_PIC)
13225 /* Non-external symbols don't need to be resolved for
13226 large, and medium-model.  */
13227 if ((ix86_cmodel == CM_LARGE_PIC
13228 || ix86_cmodel == CM_MEDIUM_PIC)
13229 && !SYMBOL_REF_EXTERNAL_P (op0))
13232 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13233 && (SYMBOL_REF_LOCAL_P (op0)
13234 || (HAVE_LD_PIE_COPYRELOC
13236 && !SYMBOL_REF_WEAK (op0)
13237 && !SYMBOL_REF_FUNCTION_P (op0)))
13238 && ix86_cmodel != CM_LARGE_PIC)
13246 if (GET_CODE (disp) != CONST)
13248 disp = XEXP (disp, 0);
13252 /* We are unsafe to allow PLUS expressions.  This limit allowed distance
13253 of GOT tables.  We should not need these anyway.  */
13254 if (GET_CODE (disp) != UNSPEC
13255 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13256 && XINT (disp, 1) != UNSPEC_GOTOFF
13257 && XINT (disp, 1) != UNSPEC_PCREL
13258 && XINT (disp, 1) != UNSPEC_PLTOFF))
13261 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13262 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13268 if (GET_CODE (disp) == PLUS)
13270 if (!CONST_INT_P (XEXP (disp, 1)))
13272 disp = XEXP (disp, 0);
13276 if (TARGET_MACHO && darwin_local_data_pic (disp))
13279 if (GET_CODE (disp) != UNSPEC)
13282 switch (XINT (disp, 1))
13287 /* We need to check for both symbols and labels because VxWorks loads
13288 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
13290 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13291 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13292 case UNSPEC_GOTOFF:
13293 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13294 While ABI specify also 32bit relocation but we don't produce it in
13295 small PIC model at all.  */
13296 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13297 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13299 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13301 case UNSPEC_GOTTPOFF:
13302 case UNSPEC_GOTNTPOFF:
13303 case UNSPEC_INDNTPOFF:
13306 disp = XVECEXP (disp, 0, 0);
13307 return (GET_CODE (disp) == SYMBOL_REF
13308 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13309 case UNSPEC_NTPOFF:
13310 disp = XVECEXP (disp, 0, 0);
13311 return (GET_CODE (disp) == SYMBOL_REF
13312 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13313 case UNSPEC_DTPOFF:
13314 disp = XVECEXP (disp, 0, 0);
13315 return (GET_CODE (disp) == SYMBOL_REF
13316 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Reloads only the invalid base
   and/or index register of a (plus (plus ... reg) reg) address rather than
   letting reload spill all three components.  */
13322 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
13323 replace the input X, or the original X if no replacement is called for.
13324 The output parameter *WIN is 1 if the calling macro should goto WIN,
13325 0 if it should not.  */
13328 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13331 /* Reload can generate:
13333 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13337 This RTX is rejected from ix86_legitimate_address_p due to
13338 non-strictness of base register 97.  Following this rejection,
13339 reload pushes all three components into separate registers,
13340 creating invalid memory address RTX.
13342 Following code reloads only the invalid part of the
13343 memory address RTX.  */
13345 if (GET_CODE (x) == PLUS
13346 && REG_P (XEXP (x, 1))
13347 && GET_CODE (XEXP (x, 0)) == PLUS
13348 && REG_P (XEXP (XEXP (x, 0), 1)))
13351 bool something_reloaded = false;
13353 base = XEXP (XEXP (x, 0), 1);
13354 if (!REG_OK_FOR_BASE_STRICT_P (base))
13356 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13357 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13358 opnum, (enum reload_type) type);
13359 something_reloaded = true;
13362 index = XEXP (x, 1);
13363 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13365 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13366 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13367 opnum, (enum reload_type) type);
13368 something_reloaded = true;
13371 gcc_assert (something_reloaded);
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Accepts SImode/DImode REGs and
   word-sized SUBREGs of non-eliminable hard registers.  */
13378 /* Determine if op is suitable RTX for an address register.
13379 Return naked register if a register or a register subreg is
13380 found, otherwise return NULL_RTX.  */
13383 ix86_validate_address_register (rtx op)
13385 machine_mode mode = GET_MODE (op);
13387 /* Only SImode or DImode registers can form the address.  */
13388 if (mode != SImode && mode != DImode)
13393 else if (GET_CODE (op) == SUBREG)
13395 rtx reg = SUBREG_REG (op);
13400 mode = GET_MODE (reg);
13402 /* Don't allow SUBREGs that span more than a word.  It can
13403 lead to spill failures when the register is one word out
13404 of a two word structure.  */
13405 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13408 /* Allow only SUBREGs of non-eliminable hard registers.  */
13409 if (register_no_elim_operand (reg, mode))
13413 /* Op is not a register.  */
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Target hook: decomposes ADDR into
   base/index/disp/scale/segment via ix86_decompose_address and validates each
   part (register classes, scale in {2,4,8}, segment-override modes, and the
   many PIC/TLS displacement forms).  */
13417 /* Recognizes RTL expressions that are valid memory addresses for an
13418 instruction.  The MODE argument is the machine mode for the MEM
13419 expression that wants to use this address.
13421 It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
13422 convert common non-canonical forms to canonical form so that they will
13426 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13428 struct ix86_address parts;
13429 rtx base, index, disp;
13430 HOST_WIDE_INT scale;
13431 enum ix86_address_seg seg;
13433 if (ix86_decompose_address (addr, &parts) <= 0)
13434 /* Decomposition failed.  */
13438 index = parts.index;
13440 scale = parts.scale;
13443 /* Validate base register.  */
13446 rtx reg = ix86_validate_address_register (base);
13448 if (reg == NULL_RTX)
13451 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13452 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13453 /* Base is not valid.  */
13457 /* Validate index register.  */
13460 rtx reg = ix86_validate_address_register (index);
13462 if (reg == NULL_RTX)
13465 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13466 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13467 /* Index is not valid.  */
13471 /* Index and base should have the same mode.  */
13473 && GET_MODE (base) != GET_MODE (index))
13476 /* Address override works only on the (%reg) part of %fs:(%reg).  */
13477 if (seg != SEG_DEFAULT
13478 && ((base && GET_MODE (base) != word_mode)
13479 || (index && GET_MODE (index) != word_mode)))
13482 /* Validate scale factor.  */
13486 /* Scale without index.  */
13489 if (scale != 2 && scale != 4 && scale != 8)
13490 /* Scale is not a valid multiplier.  */
13494 /* Validate displacement.  */
13497 if (GET_CODE (disp) == CONST
13498 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13499 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13500 switch (XINT (XEXP (disp, 0), 1))
13502 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13503 used.  While ABI specify also 32bit relocations, we don't produce
13504 them at all and use IP relative instead.  */
13506 case UNSPEC_GOTOFF:
13507 gcc_assert (flag_pic);
13509 goto is_legitimate_pic;
13511 /* 64bit address unspec.  */
13514 case UNSPEC_GOTPCREL:
13516 gcc_assert (flag_pic);
13517 goto is_legitimate_pic;
13519 case UNSPEC_GOTTPOFF:
13520 case UNSPEC_GOTNTPOFF:
13521 case UNSPEC_INDNTPOFF:
13522 case UNSPEC_NTPOFF:
13523 case UNSPEC_DTPOFF:
13526 case UNSPEC_STACK_CHECK:
13527 gcc_assert (flag_split_stack);
13531 /* Invalid address unspec.  */
13535 else if (SYMBOLIC_CONST (disp)
13539 && MACHOPIC_INDIRECT
13540 && !machopic_operand_p (disp)
13546 if (TARGET_64BIT && (index || base))
13548 /* foo@dtpoff(%rX) is ok.  */
13549 if (GET_CODE (disp) != CONST
13550 || GET_CODE (XEXP (disp, 0)) != PLUS
13551 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13552 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13553 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13554 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13555 /* Non-constant pic memory reference.  */
13558 else if ((!TARGET_MACHO || flag_pic)
13559 && ! legitimate_pic_address_disp_p (disp))
13560 /* Displacement is an invalid pic construct.  */
13563 else if (MACHO_DYNAMIC_NO_PIC_P
13564 && !ix86_legitimate_constant_p (Pmode, disp))
13565 /* displacement must be referenced via non_lazy_pointer */
13569 /* This code used to verify that a symbolic pic displacement
13570 includes the pic_offset_table_rtx register.
13572 While this is good idea, unfortunately these constructs may
13573 be created by "adds using lea" optimization for incorrect
13582 This code is nonsensical, but results in addressing
13583 GOT table with pic_offset_table_rtx base.  We can't
13584 just refuse it easily, since it gets matched by
13585 "addsi3" pattern, that later gets split to lea in the
13586 case output register differs from input.  While this
13587 can be handled by separate addsi pattern for this case
13588 that never results in lea, this seems to be easier and
13589 correct fix for crash to disable this test.  */
13591 else if (GET_CODE (disp) != LABEL_REF
13592 && !CONST_INT_P (disp)
13593 && (GET_CODE (disp) != CONST
13594 || !ix86_legitimate_constant_p (Pmode, disp))
13595 && (GET_CODE (disp) != SYMBOL_REF
13596 || !ix86_legitimate_constant_p (Pmode, disp)))
13597 /* Displacement is not constant.  */
13599 else if (TARGET_64BIT
13600 && !x86_64_immediate_operand (disp, VOIDmode))
13601 /* Displacement is out of range.  */
13603 /* In x32 mode, constant addresses are sign extended to 64bit, so
13604 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
13605 else if (TARGET_X32 && !(index || base)
13606 && CONST_INT_P (disp)
13607 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13611 /* Everything looks valid.  */
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  Strict (third arg 1) address check for a CONSTANT_P rtx.  */
13615 /* Determine if a given RTX is a valid constant address.  */
13618 constant_address_p (rtx x)
13620 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  Lazily allocates the alias set on first use (-1 sentinel).  */
13623 /* Return a unique alias set for the GOT.  */
13625 static alias_set_type
13626 ix86_GOT_alias_set (void)
13628 static alias_set_type set = -1;
13630 set = new_alias_set ();
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  Only needed while reload is in progress, hence the guard.  */
13634 /* Set regs_ever_live for PIC base address register
13635 to true if required.  */
13637 set_pic_reg_ever_live ()
13639 if (reload_in_progress)
13640 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Rewrites ORIG into a
   PIC-legitimate form: @GOTOFF offsets from the PIC register for local data,
   @GOT/@GOTPCREL memory loads for global data, plus Mach-O, PE-COFF and
   symbol+offset (CONST PLUS) paths; recurses on PLUS operands.  */
13643 /* Return a legitimate reference for ORIG (an address) using the
13644 register REG.  If REG is 0, a new pseudo is generated.
13646 There are two types of references that must be handled:
13648 1. Global data references must load the address from the GOT, via
13649 the PIC reg.  An insn is emitted to do this load, and the reg is
13652 2. Static data references, constant pool addresses, and code labels
13653 compute the address as an offset from the GOT, whose base is in
13654 the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
13655 differentiate them from global data objects.  The returned
13656 address is the PIC reg + an unspec constant.
13658 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13659 reg also appears in the address.  */
13662 legitimize_pic_address (rtx orig, rtx reg)
13665 rtx new_rtx = orig;
13668 if (TARGET_MACHO && !TARGET_64BIT)
13671 reg = gen_reg_rtx (Pmode);
13672 /* Use the generic Mach-O PIC machinery.  */
13673 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13677 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13679 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13684 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13686 else if (TARGET_64BIT && !TARGET_PECOFF
13687 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13690 /* This symbol may be referenced via a displacement from the PIC
13691 base address (@GOTOFF).  */
13693 set_pic_reg_ever_live ();
13694 if (GET_CODE (addr) == CONST)
13695 addr = XEXP (addr, 0);
13696 if (GET_CODE (addr) == PLUS)
13698 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13700 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13703 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13704 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13706 tmpreg = gen_reg_rtx (Pmode);
13709 emit_move_insn (tmpreg, new_rtx);
13713 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13714 tmpreg, 1, OPTAB_DIRECT);
13718 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13720 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13722 /* This symbol may be referenced via a displacement from the PIC
13723 base address (@GOTOFF).  */
13725 set_pic_reg_ever_live ();
13726 if (GET_CODE (addr) == CONST)
13727 addr = XEXP (addr, 0);
13728 if (GET_CODE (addr) == PLUS)
13730 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13732 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13735 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13736 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13737 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13741 emit_move_insn (reg, new_rtx);
13745 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13746 /* We can't use @GOTOFF for text labels on VxWorks;
13747 see gotoff_operand.  */
13748 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13750 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13754 /* For x64 PE-COFF there is no GOT table.  So we use address
13756 if (TARGET_64BIT && TARGET_PECOFF)
13758 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13759 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13762 reg = gen_reg_rtx (Pmode);
13763 emit_move_insn (reg, new_rtx);
13766 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13768 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13769 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13770 new_rtx = gen_const_mem (Pmode, new_rtx);
13771 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13774 reg = gen_reg_rtx (Pmode);
13775 /* Use directly gen_movsi, otherwise the address is loaded
13776 into register for CSE.  We don't want to CSE these addresses,
13777 instead we CSE addresses from the GOT table, so skip this.  */
13778 emit_insn (gen_movsi (reg, new_rtx))
13783 /* This symbol must be referenced via a load from the
13784 Global Offset Table (@GOT).  */
13786 set_pic_reg_ever_live ();
13787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13788 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13790 new_rtx = force_reg (Pmode, new_rtx);
13791 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13792 new_rtx = gen_const_mem (Pmode, new_rtx);
13793 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13796 reg = gen_reg_rtx (Pmode);
13797 emit_move_insn (reg, new_rtx);
13803 if (CONST_INT_P (addr)
13804 && !x86_64_immediate_operand (addr, VOIDmode))
13808 emit_move_insn (reg, addr);
13812 new_rtx = force_reg (Pmode, addr);
13814 else if (GET_CODE (addr) == CONST)
13816 addr = XEXP (addr, 0);
13818 /* We must match stuff we generate before.  Assume the only
13819 unspecs that can get here are ours.  Not that we could do
13820 anything with them anyway....  */
13821 if (GET_CODE (addr) == UNSPEC
13822 || (GET_CODE (addr) == PLUS
13823 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13825 gcc_assert (GET_CODE (addr) == PLUS);
13827 if (GET_CODE (addr) == PLUS)
13829 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13831 /* Check first to see if this is a constant offset from a @GOTOFF
13832 symbol reference.  */
13833 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13834 && CONST_INT_P (op1))
13838 set_pic_reg_ever_live ();
13839 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13841 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13842 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13843 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13847 emit_move_insn (reg, new_rtx);
13853 if (INTVAL (op1) < -16*1024*1024
13854 || INTVAL (op1) >= 16*1024*1024)
13856 if (!x86_64_immediate_operand (op1, Pmode))
13857 op1 = force_reg (Pmode, op1);
13858 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13864 rtx base = legitimize_pic_address (op0, reg);
13865 machine_mode mode = GET_MODE (base);
13867 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13869 if (CONST_INT_P (new_rtx))
13871 if (INTVAL (new_rtx) < -16*1024*1024
13872 || INTVAL (new_rtx) >= 16*1024*1024)
13874 if (!x86_64_immediate_operand (new_rtx, mode))
13875 new_rtx = force_reg (mode, new_rtx);
13877 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13880 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13884 /* For %rip addressing, we have to use just disp32, not
13887 && (GET_CODE (base) == SYMBOL_REF
13888 || GET_CODE (base) == LABEL_REF))
13889 base = force_reg (mode, base);
13890 if (GET_CODE (new_rtx) == PLUS
13891 && CONSTANT_P (XEXP (new_rtx, 1)))
13893 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13894 new_rtx = XEXP (new_rtx, 1);
13896 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  The UNSPEC_TP is built in ptr_mode and zero-extended when a
   DImode thread pointer is requested in x32 (SImode ptr_mode).  */
13904 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
13907 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13909 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13911 if (GET_MODE (tp) != tp_mode)
13913 gcc_assert (GET_MODE (tp) == SImode);
13914 gcc_assert (tp_mode == DImode);
13916 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13920 tp = copy_to_mode_reg (tp_mode, tp);
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  Caches the SYMBOL_REF; GNU TLS on 32-bit uses the
   triple-underscore entry point.  Large-model PIC wraps it in a PLTOFF-style
   unspec added to the PIC register.  */
13925 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
13927 static GTY(()) rtx ix86_tls_symbol;
13930 ix86_tls_get_addr (void)
13932 if (!ix86_tls_symbol)
13935 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13936 ? "___tls_get_addr" : "__tls_get_addr");
13938 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13941 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13943 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13945 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13946 gen_rtx_CONST (Pmode, unspec));
13949 return ix86_tls_symbol;
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  Caches the symbol and marks it global-dynamic TLS via the
   SYMBOL_REF flags.  */
13952 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
13954 static GTY(()) rtx ix86_tls_module_base_symbol;
13957 ix86_tls_module_base (void)
13959 if (!ix86_tls_module_base_symbol)
13961 ix86_tls_module_base_symbol
13962 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13964 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13965 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13968 return ix86_tls_module_base_symbol;
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Expands TLS symbol X per MODEL:
   global-/local-dynamic call __tls_get_addr (or use GNU2 TLS descriptors),
   initial-exec loads @gottpoff/@gotntpoff/@indntpoff from the GOT, local-exec
   adds @ntpoff/@tpoff to the thread pointer; Sun as/ld quirks handled.  */
13971 /* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
13972 false if we expect this to be used for a memory address and true if
13973 we expect to load the address into a register.  */
13976 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13978 rtx dest, base, off;
13979 rtx pic = NULL_RTX, tp = NULL_RTX;
13980 machine_mode tp_mode = Pmode;
13983 /* Fall back to global dynamic model if tool chain cannot support local
13985 if (TARGET_SUN_TLS && !TARGET_64BIT
13986 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13987 && model == TLS_MODEL_LOCAL_DYNAMIC)
13988 model = TLS_MODEL_GLOBAL_DYNAMIC;
13992 case TLS_MODEL_GLOBAL_DYNAMIC:
13993 dest = gen_reg_rtx (Pmode);
13997 if (flag_pic && !TARGET_PECOFF)
13998 pic = pic_offset_table_rtx;
14001 pic = gen_reg_rtx (Pmode);
14002 emit_insn (gen_set_got (pic));
14006 if (TARGET_GNU2_TLS)
14009 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14011 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14013 tp = get_thread_pointer (Pmode, true);
14014 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14016 if (GET_MODE (x) != Pmode)
14017 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14019 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14023 rtx caddr = ix86_tls_get_addr ();
14027 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14032 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14033 insns = get_insns ();
14036 if (GET_MODE (x) != Pmode)
14037 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14039 RTL_CONST_CALL_P (insns) = 1;
14040 emit_libcall_block (insns, dest, rax, x);
14043 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14047 case TLS_MODEL_LOCAL_DYNAMIC:
14048 base = gen_reg_rtx (Pmode);
14053 pic = pic_offset_table_rtx;
14056 pic = gen_reg_rtx (Pmode);
14057 emit_insn (gen_set_got (pic));
14061 if (TARGET_GNU2_TLS)
14063 rtx tmp = ix86_tls_module_base ();
14066 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14068 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14070 tp = get_thread_pointer (Pmode, true);
14071 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14072 gen_rtx_MINUS (Pmode, tmp, tp));
14076 rtx caddr = ix86_tls_get_addr ();
14080 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14086 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14087 insns = get_insns ();
14090 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14091 share the LD_BASE result with other LD model accesses.  */
14092 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14093 UNSPEC_TLS_LD_BASE);
14095 RTL_CONST_CALL_P (insns) = 1;
14096 emit_libcall_block (insns, base, rax, eqv);
14099 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14102 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14103 off = gen_rtx_CONST (Pmode, off);
14105 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14107 if (TARGET_GNU2_TLS)
14109 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14111 if (GET_MODE (x) != Pmode)
14112 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14114 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14118 case TLS_MODEL_INITIAL_EXEC:
14121 if (TARGET_SUN_TLS && !TARGET_X32)
14123 /* The Sun linker took the AMD64 TLS spec literally
14124 and can only handle %rax as destination of the
14125 initial executable code sequence.  */
14127 dest = gen_reg_rtx (DImode);
14128 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14132 /* Generate DImode references to avoid %fs:(%reg32)
14133 problems and linker IE->LE relaxation bug.  */
14136 type = UNSPEC_GOTNTPOFF;
14140 set_pic_reg_ever_live ();
14141 pic = pic_offset_table_rtx;
14142 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14144 else if (!TARGET_ANY_GNU_TLS)
14146 pic = gen_reg_rtx (Pmode);
14147 emit_insn (gen_set_got (pic));
14148 type = UNSPEC_GOTTPOFF;
14153 type = UNSPEC_INDNTPOFF;
14156 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14157 off = gen_rtx_CONST (tp_mode, off);
14159 off = gen_rtx_PLUS (tp_mode, pic, off);
14160 off = gen_const_mem (tp_mode, off);
14161 set_mem_alias_set (off, ix86_GOT_alias_set ());
14163 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14165 base = get_thread_pointer (tp_mode,
14166 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14167 off = force_reg (tp_mode, off);
14168 return gen_rtx_PLUS (tp_mode, base, off);
14172 base = get_thread_pointer (Pmode, true);
14173 dest = gen_reg_rtx (Pmode);
14174 emit_insn (ix86_gen_sub3 (dest, base, off));
14178 case TLS_MODEL_LOCAL_EXEC:
14179 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14180 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14181 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14182 off = gen_rtx_CONST (Pmode, off);
14184 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14186 base = get_thread_pointer (Pmode,
14187 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14188 return gen_rtx_PLUS (Pmode, base, off);
14192 base = get_thread_pointer (Pmode, true);
14193 dest = gen_reg_rtx (Pmode);
14194 emit_insn (ix86_gen_sub3 (dest, base, off));
14199 gcc_unreachable ();
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  GC-cache hash traits for the decl -> __imp_/refptr stub map:
   hash by precomputed tree_map::hash, compare by source decl, and drop
   entries whose source decl is no longer GC-marked.  */
14205 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14206 to symbol DECL if BEIMPORT is true.  Otherwise create or return the
14207 unique refptr-DECL symbol corresponding to symbol DECL.  */
14209 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14211 static inline hashval_t hash (tree_map *m) { return m->hash; }
14213 equal (tree_map *a, tree_map *b)
14215 return a->base.from == b->base.from;
14219 handle_cache_entry (tree_map *&m)
14221 extern void gt_ggc_mx (tree_map *&);
14222 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14224 else if (ggc_marked_p (m->base.from))
14227 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14231 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  Lazily creates (and caches in
   dllimport_map) an artificial, external, read-only VAR_DECL whose RTL is a
   const mem load through the "*__imp_..." (BEIMPORT) or "*refptr..." stub
   symbol for DECL.  */
14234 get_dllimport_decl (tree decl, bool beimport)
14236 struct tree_map *h, in;
14238 const char *prefix;
14239 size_t namelen, prefixlen;
14244 if (!dllimport_map)
14245 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14247 in.hash = htab_hash_pointer (decl);
14248 in.base.from = decl;
14249 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14254 *loc = h = ggc_alloc<tree_map> ();
14256 h->base.from = decl;
14257 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14258 VAR_DECL, NULL, ptr_type_node);
14259 DECL_ARTIFICIAL (to) = 1;
14260 DECL_IGNORED_P (to) = 1;
14261 DECL_EXTERNAL (to) = 1;
14262 TREE_READONLY (to) = 1;
14264 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14265 name = targetm.strip_name_encoding (name);
14267 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14268 ? "*__imp_" : "*__imp__";
14270 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14271 namelen = strlen (name);
14272 prefixlen = strlen (prefix);
14273 imp_name = (char *) alloca (namelen + prefixlen + 1);
14274 memcpy (imp_name, prefix, prefixlen);
14275 memcpy (imp_name + prefixlen, name, namelen + 1);
14277 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14278 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14279 SET_SYMBOL_REF_DECL (rtl, to);
14280 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14283 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14284 #ifdef SUB_TARGET_RECORD_STUB
14285 SUB_TARGET_RECORD_STUB (name);
14289 rtl = gen_const_mem (Pmode, rtl);
14290 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14292 SET_DECL_RTL (to, rtl);
14293 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  Routes through the refptr stub (beimport == false).  */
14298 /* Expand SYMBOL into its corresponding far-address symbol.
14299 WANT_REG is true if we require the result be a register.  */
14302 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14307 gcc_assert (SYMBOL_REF_DECL (symbol));
14308 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14310 x = DECL_RTL (imp_decl);
14312 x = force_reg (Pmode, x);
/* NOTE(review): elided extract — interior source lines are missing; code kept
   verbatim.  Routes through the __imp_ stub (beimport == true).  */
14316 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
14317 true if we require the result be a register.  */
14320 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14325 gcc_assert (SYMBOL_REF_DECL (symbol));
14326 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14328 x = DECL_RTL (imp_decl);
14330 x = force_reg (Pmode, x);
/* NOTE(review): elided extract — interior source lines are missing between the
   numbered lines below; code kept verbatim.  PE-COFF only: dispatches a plain
   SYMBOL_REF or CONST (symbol + offset) either to the dllimport stub or (for
   external decls outside the large/medium code models) to the refptr stub.  */
14334 /* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
14335 is true if we require the result be a register.  */
14338 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14340 if (!TARGET_PECOFF)
14343 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14345 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14346 return legitimize_dllimport_symbol (addr, inreg);
14347 if (GET_CODE (addr) == CONST
14348 && GET_CODE (XEXP (addr, 0)) == PLUS
14349 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14350 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14352 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14353 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14357 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14359 if (GET_CODE (addr) == SYMBOL_REF
14360 && !is_imported_p (addr)
14361 && SYMBOL_REF_EXTERNAL_P (addr)
14362 && SYMBOL_REF_DECL (addr))
14363 return legitimize_pe_coff_extern_decl (addr, inreg);
14365 if (GET_CODE (addr) == CONST
14366 && GET_CODE (XEXP (addr, 0)) == PLUS
14367 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14368 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14369 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14370 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14372 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14373 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14378 /* Try machine-dependent ways of modifying an illegitimate address
14379 to be legitimate. If we find one, return the new, valid address.
14380 This macro is used in only one place: `memory_address' in explow.c.
14382 OLDX is the address as it was before break_out_memory_refs was called.
14383 In some cases it is useful to look at this to decide what needs to be done.
14385 It is always safe for this macro to do nothing. It exists to recognize
14386 opportunities to optimize the output.
14388 For the 80386, we handle X+REG by loading X into a register R and
14389 using R+REG. R will go in a general reg and indexing will be used.
14390 However, if REG is a broken-out memory address or multiplication,
14391 nothing needs to be done because REG can certainly go in a general reg.
14393 When -fpic is used, special handling is needed for symbolic references.
14394 See comments by legitimize_pic_address in i386.c for details. */
/* Implementation of the TARGET_LEGITIMIZE_ADDRESS hook; contract is in
   the block comment immediately above.
   NOTE(review): this listing is sampled -- the return-type line, braces,
   and several statements are not visible; comments below describe only
   what the visible lines establish.  */
14397 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14399 bool changed = false;
/* A bare SYMBOL_REF with a TLS model is handed to the TLS legitimizer.  */
14402 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14404 return legitimize_tls_address (x, (enum tls_model) log, false);
/* Likewise (const (plus (symbol_ref tls) off)): legitimize the symbol,
   then re-attach the constant offset.  */
14405 if (GET_CODE (x) == CONST
14406 && GET_CODE (XEXP (x, 0)) == PLUS
14407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14408 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14410 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14411 (enum tls_model) log, false);
14412 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PE-COFF dllimport symbols may need an extra indirection.  */
14415 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14417 rtx tmp = legitimize_pe_coff_symbol (x, true);
/* Symbolic constants under -fpic go through the PIC legitimizer.  */
14422 if (flag_pic && SYMBOLIC_CONST (x))
14423 return legitimize_pic_address (x, 0);
14426 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14427 return machopic_indirect_data_reference (x, 0);
14430 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14431 if (GET_CODE (x) == ASHIFT
14432 && CONST_INT_P (XEXP (x, 1))
14433 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14436 log = INTVAL (XEXP (x, 1));
14437 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14438 GEN_INT (1 << log));
14441 if (GET_CODE (x) == PLUS)
14443 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14445 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14446 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14447 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14450 log = INTVAL (XEXP (XEXP (x, 0), 1));
14451 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14452 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14453 GEN_INT (1 << log));
/* Same canonicalization for a shift in the second PLUS operand.  */
14456 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14457 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14458 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14461 log = INTVAL (XEXP (XEXP (x, 1), 1));
14462 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14463 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14464 GEN_INT (1 << log));
14467 /* Put multiply first if it isn't already. */
14468 if (GET_CODE (XEXP (x, 1)) == MULT)
14470 std::swap (XEXP (x, 0), XEXP (x, 1));
14474 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14475 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14476 created by virtual register instantiation, register elimination, and
14477 similar optimizations. */
14478 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14481 x = gen_rtx_PLUS (Pmode,
14482 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14483 XEXP (XEXP (x, 1), 0)),
14484 XEXP (XEXP (x, 1), 1));
14488 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14489 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14490 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14492 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14493 && CONSTANT_P (XEXP (x, 1)))
14496 rtx other = NULL_RTX;
/* Pick whichever addend is the CONST_INT; the remaining one is folded
   back in through plus_constant below.  */
14498 if (CONST_INT_P (XEXP (x, 1)))
14500 constant = XEXP (x, 1);
14501 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14503 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14505 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14506 other = XEXP (x, 1);
14514 x = gen_rtx_PLUS (Pmode,
14515 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14516 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14517 plus_constant (Pmode, other,
14518 INTVAL (constant)));
/* If the canonicalizations above already yielded a legitimate
   address, stop (the same check is repeated after more rewriting).  */
14522 if (changed && ix86_legitimate_address_p (mode, x, false))
/* A bare MULT operand cannot appear in an address; copy it into a
   register.  */
14525 if (GET_CODE (XEXP (x, 0)) == MULT)
14528 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14531 if (GET_CODE (XEXP (x, 1)) == MULT)
14534 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14538 && REG_P (XEXP (x, 1))
14539 && REG_P (XEXP (x, 0)))
14542 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14545 x = legitimize_pic_address (x, 0);
14548 if (changed && ix86_legitimate_address_p (mode, x, false))
/* One operand is already a register: force the other into a fresh
   Pmode pseudo (zero-extending to Pmode if needed) so we end up with
   REG + REG.  */
14551 if (REG_P (XEXP (x, 0)))
14553 rtx temp = gen_reg_rtx (Pmode);
14554 rtx val = force_operand (XEXP (x, 1), temp);
14557 val = convert_to_mode (Pmode, val, 1);
14558 emit_move_insn (temp, val);
14561 XEXP (x, 1) = temp;
14565 else if (REG_P (XEXP (x, 1)))
14567 rtx temp = gen_reg_rtx (Pmode);
14568 rtx val = force_operand (XEXP (x, 0), temp);
14571 val = convert_to_mode (Pmode, val, 1);
14572 emit_move_insn (temp, val);
14575 XEXP (x, 0) = temp;
14583 /* Print an integer constant expression in assembler syntax. Addition
14584 and subtraction are the only arithmetic that may appear in these
14585 expressions. FILE is the stdio stream to write to, X is the rtx, and
14586 CODE is the operand print code from the output string. */
/* Print the PIC constant expression X to FILE (see the comment above:
   only addition/subtraction may appear).  CODE is the operand print
   code; 'P' requests a @PLT suffix on non-local symbols.
   NOTE(review): sampled listing -- case labels, braces and 'break's of
   the switches are largely not visible here.  */
14589 output_pic_addr_const (FILE *file, rtx x, int code)
14593 switch (GET_CODE (x))
14596 gcc_assert (flag_pic);
14601 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14602 output_addr_const (file, x);
14605 const char *name = XSTR (x, 0);
14607 /* Mark the decl as referenced so that cgraph will
14608 output the function. */
14609 if (SYMBOL_REF_DECL (x))
14610 mark_decl_referenced (SYMBOL_REF_DECL (x));
/* Darwin: undefined functions go through an indirection stub.  */
14613 if (MACHOPIC_INDIRECT
14614 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14615 name = machopic_indirection_name (x, /*stub_p=*/true);
14617 assemble_name (file, name);
/* 'P' on a non-local symbol emits a PLT reference (ELF only).  */
14619 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14620 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14621 fputs ("@PLT", file);
14628 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14629 assemble_name (asm_out_file, buf);
14633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14637 /* This used to output parentheses around the expression,
14638 but that does not work on the 386 (either ATT or BSD assembler). */
14639 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE used as an integer (VOIDmode) pair of words.  */
14643 if (GET_MODE (x) == VOIDmode)
14645 /* We can use %d if the number is <32 bits and positive. */
14646 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14647 fprintf (file, "0x%lx%08lx",
14648 (unsigned long) CONST_DOUBLE_HIGH (x),
14649 (unsigned long) CONST_DOUBLE_LOW (x));
14651 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14654 /* We can't handle floating point constants;
14655 TARGET_PRINT_OPERAND must handle them. */
14656 output_operand_lossage ("floating constant misused");
14660 /* Some assemblers need integer constants to appear first. */
14661 if (CONST_INT_P (XEXP (x, 0)))
14663 output_pic_addr_const (file, XEXP (x, 0), code);
14665 output_pic_addr_const (file, XEXP (x, 1), code);
/* Subtraction: print "a - b"; the subtrahend must be a CONST_INT.  */
14669 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14670 output_pic_addr_const (file, XEXP (x, 1), code);
14672 output_pic_addr_const (file, XEXP (x, 0), code);
/* Grouping brackets differ between Intel and AT&T dialects.  */
14678 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14679 output_pic_addr_const (file, XEXP (x, 0), code);
14681 output_pic_addr_const (file, XEXP (x, 1), code);
14683 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand, then the relocation suffix
   selected by the unspec number.  */
14687 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14689 bool f = i386_asm_output_addr_const_extra (file, x);
14694 gcc_assert (XVECLEN (x, 0) == 1);
14695 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14696 switch (XINT (x, 1))
14699 fputs ("@GOT", file);
14701 case UNSPEC_GOTOFF:
14702 fputs ("@GOTOFF", file);
14704 case UNSPEC_PLTOFF:
14705 fputs ("@PLTOFF", file);
14708 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14709 "(%rip)" : "[rip]", file);
14711 case UNSPEC_GOTPCREL:
14712 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14713 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14715 case UNSPEC_GOTTPOFF:
14716 /* FIXME: This might be @TPOFF in Sun ld too. */
14717 fputs ("@gottpoff", file);
14720 fputs ("@tpoff", file);
14722 case UNSPEC_NTPOFF:
14724 fputs ("@tpoff", file);
14726 fputs ("@ntpoff", file);
14728 case UNSPEC_DTPOFF:
14729 fputs ("@dtpoff", file);
14731 case UNSPEC_GOTNTPOFF:
14733 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14734 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14736 fputs ("@gotntpoff", file);
14738 case UNSPEC_INDNTPOFF:
14739 fputs ("@indntpoff", file);
14742 case UNSPEC_MACHOPIC_OFFSET:
14744 machopic_output_function_base_name (file);
14748 output_operand_lossage ("invalid UNSPEC as operand");
14754 output_operand_lossage ("invalid expression as operand");
14758 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14759 We need to emit DTP-relative relocations. */
/* TARGET_ASM_OUTPUT_DWARF_DTPREL hook: emit a DTP-relative relocation
   for X (see the comment above).  SIZE selects the directive; the
   visible lines show only the ASM_LONG branch, a ", 0" pad, and a
   gcc_unreachable default -- the case labels are sampled away.  */
14761 static void ATTRIBUTE_UNUSED
14762 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14764 fputs (ASM_LONG, file);
14765 output_addr_const (file, x);
/* @dtpoff selects the DTP-relative relocation for X.  */
14766 fputs ("@dtpoff", file);
14772 fputs (", 0", file);
14775 gcc_unreachable ();
14779 /* Return true if X is a representation of the PIC register. This copes
14780 with calls from ix86_find_base_term, where the register might have
14781 been replaced by a cselib value. */
/* Return true if X represents the PIC register (see comment above:
   copes with cselib VALUEs from ix86_find_base_term).  */
14784 ix86_pic_register_p (rtx x)
/* A cselib VALUE: compare against pic_offset_table_rtx via cselib.  */
14786 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14787 return (pic_offset_table_rtx
14788 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx))
;
14789 else if (!REG_P (x))
14791 else if (pic_offset_table_rtx)
14793 if (REGNO (x) == REGNO (pic_offset_table_rtx))
/* Hard reg that reload assigned to the pseudo PIC register also
   counts, matched through ORIGINAL_REGNO.  */
14795 if (HARD_REGISTER_P (x)
14796 && !HARD_REGISTER_P (pic_offset_table_rtx)
14797 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
/* No pic_offset_table_rtx: fall back to the fixed register number.  */
14802 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14805 /* Helper function for ix86_delegitimize_address.
14806 Attempt to delegitimize TLS local-exec accesses. */
/* Undo TLS local-exec legitimization of ORIG_X (helper for
   ix86_delegitimize_address, per the comment above): recognize a
   %fs/%gs-relative address whose displacement wraps an UNSPEC_NTPOFF
   and rebuild the plain symbol (+offset/index/base) form.
   NOTE(review): sampled listing -- the early "return orig_x" lines and
   braces are not visible.  */
14809 ix86_delegitimize_tls_address (rtx orig_x)
14811 rtx x = orig_x, unspec;
14812 struct ix86_address addr;
14814 if (!TARGET_TLS_DIRECT_SEG_REFS)
14818 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
/* Must decompose into an address using the TLS segment register with
   a CONST displacement.  */
14820 if (ix86_decompose_address (x, &addr) == 0
14821 || addr.seg != DEFAULT_TLS_SEG_REG
14822 || addr.disp == NULL_RTX
14823 || GET_CODE (addr.disp) != CONST)
14825 unspec = XEXP (addr.disp, 0);
/* Strip an outer (plus unspec const_int) wrapper, if present.  */
14826 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14827 unspec = XEXP (unspec, 0);
14828 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14830 x = XVECEXP (unspec, 0, 0);
14831 gcc_assert (GET_CODE (x) == SYMBOL_REF);
/* Re-attach the stripped constant offset.  */
14832 if (unspec != XEXP (addr.disp, 0))
14833 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
/* Rebuild index*scale and base around the symbol.  */
14836 rtx idx = addr.index;
14837 if (addr.scale != 1)
14838 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14839 x = gen_rtx_PLUS (Pmode, idx, x);
14842 x = gen_rtx_PLUS (Pmode, addr.base, x);
14843 if (MEM_P (orig_x))
14844 x = replace_equiv_address_nv (orig_x, x);
14848 /* In the name of slightly smaller debug output, and to cater to
14849 general assembler lossage, recognize PIC+GOTOFF and turn it back
14850 into a direct symbol reference.
14852 On Darwin, this is necessary to avoid a crash, because Darwin
14853 has a different PIC label for each routine but the DWARF debugging
14854 information is not associated with any particular routine, so it's
14855 necessary to remove references to the PIC label from RTL stored by
14856 the DWARF output code. */
/* TARGET_DELEGITIMIZE_ADDRESS hook: turn PIC/GOT-legitimized addresses
   back into direct symbol references, mainly for debug output (see the
   comment above).
   NOTE(review): sampled listing -- several returns, braces, and the
   64-bit guard around the first UNSPEC_PCREL branch are not visible.  */
14859 ix86_delegitimize_address (rtx x)
14861 rtx orig_x = delegitimize_mem_from_attrs (x);
14862 /* addend is NULL or some rtx if x is something+GOTOFF where
14863 something doesn't include the PIC register. */
14864 rtx addend = NULL_RTX;
14865 /* reg_addend is NULL or a multiple of some register. */
14866 rtx reg_addend = NULL_RTX;
14867 /* const_addend is NULL or a const_int. */
14868 rtx const_addend = NULL_RTX;
14869 /* This is the result, or NULL. */
14870 rtx result = NULL_RTX;
/* (const (plus (unspec UNSPEC_PCREL) const_int)): rebuild the sum
   directly from the wrapped operand.  */
14879 if (GET_CODE (x) == CONST
14880 && GET_CODE (XEXP (x, 0)) == PLUS
14881 && GET_MODE (XEXP (x, 0)) == Pmode
14882 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14883 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14884 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14886 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14887 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14888 if (MEM_P (orig_x))
14889 x = replace_equiv_address_nv (orig_x, x);
/* (const (unspec GOTPCREL/PCREL)): unwrap to the bare symbol.  */
14893 if (GET_CODE (x) == CONST
14894 && GET_CODE (XEXP (x, 0)) == UNSPEC
14895 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14896 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14897 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14899 x = XVECEXP (XEXP (x, 0), 0, 0);
14900 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14902 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14910 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14911 return ix86_delegitimize_tls_address (orig_x);
14913 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14914 and -mcmodel=medium -fpic. */
14917 if (GET_CODE (x) != PLUS
14918 || GET_CODE (XEXP (x, 1)) != CONST)
14919 return ix86_delegitimize_tls_address (orig_x);
14921 if (ix86_pic_register_p (XEXP (x, 0)))
14922 /* %ebx + GOT/GOTOFF */
14924 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14926 /* %ebx + %reg * scale + GOT/GOTOFF */
14927 reg_addend = XEXP (x, 0);
14928 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14929 reg_addend = XEXP (reg_addend, 1);
14930 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14931 reg_addend = XEXP (reg_addend, 0);
/* Neither side is the PIC register: remember the whole addend.  */
14934 reg_addend = NULL_RTX;
14935 addend = XEXP (x, 0);
14939 addend = XEXP (x, 0);
14941 x = XEXP (XEXP (x, 1), 0);
/* Split off a trailing constant offset inside the CONST.  */
14942 if (GET_CODE (x) == PLUS
14943 && CONST_INT_P (XEXP (x, 1)))
14945 const_addend = XEXP (x, 1);
/* The delegitimized symbol lives inside the UNSPEC; which unspecs are
   acceptable depends on MEM-ness, addend and code model.  */
14949 if (GET_CODE (x) == UNSPEC
14950 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14951 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14952 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14953 && !MEM_P (orig_x) && !addend)))
14954 result = XVECEXP (x, 0, 0);
14956 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14957 && !MEM_P (orig_x))
14958 result = XVECEXP (x, 0, 0);
14961 return ix86_delegitimize_tls_address (orig_x);
/* Re-attach the pieces that were split off above.  */
14964 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14966 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14969 /* If the rest of original X doesn't involve the PIC register, add
14970 addend and subtract pic_offset_table_rtx. This can happen e.g.
14972 leal (%ebx, %ecx, 4), %ecx
14974 movl foo@GOTOFF(%ecx), %edx
14975 in which case we return (%ecx - %ebx) + foo
14976 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14977 and reload has completed. */
14978 if (pic_offset_table_rtx
14979 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14980 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14981 pic_offset_table_rtx),
14983 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14985 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14986 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14987 result = gen_rtx_PLUS (Pmode, tmp, result);
/* Match the original MEM's mode if it differs from Pmode.  */
14992 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14994 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14995 if (result == NULL_RTX)
15001 /* If X is a machine specific address (i.e. a symbol or label being
15002 referenced as a displacement from the GOT implemented using an
15003 UNSPEC), then return the base term. Otherwise return X. */
/* Return the base term of address X (see comment above): unwrap a
   GOTPCREL/PCREL UNSPEC displacement, otherwise delegitimize X.
   NOTE(review): sampled listing -- the TARGET_64BIT guard and early
   returns are not visible.  */
15006 ix86_find_base_term (rtx x)
15012 if (GET_CODE (x) != CONST)
15014 term = XEXP (x, 0);
/* Strip a constant offset around the UNSPEC, if any.  */
15015 if (GET_CODE (term) == PLUS
15016 && (CONST_INT_P (XEXP (term, 1))
15017 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
15018 term = XEXP (term, 0);
15019 if (GET_CODE (term) != UNSPEC
15020 || (XINT (term, 1) != UNSPEC_GOTPCREL
15021 && XINT (term, 1) != UNSPEC_PCREL))
15024 return XVECEXP (term, 0, 0);
15027 return ix86_delegitimize_address (x);
/* Write the condition-code suffix for CODE compared in MODE to FILE.
   REVERSE inverts the condition; FP selects the fcmov-style spelling.
   NOTE(review): sampled listing -- the switch statement, its case
   labels and most suffix assignments are not visible; the fragments
   below are individual cases of that switch.  */
15031 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15032 bool fp, FILE *file)
15034 const char *suffix;
/* FP compare modes are first mapped onto integer condition codes.  */
15036 if (mode == CCFPmode || mode == CCFPUmode)
15038 code = ix86_fp_compare_code_to_integer (code);
15042 code = reverse_condition (code);
15093 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15097 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15098 Those same assemblers have the same but opposite lossage on cmov. */
15099 if (mode == CCmode)
15100 suffix = fp ? "nbe" : "a";
15102 gcc_unreachable ();
15118 gcc_unreachable ();
15122 if (mode == CCmode)
15124 else if (mode == CCCmode)
15125 suffix = fp ? "b" : "c";
15127 gcc_unreachable ();
15143 gcc_unreachable ();
15147 if (mode == CCmode)
15149 else if (mode == CCCmode)
15150 suffix = fp ? "nb" : "nc";
15152 gcc_unreachable ();
15155 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15159 if (mode == CCmode)
15162 gcc_unreachable ();
/* Parity conditions ("u"/"p" and negations).  */
15165 suffix = fp ? "u" : "p";
15168 suffix = fp ? "nu" : "np";
15171 gcc_unreachable ();
15173 fputs (suffix, file);
15176 /* Print the name of register X to FILE based on its machine mode and number.
15177 If CODE is 'w', pretend the mode is HImode.
15178 If CODE is 'b', pretend the mode is QImode.
15179 If CODE is 'k', pretend the mode is SImode.
15180 If CODE is 'q', pretend the mode is DImode.
15181 If CODE is 'x', pretend the mode is V4SFmode.
15182 If CODE is 't', pretend the mode is V8SFmode.
15183 If CODE is 'g', pretend the mode is V16SFmode.
15184 If CODE is 'h', pretend the reg is the 'high' byte register.
15185 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15186 If CODE is 'd', duplicate the operand for AVX instruction.
/* Print register X to FILE; CODE overrides the printed width/mode as
   documented in the comment block just above this function.
   NOTE(review): sampled listing -- the switch over CODE sizes, several
   case labels and braces are not visible.  */
15190 print_reg (rtx x, int code, FILE *file)
15193 unsigned int regno;
15194 bool duplicated = code == 'd' && TARGET_AVX;
/* AT&T dialect prefixes register names with '%'.  */
15196 if (ASSEMBLER_DIALECT == ASM_ATT)
15201 gcc_assert (TARGET_64BIT);
15202 fputs ("rip", file);
15206 regno = true_regnum (x);
/* These pseudo/flag registers must never reach assembly output.  */
15207 gcc_assert (regno != ARG_POINTER_REGNUM
15208 && regno != FRAME_POINTER_REGNUM
15209 && regno != FLAGS_REG
15210 && regno != FPSR_REG
15211 && regno != FPCR_REG);
/* Translate the print code into a byte size (fall through to the
   operand's own mode size when no code is given).  */
15213 if (code == 'w' || MMX_REG_P (x))
15215 else if (code == 'b')
15217 else if (code == 'k')
15219 else if (code == 'q')
15221 else if (code == 'y')
15223 else if (code == 'h')
15225 else if (code == 'x')
15227 else if (code == 't')
15229 else if (code == 'g')
15232 code = GET_MODE_SIZE (GET_MODE (x));
15234 /* Irritatingly, AMD extended registers use different naming convention
15235 from the normal registers: "r%d[bwd]" */
15236 if (REX_INT_REGNO_P (regno))
15238 gcc_assert (TARGET_64BIT);
15240 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15244 error ("extended registers have no high halves");
15259 error ("unsupported operand size for extended register");
15269 if (STACK_TOP_P (x))
/* Integer registers of size 8/4 get an 'r'/'e' prefix.  */
15278 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15279 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15284 reg = hi_reg_name[regno];
15287 if (regno >= ARRAY_SIZE (qi_reg_name))
15289 reg = qi_reg_name[regno];
15292 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15294 reg = qi_high_reg_name[regno];
15299 gcc_assert (!duplicated);
/* hi_reg_name entries start with '%'-style prefix; +1 skips it --
   TODO confirm against the table definition (not in view).  */
15301 fputs (hi_reg_name[regno] + 1, file);
15307 gcc_assert (!duplicated);
15309 fputs (hi_reg_name[REGNO (x)] + 1, file);
15314 gcc_unreachable ();
/* 'd' with AVX duplicates the operand: ", reg" is appended.  */
15320 if (ASSEMBLER_DIALECT == ASM_ATT)
15321 fprintf (file, ", %%%s", reg);
15323 fprintf (file, ", %s", reg);
15327 /* Meaning of CODE:
15328 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15329 C -- print opcode suffix for set/cmov insn.
15330 c -- like C, but print reversed condition
15331 F,f -- likewise, but for floating-point.
15332 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15334 R -- print embeded rounding and sae.
15335 r -- print only sae.
15336 z -- print the opcode suffix for the size of the current operand.
15337 Z -- likewise, with special suffixes for x87 instructions.
15338 * -- print a star (in certain assembler syntax)
15339 A -- print an absolute memory reference.
15340 E -- print address with DImode register names if TARGET_64BIT.
15341 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15342 s -- print a shift double count, followed by the assemblers argument
15344 b -- print the QImode name of the register for the indicated operand.
15345 %b0 would print %al if operands[0] is reg 0.
15346 w -- likewise, print the HImode name of the register.
15347 k -- likewise, print the SImode name of the register.
15348 q -- likewise, print the DImode name of the register.
15349 x -- likewise, print the V4SFmode name of the register.
15350 t -- likewise, print the V8SFmode name of the register.
15351 g -- likewise, print the V16SFmode name of the register.
15352 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15353 y -- print "st(0)" instead of "st" as a register.
15354 d -- print duplicated register operand for AVX instruction.
15355 D -- print condition for SSE cmp instruction.
15356 P -- if PIC, print an @PLT suffix.
15357 p -- print raw symbol name.
15358 X -- don't print any sort of PIC '@' suffix for a symbol.
15359 & -- print some in-use local-dynamic symbol name.
15360 H -- print a memory address offset by 8; used for sse high-parts
15361 Y -- print condition for XOP pcom* instruction.
15362 + -- print a branch hint as 'cs' or 'ds' prefix
15363 ; -- print a semicolon (after prefixes due to bug in older gas).
15364 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15365 @ -- print a segment register of thread base pointer load
15366 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15367 ! -- print MPX prefix for jxx/call/ret instructions if required.
/* TARGET_PRINT_OPERAND: print operand X with print code CODE to FILE.
   The meaning of every code is documented in the table just above.
   NOTE(review): sampled listing -- the big switch over CODE is missing
   its case labels, braces and breaks; the sections below are matched
   to their codes via the lossage messages and the code table.  */
15371 ix86_print_operand (FILE *file, rtx x, int code)
15378 switch (ASSEMBLER_DIALECT)
15385 /* Intel syntax. For absolute addresses, registers should not
15386 be surrounded by braces. */
15390 ix86_print_operand (file, x, 0);
15397 gcc_unreachable ();
15400 ix86_print_operand (file, x, 0);
15404 /* Wrap address in an UNSPEC to declare special handling. */
15406 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15408 output_address (x);
/* Size-suffix codes (L/W/B/Q/S/T): emit the suffix only in AT&T
   dialect.  */
15412 if (ASSEMBLER_DIALECT == ASM_ATT)
15417 if (ASSEMBLER_DIALECT == ASM_ATT)
15422 if (ASSEMBLER_DIALECT == ASM_ATT)
15427 if (ASSEMBLER_DIALECT == ASM_ATT)
15432 if (ASSEMBLER_DIALECT == ASM_ATT)
15437 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'O': Sun-syntax cmov suffix, selected by operand size.  */
15442 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15443 if (ASSEMBLER_DIALECT != ASM_ATT)
15446 switch (GET_MODE_SIZE (GET_MODE (x)))
15461 output_operand_lossage
15462 ("invalid operand size for operand code 'O'");
/* 'z': opcode suffix from the operand's mode size.  */
15471 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15473 /* Opcodes don't get size suffixes if using Intel opcodes. */
15474 if (ASSEMBLER_DIALECT == ASM_INTEL)
15477 switch (GET_MODE_SIZE (GET_MODE (x)))
15496 output_operand_lossage
15497 ("invalid operand size for operand code 'z'");
15502 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15504 (0, "non-integer operand used with operand code 'z'");
/* 'Z': x87-flavoured size suffixes.  */
15508 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15509 if (ASSEMBLER_DIALECT == ASM_INTEL)
15512 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15514 switch (GET_MODE_SIZE (GET_MODE (x)))
15517 #ifdef HAVE_AS_IX86_FILDS
15527 #ifdef HAVE_AS_IX86_FILDQ
15530 fputs ("ll", file);
15538 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15540 /* 387 opcodes don't get size suffixes
15541 if the operands are registers. */
15542 if (STACK_REG_P (x))
15545 switch (GET_MODE_SIZE (GET_MODE (x)))
15566 output_operand_lossage
15567 ("invalid operand type used with operand code 'Z'")
;
15571 output_operand_lossage
15572 ("invalid operand size for operand code 'Z'");
/* 's': shift-double count followed by the assembler separator.  */
15591 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15593 ix86_print_operand (file, x, 0);
15594 fputs (", ", file);
/* 'Y': condition name for XOP pcom* instructions.  */
15599 switch (GET_CODE (x))
15602 fputs ("neq", file);
15605 fputs ("eq", file);
15609 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15613 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15617 fputs ("le", file);
15621 fputs ("lt", file);
15624 fputs ("unord", file);
15627 fputs ("ord", file);
15630 fputs ("ueq", file);
15633 fputs ("nlt", file);
15636 fputs ("nle", file);
15639 fputs ("ule", file);
15642 fputs ("ult", file);
15645 fputs ("une", file);
15648 output_operand_lossage ("operand is not a condition code, "
15649 "invalid operand code 'Y'");
/* 'D': condition name for SSE cmp instructions.  */
15655 /* Little bit of braindamage here. The SSE compare instructions
15656 does use completely different names for the comparisons that the
15657 fp conditional moves. */
15658 switch (GET_CODE (x))
15663 fputs ("eq_us", file);
15667 fputs ("eq", file);
15672 fputs ("nge", file);
15676 fputs ("lt", file);
15681 fputs ("ngt", file);
15685 fputs ("le", file);
15688 fputs ("unord", file);
15693 fputs ("neq_oq", file);
15697 fputs ("neq", file);
15702 fputs ("ge", file);
15706 fputs ("nlt", file);
15711 fputs ("gt", file);
15715 fputs ("nle", file);
15718 fputs ("ord", file);
15721 output_operand_lossage ("operand is not a condition code, "
15722 "invalid operand code 'D'");
/* 'C'/'c'/'F'/'f': set/cmov suffixes via put_condition_code;
   lowercase codes reverse the condition, F/f use fp spelling.  */
15729 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15730 if (ASSEMBLER_DIALECT == ASM_ATT)
15736 if (!COMPARISON_P (x))
15738 output_operand_lossage ("operand is not a condition code, "
15739 "invalid operand code '%c'", code);
15742 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15743 code == 'c' || code == 'f',
15744 code == 'F' || code == 'f',
/* 'H': the memory operand offset by 8 (sse high part).  */
15749 if (!offsettable_memref_p (x))
15751 output_operand_lossage ("operand is not an offsettable memory "
15752 "reference, invalid operand code 'H'");
15755 /* It doesn't actually matter what mode we use here, as we're
15756 only going to use this for printing. */
15757 x = adjust_address_nv (x, DImode, 8);
15758 /* Output 'qword ptr' for intel assembler dialect. */
15759 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* HLE prefix: use xacquire/xrelease mnemonics when the assembler
   supports them, otherwise emit the raw 0xf2/0xf3 prefix bytes.  */
15764 gcc_assert (CONST_INT_P (x));
15766 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15767 #ifdef HAVE_AS_IX86_HLE
15768 fputs ("xacquire ", file);
15770 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15772 else if (INTVAL (x) & IX86_HLE_RELEASE)
15773 #ifdef HAVE_AS_IX86_HLE
15774 fputs ("xrelease ", file);
15776 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15778 /* We do not want to print value of the operand. */
/* AVX-512 zero-masking marker.  */
15782 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15783 fputs ("{z}", file);
/* 'r': suppress-all-exceptions {sae} marker.  */
15787 gcc_assert (CONST_INT_P (x));
15788 gcc_assert (INTVAL (x) == ROUND_SAE);
15790 if (ASSEMBLER_DIALECT == ASM_INTEL)
15791 fputs (", ", file);
15793 fputs ("{sae}", file);
15795 if (ASSEMBLER_DIALECT == ASM_ATT)
15796 fputs (", ", file);
/* 'R': embedded-rounding marker selected by the rounding immediate.  */
15801 gcc_assert (CONST_INT_P (x));
15803 if (ASSEMBLER_DIALECT == ASM_INTEL)
15804 fputs (", ", file);
15806 switch (INTVAL (x))
15808 case ROUND_NEAREST_INT | ROUND_SAE:
15809 fputs ("{rn-sae}", file);
15811 case ROUND_NEG_INF | ROUND_SAE:
15812 fputs ("{rd-sae}", file);
15814 case ROUND_POS_INF | ROUND_SAE:
15815 fputs ("{ru-sae}", file);
15817 case ROUND_ZERO | ROUND_SAE:
15818 fputs ("{rz-sae}", file);
15821 gcc_unreachable ();
15824 if (ASSEMBLER_DIALECT == ASM_ATT)
15825 fputs (", ", file);
15830 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': some in-use local-dynamic TLS symbol name.  */
15836 const char *name = get_some_local_dynamic_name ();
15838 output_operand_lossage ("'%%&' used without any "
15839 "local dynamic TLS references");
15841 assemble_name (file, name);
/* '+': branch hint prefix from the REG_BR_PROB note.  */
15850 || optimize_function_for_size_p (cfun)
15851 || !TARGET_BRANCH_PREDICTION_HINTS)
15854 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15857 int pred_val = XINT (x, 0);
/* Only hint when the prediction is decisive (outside 45%-55%).  */
15859 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15860 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15862 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15864 = final_forward_branch_p (current_output_insn) == 0;
15866 /* Emit hints only in the case default branch prediction
15867 heuristics would fail. */
15868 if (taken != cputaken)
15870 /* We use 3e (DS) prefix for taken branches and
15871 2e (CS) prefix for not taken branches. */
15873 fputs ("ds ; ", file);
15875 fputs ("cs ; ", file);
15883 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
/* '@': segment register holding the thread base pointer.  */
15889 if (ASSEMBLER_DIALECT == ASM_ATT)
15892 /* The kernel uses a different segment register for performance
15893 reasons; a system call would not have to trash the userspace
15894 segment register, which would be expensive. */
15895 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15896 fputs ("fs", file);
15898 fputs ("gs", file);
15902 putc (TARGET_AVX2 ? 'i' : 'f', file);
/* '^': addr32 prefix for x32 (Pmode narrower than word_mode).  */
15906 if (TARGET_64BIT && Pmode != word_mode)
15907 fputs ("addr32 ", file);
15911 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15912 fputs ("bnd ", file);
15916 output_operand_lossage ("invalid operand code '%c'", code);
/* After the code switch: dispatch on the operand's RTX class.  */
15921 print_reg (x, code, file);
15923 else if (MEM_P (x))
15925 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15926 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15927 && GET_MODE (x) != BLKmode)
15930 switch (GET_MODE_SIZE (GET_MODE (x)))
15932 case 1: size = "BYTE"; break;
15933 case 2: size = "WORD"; break;
15934 case 4: size = "DWORD"; break;
15935 case 8: size = "QWORD"; break;
15936 case 12: size = "TBYTE"; break;
15938 if (GET_MODE (x) == XFmode)
15943 case 32: size = "YMMWORD"; break;
15944 case 64: size = "ZMMWORD"; break;
15946 gcc_unreachable ();
15949 /* Check for explicit size override (codes 'b', 'w', 'k',
15953 else if (code == 'w')
15955 else if (code == 'k')
15957 else if (code == 'q')
15959 else if (code == 'x')
15962 fputs (size, file);
15963 fputs (" PTR ", file);
15967 /* Avoid (%rip) for call operands. */
15968 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15969 && !CONST_INT_P (x))
15970 output_addr_const (file, x);
15971 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15972 output_operand_lossage ("invalid constraints for operand");
15974 output_address (x);
/* SFmode immediate: print the 32-bit image (sign-extended in AT&T).  */
15977 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15982 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15983 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15985 if (ASSEMBLER_DIALECT == ASM_ATT)
15987 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15989 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15990 (unsigned long long) (int) l);
15992 fprintf (file, "0x%08x", (unsigned int) l);
15995 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
16000 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16001 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16003 if (ASSEMBLER_DIALECT == ASM_ATT)
16005 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16008 /* These float cases don't actually occur as immediate operands. */
16009 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
16013 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16014 fputs (dstr, file);
16019 /* We have patterns that allow zero sets of memory, for instance.
16020 In 64-bit mode, we should probably support all 8-byte vectors,
16021 since we can in fact encode that into an immediate. */
16022 if (GET_CODE (x) == CONST_VECTOR)
16024 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Plain constants: '$'/'OFFSET FLAT:' markers unless 'P'/'p' asked
   for the raw name.  */
16028 if (code != 'P' && code != 'p')
16030 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
16032 if (ASSEMBLER_DIALECT == ASM_ATT)
16035 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16036 || GET_CODE (x) == LABEL_REF)
16038 if (ASSEMBLER_DIALECT == ASM_ATT)
16041 fputs ("OFFSET FLAT:", file);
16044 if (CONST_INT_P (x))
16045 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16046 else if (flag_pic || MACHOPIC_INDIRECT)
16047 output_pic_addr_const (file, x, code);
16049 output_addr_const (file, x);
/* TARGET_PRINT_OPERAND_PUNCT_VALID_P: true iff CODE is one of the
   punctuation print codes ix86_print_operand accepts without an
   operand (see the code table above ix86_print_operand).  */
16054 ix86_print_operand_punct_valid_p (unsigned char code)
16056 return (code == '@' || code == '*' || code == '+' || code == '&'
16057 || code == ';' || code == '~' || code == '^' || code == '!');
16060 /* Print a memory operand whose address is ADDR. */
16063 ix86_print_operand_address (FILE *file, rtx addr)
16065 struct ix86_address parts;
16066 rtx base, index, disp;
16072 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16074 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16075 gcc_assert (parts.index == NULL_RTX);
16076 parts.index = XVECEXP (addr, 0, 1);
16077 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16078 addr = XVECEXP (addr, 0, 0);
16081 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16083 gcc_assert (TARGET_64BIT);
16084 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16087 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16089 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16090 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16091 if (parts.base != NULL_RTX)
16093 parts.index = parts.base;
16096 parts.base = XVECEXP (addr, 0, 0);
16097 addr = XVECEXP (addr, 0, 0);
16099 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16101 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16102 gcc_assert (parts.index == NULL_RTX);
16103 parts.index = XVECEXP (addr, 0, 1);
16104 addr = XVECEXP (addr, 0, 0);
16107 ok = ix86_decompose_address (addr, &parts);
16112 index = parts.index;
16114 scale = parts.scale;
16122 if (ASSEMBLER_DIALECT == ASM_ATT)
16124 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16127 gcc_unreachable ();
16130 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16131 if (TARGET_64BIT && !base && !index)
16135 if (GET_CODE (disp) == CONST
16136 && GET_CODE (XEXP (disp, 0)) == PLUS
16137 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16138 symbol = XEXP (XEXP (disp, 0), 0);
16140 if (GET_CODE (symbol) == LABEL_REF
16141 || (GET_CODE (symbol) == SYMBOL_REF
16142 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16145 if (!base && !index)
16147 /* Displacement only requires special attention. */
16149 if (CONST_INT_P (disp))
16151 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16152 fputs ("ds:", file);
16153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16156 output_pic_addr_const (file, disp, 0);
16158 output_addr_const (file, disp);
16162 /* Print SImode register names to force addr32 prefix. */
16163 if (SImode_address_operand (addr, VOIDmode))
16165 #ifdef ENABLE_CHECKING
16166 gcc_assert (TARGET_64BIT);
16167 switch (GET_CODE (addr))
16170 gcc_assert (GET_MODE (addr) == SImode);
16171 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16175 gcc_assert (GET_MODE (addr) == DImode);
16178 gcc_unreachable ();
16181 gcc_assert (!code);
16187 && CONST_INT_P (disp)
16188 && INTVAL (disp) < -16*1024*1024)
16190 /* X32 runs in 64-bit mode, where displacement, DISP, in
16191 address DISP(%r64), is encoded as 32-bit immediate sign-
16192 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16193 address is %r64 + 0xffffffffbffffd00. When %r64 <
16194 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16195 which is invalid for x32. The correct address is %r64
16196 - 0x40000300 == 0xf7ffdd64. To properly encode
16197 -0x40000300(%r64) for x32, we zero-extend negative
16198 displacement by forcing addr32 prefix which truncates
16199 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16200 zero-extend all negative displacements, including -1(%rsp).
16201 However, for small negative displacements, sign-extension
16202 won't cause overflow. We only zero-extend negative
16203 displacements if they < -16*1024*1024, which is also used
16204 to check legitimate address displacements for PIC. */
16208 if (ASSEMBLER_DIALECT == ASM_ATT)
16213 output_pic_addr_const (file, disp, 0);
16214 else if (GET_CODE (disp) == LABEL_REF)
16215 output_asm_label (disp);
16217 output_addr_const (file, disp);
16222 print_reg (base, code, file);
16226 print_reg (index, vsib ? 0 : code, file);
16227 if (scale != 1 || vsib)
16228 fprintf (file, ",%d", scale);
16234 rtx offset = NULL_RTX;
16238 /* Pull out the offset of a symbol; print any symbol itself. */
16239 if (GET_CODE (disp) == CONST
16240 && GET_CODE (XEXP (disp, 0)) == PLUS
16241 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16243 offset = XEXP (XEXP (disp, 0), 1);
16244 disp = gen_rtx_CONST (VOIDmode,
16245 XEXP (XEXP (disp, 0), 0));
16249 output_pic_addr_const (file, disp, 0);
16250 else if (GET_CODE (disp) == LABEL_REF)
16251 output_asm_label (disp);
16252 else if (CONST_INT_P (disp))
16255 output_addr_const (file, disp);
16261 print_reg (base, code, file);
16264 if (INTVAL (offset) >= 0)
16266 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16277 print_reg (index, vsib ? 0 : code, file);
16278 if (scale != 1 || vsib)
16279 fprintf (file, "*%d", scale);
16286 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16289 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16293 if (GET_CODE (x) != UNSPEC)
16296 op = XVECEXP (x, 0, 0);
16297 switch (XINT (x, 1))
16299 case UNSPEC_GOTTPOFF:
16300 output_addr_const (file, op);
16301 /* FIXME: This might be @TPOFF in Sun ld. */
16302 fputs ("@gottpoff", file);
16305 output_addr_const (file, op);
16306 fputs ("@tpoff", file);
16308 case UNSPEC_NTPOFF:
16309 output_addr_const (file, op);
16311 fputs ("@tpoff", file);
16313 fputs ("@ntpoff", file);
16315 case UNSPEC_DTPOFF:
16316 output_addr_const (file, op);
16317 fputs ("@dtpoff", file);
16319 case UNSPEC_GOTNTPOFF:
16320 output_addr_const (file, op);
16322 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16323 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16325 fputs ("@gotntpoff", file);
16327 case UNSPEC_INDNTPOFF:
16328 output_addr_const (file, op);
16329 fputs ("@indntpoff", file);
16332 case UNSPEC_MACHOPIC_OFFSET:
16333 output_addr_const (file, op);
16335 machopic_output_function_base_name (file);
16339 case UNSPEC_STACK_CHECK:
16343 gcc_assert (flag_split_stack);
16345 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16346 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16348 gcc_unreachable ();
16351 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16362 /* Split one or more double-mode RTL references into pairs of half-mode
16363 references. The RTL can be REG, offsettable MEM, integer constant, or
16364 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16365 split and "num" is its length. lo_half and hi_half are output arrays
16366 that parallel "operands". */
16369 split_double_mode (machine_mode mode, rtx operands[],
16370 int num, rtx lo_half[], rtx hi_half[])
16372 machine_mode half_mode;
16378 half_mode = DImode;
16381 half_mode = SImode;
16384 gcc_unreachable ();
16387 byte = GET_MODE_SIZE (half_mode);
16391 rtx op = operands[num];
16393 /* simplify_subreg refuse to split volatile memory addresses,
16394 but we still have to handle it. */
16397 lo_half[num] = adjust_address (op, half_mode, 0);
16398 hi_half[num] = adjust_address (op, half_mode, byte);
16402 lo_half[num] = simplify_gen_subreg (half_mode, op,
16403 GET_MODE (op) == VOIDmode
16404 ? mode : GET_MODE (op), 0);
16405 hi_half[num] = simplify_gen_subreg (half_mode, op,
16406 GET_MODE (op) == VOIDmode
16407 ? mode : GET_MODE (op), byte);
16412 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16413 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16414 is the expression of the binary operation. The output may either be
16415 emitted here, or returned to the caller, like all output_* functions.
16417 There is no guarantee that the operands are the same mode, as they
16418 might be within FLOAT or FLOAT_EXTEND expressions. */
16420 #ifndef SYSV386_COMPAT
16421 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16422 wants to fix the assemblers because that causes incompatibility
16423 with gcc. No-one wants to fix gcc because that causes
16424 incompatibility with assemblers... You can use the option of
16425 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16426 #define SYSV386_COMPAT 1
16430 output_387_binary_op (rtx insn, rtx *operands)
16432 static char buf[40];
16435 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16437 #ifdef ENABLE_CHECKING
16438 /* Even if we do not want to check the inputs, this documents input
16439 constraints. Which helps in understanding the following code. */
16440 if (STACK_REG_P (operands[0])
16441 && ((REG_P (operands[1])
16442 && REGNO (operands[0]) == REGNO (operands[1])
16443 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16444 || (REG_P (operands[2])
16445 && REGNO (operands[0]) == REGNO (operands[2])
16446 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16447 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16450 gcc_assert (is_sse);
16453 switch (GET_CODE (operands[3]))
16456 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16457 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16465 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16466 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16474 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16475 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16483 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16484 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16492 gcc_unreachable ();
16499 strcpy (buf, ssep);
16500 if (GET_MODE (operands[0]) == SFmode)
16501 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16503 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16507 strcpy (buf, ssep + 1);
16508 if (GET_MODE (operands[0]) == SFmode)
16509 strcat (buf, "ss\t{%2, %0|%0, %2}");
16511 strcat (buf, "sd\t{%2, %0|%0, %2}");
16517 switch (GET_CODE (operands[3]))
16521 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16522 std::swap (operands[1], operands[2]);
16524 /* know operands[0] == operands[1]. */
16526 if (MEM_P (operands[2]))
16532 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16534 if (STACK_TOP_P (operands[0]))
16535 /* How is it that we are storing to a dead operand[2]?
16536 Well, presumably operands[1] is dead too. We can't
16537 store the result to st(0) as st(0) gets popped on this
16538 instruction. Instead store to operands[2] (which I
16539 think has to be st(1)). st(1) will be popped later.
16540 gcc <= 2.8.1 didn't have this check and generated
16541 assembly code that the Unixware assembler rejected. */
16542 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16544 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16548 if (STACK_TOP_P (operands[0]))
16549 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16551 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16556 if (MEM_P (operands[1]))
16562 if (MEM_P (operands[2]))
16568 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16571 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16572 derived assemblers, confusingly reverse the direction of
16573 the operation for fsub{r} and fdiv{r} when the
16574 destination register is not st(0). The Intel assembler
16575 doesn't have this brain damage. Read !SYSV386_COMPAT to
16576 figure out what the hardware really does. */
16577 if (STACK_TOP_P (operands[0]))
16578 p = "{p\t%0, %2|rp\t%2, %0}";
16580 p = "{rp\t%2, %0|p\t%0, %2}";
16582 if (STACK_TOP_P (operands[0]))
16583 /* As above for fmul/fadd, we can't store to st(0). */
16584 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16586 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16591 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16594 if (STACK_TOP_P (operands[0]))
16595 p = "{rp\t%0, %1|p\t%1, %0}";
16597 p = "{p\t%1, %0|rp\t%0, %1}";
16599 if (STACK_TOP_P (operands[0]))
16600 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16602 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16607 if (STACK_TOP_P (operands[0]))
16609 if (STACK_TOP_P (operands[1]))
16610 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16612 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16615 else if (STACK_TOP_P (operands[1]))
16618 p = "{\t%1, %0|r\t%0, %1}";
16620 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16626 p = "{r\t%2, %0|\t%0, %2}";
16628 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16634 gcc_unreachable ();
16641 /* Check if a 256bit AVX register is referenced inside of EXP. */
16644 ix86_check_avx256_register (const_rtx exp)
16646 if (GET_CODE (exp) == SUBREG)
16647 exp = SUBREG_REG (exp);
16649 return (REG_P (exp)
16650 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16653 /* Return needed mode for entity in optimize_mode_switching pass. */
16656 ix86_avx_u128_mode_needed (rtx_insn *insn)
16662 /* Needed mode is set to AVX_U128_CLEAN if there are
16663 no 256bit modes used in function arguments. */
16664 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16666 link = XEXP (link, 1))
16668 if (GET_CODE (XEXP (link, 0)) == USE)
16670 rtx arg = XEXP (XEXP (link, 0), 0);
16672 if (ix86_check_avx256_register (arg))
16673 return AVX_U128_DIRTY;
16677 return AVX_U128_CLEAN;
16680 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16681 changes state only when a 256bit register is written to, but we need
16682 to prevent the compiler from moving optimal insertion point above
16683 eventual read from 256bit register. */
16684 subrtx_iterator::array_type array;
16685 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16686 if (ix86_check_avx256_register (*iter))
16687 return AVX_U128_DIRTY;
16689 return AVX_U128_ANY;
16692 /* Return mode that i387 must be switched into
16693 prior to the execution of insn. */
16696 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16698 enum attr_i387_cw mode;
16700 /* The mode UNINITIALIZED is used to store control word after a
16701 function call or ASM pattern. The mode ANY specify that function
16702 has no requirements on the control word and make no changes in the
16703 bits we are interested in. */
16706 || (NONJUMP_INSN_P (insn)
16707 && (asm_noperands (PATTERN (insn)) >= 0
16708 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16709 return I387_CW_UNINITIALIZED;
16711 if (recog_memoized (insn) < 0)
16712 return I387_CW_ANY;
16714 mode = get_attr_i387_cw (insn);
16719 if (mode == I387_CW_TRUNC)
16724 if (mode == I387_CW_FLOOR)
16729 if (mode == I387_CW_CEIL)
16734 if (mode == I387_CW_MASK_PM)
16739 gcc_unreachable ();
16742 return I387_CW_ANY;
16745 /* Return mode that entity must be switched into
16746 prior to the execution of insn. */
16749 ix86_mode_needed (int entity, rtx_insn *insn)
16754 return ix86_avx_u128_mode_needed (insn);
16759 return ix86_i387_mode_needed (entity, insn);
16761 gcc_unreachable ();
16766 /* Check if a 256bit AVX register is referenced in stores. */
16769 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16771 if (ix86_check_avx256_register (dest))
16773 bool *used = (bool *) data;
16778 /* Calculate mode of upper 128bit AVX registers after the insn. */
16781 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16783 rtx pat = PATTERN (insn);
16785 if (vzeroupper_operation (pat, VOIDmode)
16786 || vzeroall_operation (pat, VOIDmode))
16787 return AVX_U128_CLEAN;
16789 /* We know that state is clean after CALL insn if there are no
16790 256bit registers used in the function return register. */
16793 bool avx_reg256_found = false;
16794 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16796 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16799 /* Otherwise, return current mode. Remember that if insn
16800 references AVX 256bit registers, the mode was already changed
16801 to DIRTY from MODE_NEEDED. */
16805 /* Return the mode that an insn results in. */
16808 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16813 return ix86_avx_u128_mode_after (mode, insn);
16820 gcc_unreachable ();
16825 ix86_avx_u128_mode_entry (void)
16829 /* Entry mode is set to AVX_U128_DIRTY if there are
16830 256bit modes used in function arguments. */
16831 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16832 arg = TREE_CHAIN (arg))
16834 rtx incoming = DECL_INCOMING_RTL (arg);
16836 if (incoming && ix86_check_avx256_register (incoming))
16837 return AVX_U128_DIRTY;
16840 return AVX_U128_CLEAN;
16843 /* Return a mode that ENTITY is assumed to be
16844 switched to at function entry. */
16847 ix86_mode_entry (int entity)
16852 return ix86_avx_u128_mode_entry ();
16857 return I387_CW_ANY;
16859 gcc_unreachable ();
16864 ix86_avx_u128_mode_exit (void)
16866 rtx reg = crtl->return_rtx;
16868 /* Exit mode is set to AVX_U128_DIRTY if there are
16869 256bit modes used in the function return register. */
16870 if (reg && ix86_check_avx256_register (reg))
16871 return AVX_U128_DIRTY;
16873 return AVX_U128_CLEAN;
16876 /* Return a mode that ENTITY is assumed to be
16877 switched to at function exit. */
16880 ix86_mode_exit (int entity)
16885 return ix86_avx_u128_mode_exit ();
16890 return I387_CW_ANY;
16892 gcc_unreachable ();
16897 ix86_mode_priority (int, int n)
16902 /* Output code to initialize control word copies used by trunc?f?i and
16903 rounding patterns. CURRENT_MODE is set to current control word,
16904 while NEW_MODE is set to new control word. */
16907 emit_i387_cw_initialization (int mode)
16909 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16912 enum ix86_stack_slot slot;
16914 rtx reg = gen_reg_rtx (HImode);
16916 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16917 emit_move_insn (reg, copy_rtx (stored_mode));
16919 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16920 || optimize_insn_for_size_p ())
16924 case I387_CW_TRUNC:
16925 /* round toward zero (truncate) */
16926 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16927 slot = SLOT_CW_TRUNC;
16930 case I387_CW_FLOOR:
16931 /* round down toward -oo */
16932 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16933 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16934 slot = SLOT_CW_FLOOR;
16938 /* round up toward +oo */
16939 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16940 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16941 slot = SLOT_CW_CEIL;
16944 case I387_CW_MASK_PM:
16945 /* mask precision exception for nearbyint() */
16946 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16947 slot = SLOT_CW_MASK_PM;
16951 gcc_unreachable ();
16958 case I387_CW_TRUNC:
16959 /* round toward zero (truncate) */
16960 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16961 slot = SLOT_CW_TRUNC;
16964 case I387_CW_FLOOR:
16965 /* round down toward -oo */
16966 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16967 slot = SLOT_CW_FLOOR;
16971 /* round up toward +oo */
16972 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16973 slot = SLOT_CW_CEIL;
16976 case I387_CW_MASK_PM:
16977 /* mask precision exception for nearbyint() */
16978 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16979 slot = SLOT_CW_MASK_PM;
16983 gcc_unreachable ();
16987 gcc_assert (slot < MAX_386_STACK_LOCALS);
16989 new_mode = assign_386_stack_local (HImode, slot);
16990 emit_move_insn (new_mode, reg);
16993 /* Emit vzeroupper. */
16996 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17000 /* Cancel automatic vzeroupper insertion if there are
17001 live call-saved SSE registers at the insertion point. */
17003 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17004 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17008 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17009 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17012 emit_insn (gen_avx_vzeroupper ());
17015 /* Generate one or more insns to set ENTITY to MODE. */
17017 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
17018 is the set of hard registers live at the point where the insn(s)
17019 are to be inserted. */
17022 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17023 HARD_REG_SET regs_live)
17028 if (mode == AVX_U128_CLEAN)
17029 ix86_avx_emit_vzeroupper (regs_live);
17035 if (mode != I387_CW_ANY
17036 && mode != I387_CW_UNINITIALIZED)
17037 emit_i387_cw_initialization (mode);
17040 gcc_unreachable ();
17044 /* Output code for INSN to convert a float to a signed int. OPERANDS
17045 are the insn operands. The output may be [HSD]Imode and the input
17046 operand may be [SDX]Fmode. */
17049 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17051 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17052 int dimode_p = GET_MODE (operands[0]) == DImode;
17053 int round_mode = get_attr_i387_cw (insn);
17055 /* Jump through a hoop or two for DImode, since the hardware has no
17056 non-popping instruction. We used to do this a different way, but
17057 that was somewhat fragile and broke with post-reload splitters. */
17058 if ((dimode_p || fisttp) && !stack_top_dies)
17059 output_asm_insn ("fld\t%y1", operands);
17061 gcc_assert (STACK_TOP_P (operands[1]));
17062 gcc_assert (MEM_P (operands[0]));
17063 gcc_assert (GET_MODE (operands[1]) != TFmode);
17066 output_asm_insn ("fisttp%Z0\t%0", operands);
17069 if (round_mode != I387_CW_ANY)
17070 output_asm_insn ("fldcw\t%3", operands);
17071 if (stack_top_dies || dimode_p)
17072 output_asm_insn ("fistp%Z0\t%0", operands);
17074 output_asm_insn ("fist%Z0\t%0", operands);
17075 if (round_mode != I387_CW_ANY)
17076 output_asm_insn ("fldcw\t%2", operands);
17082 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17083 have the values zero or one, indicates the ffreep insn's operand
17084 from the OPERANDS array. */
17086 static const char *
17087 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17089 if (TARGET_USE_FFREEP)
17090 #ifdef HAVE_AS_IX86_FFREEP
17091 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17094 static char retval[32];
17095 int regno = REGNO (operands[opno]);
17097 gcc_assert (STACK_REGNO_P (regno));
17099 regno -= FIRST_STACK_REG;
17101 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17106 return opno ? "fstp\t%y1" : "fstp\t%y0";
17110 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17111 should be used. UNORDERED_P is true when fucom should be used. */
17114 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17116 int stack_top_dies;
17117 rtx cmp_op0, cmp_op1;
17118 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17122 cmp_op0 = operands[0];
17123 cmp_op1 = operands[1];
17127 cmp_op0 = operands[1];
17128 cmp_op1 = operands[2];
17133 if (GET_MODE (operands[0]) == SFmode)
17135 return "%vucomiss\t{%1, %0|%0, %1}";
17137 return "%vcomiss\t{%1, %0|%0, %1}";
17140 return "%vucomisd\t{%1, %0|%0, %1}";
17142 return "%vcomisd\t{%1, %0|%0, %1}";
17145 gcc_assert (STACK_TOP_P (cmp_op0));
17147 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17149 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17151 if (stack_top_dies)
17153 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17154 return output_387_ffreep (operands, 1);
17157 return "ftst\n\tfnstsw\t%0";
17160 if (STACK_REG_P (cmp_op1)
17162 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17163 && REGNO (cmp_op1) != FIRST_STACK_REG)
17165 /* If both the top of the 387 stack dies, and the other operand
17166 is also a stack register that dies, then this must be a
17167 `fcompp' float compare */
17171 /* There is no double popping fcomi variant. Fortunately,
17172 eflags is immune from the fstp's cc clobbering. */
17174 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17176 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17177 return output_387_ffreep (operands, 0);
17182 return "fucompp\n\tfnstsw\t%0";
17184 return "fcompp\n\tfnstsw\t%0";
17189 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17191 static const char * const alt[16] =
17193 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17194 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17195 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17196 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17198 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17199 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17203 "fcomi\t{%y1, %0|%0, %y1}",
17204 "fcomip\t{%y1, %0|%0, %y1}",
17205 "fucomi\t{%y1, %0|%0, %y1}",
17206 "fucomip\t{%y1, %0|%0, %y1}",
17217 mask = eflags_p << 3;
17218 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17219 mask |= unordered_p << 1;
17220 mask |= stack_top_dies;
17222 gcc_assert (mask < 16);
17231 ix86_output_addr_vec_elt (FILE *file, int value)
17233 const char *directive = ASM_LONG;
17237 directive = ASM_QUAD;
17239 gcc_assert (!TARGET_64BIT);
17242 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17246 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17248 const char *directive = ASM_LONG;
17251 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17252 directive = ASM_QUAD;
17254 gcc_assert (!TARGET_64BIT);
17256 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17257 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17258 fprintf (file, "%s%s%d-%s%d\n",
17259 directive, LPREFIX, value, LPREFIX, rel);
17260 else if (HAVE_AS_GOTOFF_IN_DATA)
17261 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17263 else if (TARGET_MACHO)
17265 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17266 machopic_output_function_base_name (file);
17271 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17272 GOT_SYMBOL_NAME, LPREFIX, value);
17275 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17279 ix86_expand_clear (rtx dest)
17283 /* We play register width games, which are only valid after reload. */
17284 gcc_assert (reload_completed);
17286 /* Avoid HImode and its attendant prefix byte. */
17287 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17288 dest = gen_rtx_REG (SImode, REGNO (dest));
17289 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17291 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17293 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17294 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17300 /* X is an unchanging MEM. If it is a constant pool reference, return
17301 the constant pool rtx, else NULL. */
17304 maybe_get_pool_constant (rtx x)
17306 x = ix86_delegitimize_address (XEXP (x, 0));
17308 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17309 return get_pool_constant (x);
17315 ix86_expand_move (machine_mode mode, rtx operands[])
17318 enum tls_model model;
17323 if (GET_CODE (op1) == SYMBOL_REF)
17327 model = SYMBOL_REF_TLS_MODEL (op1);
17330 op1 = legitimize_tls_address (op1, model, true);
17331 op1 = force_operand (op1, op0);
17334 op1 = convert_to_mode (mode, op1, 1);
17336 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17339 else if (GET_CODE (op1) == CONST
17340 && GET_CODE (XEXP (op1, 0)) == PLUS
17341 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17343 rtx addend = XEXP (XEXP (op1, 0), 1);
17344 rtx symbol = XEXP (XEXP (op1, 0), 0);
17347 model = SYMBOL_REF_TLS_MODEL (symbol);
17349 tmp = legitimize_tls_address (symbol, model, true);
17351 tmp = legitimize_pe_coff_symbol (symbol, true);
17355 tmp = force_operand (tmp, NULL);
17356 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17357 op0, 1, OPTAB_DIRECT);
17360 op1 = convert_to_mode (mode, tmp, 1);
17364 if ((flag_pic || MACHOPIC_INDIRECT)
17365 && symbolic_operand (op1, mode))
17367 if (TARGET_MACHO && !TARGET_64BIT)
17370 /* dynamic-no-pic */
17371 if (MACHOPIC_INDIRECT)
17373 rtx temp = ((reload_in_progress
17374 || ((op0 && REG_P (op0))
17376 ? op0 : gen_reg_rtx (Pmode));
17377 op1 = machopic_indirect_data_reference (op1, temp);
17379 op1 = machopic_legitimize_pic_address (op1, mode,
17380 temp == op1 ? 0 : temp);
17382 if (op0 != op1 && GET_CODE (op0) != MEM)
17384 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17388 if (GET_CODE (op0) == MEM)
17389 op1 = force_reg (Pmode, op1);
17393 if (GET_CODE (temp) != REG)
17394 temp = gen_reg_rtx (Pmode);
17395 temp = legitimize_pic_address (op1, temp);
17400 /* dynamic-no-pic */
17406 op1 = force_reg (mode, op1);
17407 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17409 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17410 op1 = legitimize_pic_address (op1, reg);
17413 op1 = convert_to_mode (mode, op1, 1);
17420 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17421 || !push_operand (op0, mode))
17423 op1 = force_reg (mode, op1);
17425 if (push_operand (op0, mode)
17426 && ! general_no_elim_operand (op1, mode))
17427 op1 = copy_to_mode_reg (mode, op1);
17429 /* Force large constants in 64bit compilation into register
17430 to get them CSEed. */
17431 if (can_create_pseudo_p ()
17432 && (mode == DImode) && TARGET_64BIT
17433 && immediate_operand (op1, mode)
17434 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17435 && !register_operand (op0, mode)
17437 op1 = copy_to_mode_reg (mode, op1);
17439 if (can_create_pseudo_p ()
17440 && FLOAT_MODE_P (mode)
17441 && GET_CODE (op1) == CONST_DOUBLE)
17443 /* If we are loading a floating point constant to a register,
17444 force the value to memory now, since we'll get better code
17445 out the back end. */
17447 op1 = validize_mem (force_const_mem (mode, op1));
17448 if (!register_operand (op0, mode))
17450 rtx temp = gen_reg_rtx (mode);
17451 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17452 emit_move_insn (op0, temp);
17458 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17462 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17464 rtx op0 = operands[0], op1 = operands[1];
17465 unsigned int align = GET_MODE_ALIGNMENT (mode);
17467 if (push_operand (op0, VOIDmode))
17468 op0 = emit_move_resolve_push (mode, op0);
17470 /* Force constants other than zero into memory. We do not know how
17471 the instructions used to build constants modify the upper 64 bits
17472 of the register, once we have that information we may be able
17473 to handle some of them more efficiently. */
17474 if (can_create_pseudo_p ()
17475 && register_operand (op0, mode)
17476 && (CONSTANT_P (op1)
17477 || (GET_CODE (op1) == SUBREG
17478 && CONSTANT_P (SUBREG_REG (op1))))
17479 && !standard_sse_constant_p (op1))
17480 op1 = validize_mem (force_const_mem (mode, op1));
17482 /* We need to check memory alignment for SSE mode since attribute
17483 can make operands unaligned. */
17484 if (can_create_pseudo_p ()
17485 && SSE_REG_MODE_P (mode)
17486 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17487 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17491 /* ix86_expand_vector_move_misalign() does not like constants ... */
17492 if (CONSTANT_P (op1)
17493 || (GET_CODE (op1) == SUBREG
17494 && CONSTANT_P (SUBREG_REG (op1))))
17495 op1 = validize_mem (force_const_mem (mode, op1));
17497 /* ... nor both arguments in memory. */
17498 if (!register_operand (op0, mode)
17499 && !register_operand (op1, mode))
17500 op1 = force_reg (mode, op1);
17502 tmp[0] = op0; tmp[1] = op1;
17503 ix86_expand_vector_move_misalign (mode, tmp);
17507 /* Make operand1 a register if it isn't already. */
17508 if (can_create_pseudo_p ()
17509 && !register_operand (op0, mode)
17510 && !register_operand (op1, mode))
17512 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17516 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17519 /* Split 32-byte AVX unaligned load and store if needed. */
17522 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17525 rtx (*extract) (rtx, rtx, rtx);
17526 rtx (*load_unaligned) (rtx, rtx);
17527 rtx (*store_unaligned) (rtx, rtx);
17530 switch (GET_MODE (op0))
17533 gcc_unreachable ();
17535 extract = gen_avx_vextractf128v32qi;
17536 load_unaligned = gen_avx_loaddquv32qi;
17537 store_unaligned = gen_avx_storedquv32qi;
17541 extract = gen_avx_vextractf128v8sf;
17542 load_unaligned = gen_avx_loadups256;
17543 store_unaligned = gen_avx_storeups256;
17547 extract = gen_avx_vextractf128v4df;
17548 load_unaligned = gen_avx_loadupd256;
17549 store_unaligned = gen_avx_storeupd256;
17556 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17557 && optimize_insn_for_speed_p ())
17559 rtx r = gen_reg_rtx (mode);
17560 m = adjust_address (op1, mode, 0);
17561 emit_move_insn (r, m);
17562 m = adjust_address (op1, mode, 16);
17563 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17564 emit_move_insn (op0, r);
17566 /* Normal *mov<mode>_internal pattern will handle
17567 unaligned loads just fine if misaligned_operand
17568 is true, and without the UNSPEC it can be combined
17569 with arithmetic instructions. */
17570 else if (misaligned_operand (op1, GET_MODE (op1)))
17571 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17573 emit_insn (load_unaligned (op0, op1));
17575 else if (MEM_P (op0))
17577 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17578 && optimize_insn_for_speed_p ())
17580 m = adjust_address (op0, mode, 0);
17581 emit_insn (extract (m, op1, const0_rtx));
17582 m = adjust_address (op0, mode, 16);
17583 emit_insn (extract (m, op1, const1_rtx));
17586 emit_insn (store_unaligned (op0, op1));
17589 gcc_unreachable ();
17592 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17593 straight to ix86_expand_vector_move. */
17594 /* Code generation for scalar reg-reg moves of single and double precision data:
17595 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17599 if (x86_sse_partial_reg_dependency == true)
17604 Code generation for scalar loads of double precision data:
17605 if (x86_sse_split_regs == true)
17606 movlpd mem, reg (gas syntax)
17610 Code generation for unaligned packed loads of single precision data
17611 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17612 if (x86_sse_unaligned_move_optimal)
17615 if (x86_sse_partial_reg_dependency == true)
17627 Code generation for unaligned packed loads of double precision data
17628 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17629 if (x86_sse_unaligned_move_optimal)
17632 if (x86_sse_split_regs == true)
/* Expand a misaligned vector move OPERANDS[0] <- OPERANDS[1] in MODE,
   dispatching on vector size (64/32/16 bytes) and the available ISA
   (AVX-512F, AVX, SSE2, SSE).
   NOTE(review): excerpt elides many lines (case labels, braces, else
   arms); comments describe only the visible statements.  */
17645 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17647 rtx op0, op1, orig_op0 = NULL_RTX, m;
17648 rtx (*load_unaligned) (rtx, rtx);
17649 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte (AVX-512) vectors: integer modes are canonicalized to V16SI
   via lowparts; float modes pick mode-specific unaligned patterns.  */
17654 if (GET_MODE_SIZE (mode) == 64)
17656 switch (GET_MODE_CLASS (mode))
17658 case MODE_VECTOR_INT:
17660 if (GET_MODE (op0) != V16SImode)
17665 op0 = gen_reg_rtx (V16SImode);
17668 op0 = gen_lowpart (V16SImode, op0);
17670 op1 = gen_lowpart (V16SImode, op1);
17673 case MODE_VECTOR_FLOAT:
17674 switch (GET_MODE (op0))
17677 gcc_unreachable ();
17679 load_unaligned = gen_avx512f_loaddquv16si;
17680 store_unaligned = gen_avx512f_storedquv16si;
17683 load_unaligned = gen_avx512f_loadups512;
17684 store_unaligned = gen_avx512f_storeups512;
17687 load_unaligned = gen_avx512f_loadupd512;
17688 store_unaligned = gen_avx512f_storeupd512;
17693 emit_insn (load_unaligned (op0, op1));
17694 else if (MEM_P (op0))
17695 emit_insn (store_unaligned (op0, op1));
17697 gcc_unreachable ();
/* If we substituted a fresh pseudo above, copy it back to the
   original destination in its original mode.  */
17699 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17703 gcc_unreachable ();
/* 32-byte (AVX) vectors: integers go through V32QI lowparts; floats
   are handled by the AVX-256 split helper above.  */
17710 && GET_MODE_SIZE (mode) == 32)
17712 switch (GET_MODE_CLASS (mode))
17714 case MODE_VECTOR_INT:
17716 if (GET_MODE (op0) != V32QImode)
17721 op0 = gen_reg_rtx (V32QImode);
17724 op0 = gen_lowpart (V32QImode, op0);
17726 op1 = gen_lowpart (V32QImode, op1);
17729 case MODE_VECTOR_FLOAT:
17730 ix86_avx256_split_vector_move_misalign (op0, op1);
17732 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17736 gcc_unreachable ();
/* 16-byte vectors, load side.  */
17744 /* Normal *mov<mode>_internal pattern will handle
17745 unaligned loads just fine if misaligned_operand
17746 is true, and without the UNSPEC it can be combined
17747 with arithmetic instructions. */
17749 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17750 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17751 && misaligned_operand (op1, GET_MODE (op1)))
17752 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17753 /* ??? If we have typed data, then it would appear that using
17754 movdqu is the only way to get unaligned data loaded with
17756 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17758 if (GET_MODE (op0) != V16QImode)
17761 op0 = gen_reg_rtx (V16QImode);
17763 op1 = gen_lowpart (V16QImode, op1);
17764 /* We will eventually emit movups based on insn attributes. */
17765 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17767 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
/* V2DF load: one movupd when unaligned moves are cheap, otherwise
   build the value from two 8-byte halves (loadlpd/loadhpd).  */
17769 else if (TARGET_SSE2 && mode == V2DFmode)
17774 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17775 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17776 || optimize_insn_for_size_p ())
17778 /* We will eventually emit movups based on insn attributes. */
17779 emit_insn (gen_sse2_loadupd (op0, op1));
17783 /* When SSE registers are split into halves, we can avoid
17784 writing to the top half twice. */
17785 if (TARGET_SSE_SPLIT_REGS)
17787 emit_clobber (op0);
17792 /* ??? Not sure about the best option for the Intel chips.
17793 The following would seem to satisfy; the register is
17794 entirely cleared, breaking the dependency chain. We
17795 then store to the upper half, with a dependency depth
17796 of one. A rumor has it that Intel recommends two movsd
17797 followed by an unpacklpd, but this is unconfirmed. And
17798 given that the dependency depth of the unpacklpd would
17799 still be one, I'm not sure why this would be better. */
17800 zero = CONST0_RTX (V2DFmode);
17803 m = adjust_address (op1, DFmode, 0);
17804 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17805 m = adjust_address (op1, DFmode, 8);
17806 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* V4SF load: movups when cheap, else loadlps/loadhps halves.  */
17813 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17814 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17815 || optimize_insn_for_size_p ())
17817 if (GET_MODE (op0) != V4SFmode)
17820 op0 = gen_reg_rtx (V4SFmode);
17822 op1 = gen_lowpart (V4SFmode, op1);
17823 emit_insn (gen_sse_loadups (op0, op1));
17825 emit_move_insn (orig_op0,
17826 gen_lowpart (GET_MODE (orig_op0), op0));
17830 if (mode != V4SFmode)
17831 t = gen_reg_rtx (V4SFmode);
/* Break the false dependency on the old register contents first
   when tuning says partial SSE register writes stall.  */
17835 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17836 emit_move_insn (t, CONST0_RTX (V4SFmode));
17840 m = adjust_address (op1, V2SFmode, 0);
17841 emit_insn (gen_sse_loadlps (t, t, m));
17842 m = adjust_address (op1, V2SFmode, 8);
17843 emit_insn (gen_sse_loadhps (t, t, m));
17844 if (mode != V4SFmode)
17845 emit_move_insn (op0, gen_lowpart (mode, t));
/* 16-byte vectors, store side: mirror of the load cases above.  */
17848 else if (MEM_P (op0))
17850 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17852 op0 = gen_lowpart (V16QImode, op0);
17853 op1 = gen_lowpart (V16QImode, op1);
17854 /* We will eventually emit movups based on insn attributes. */
17855 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17857 else if (TARGET_SSE2 && mode == V2DFmode)
17860 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17861 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17862 || optimize_insn_for_size_p ())
17863 /* We will eventually emit movups based on insn attributes. */
17864 emit_insn (gen_sse2_storeupd (op0, op1));
17867 m = adjust_address (op0, DFmode, 0);
17868 emit_insn (gen_sse2_storelpd (m, op1));
17869 m = adjust_address (op0, DFmode, 8);
17870 emit_insn (gen_sse2_storehpd (m, op1));
17875 if (mode != V4SFmode)
17876 op1 = gen_lowpart (V4SFmode, op1);
17879 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17880 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17881 || optimize_insn_for_size_p ())
17883 op0 = gen_lowpart (V4SFmode, op0);
17884 emit_insn (gen_sse_storeups (op0, op1));
17888 m = adjust_address (op0, V2SFmode, 0);
17889 emit_insn (gen_sse_storelps (m, op1));
17890 m = adjust_address (op0, V2SFmode, 8);
17891 emit_insn (gen_sse_storehps (m, op1));
17896 gcc_unreachable ();
17899 /* Helper function of ix86_fixup_binary_operands to canonicalize
17900 operand order. Returns true if the operands should be swapped. */
/* Return whether OPERANDS[1] and OPERANDS[2] of a CODE binop in MODE
   should be swapped, using a fixed priority: only commutative ops may
   swap; src1 matching dst beats everything; then immediates, then
   memory references, prefer to come second.  */
17903 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17906 rtx dst = operands[0];
17907 rtx src1 = operands[1];
17908 rtx src2 = operands[2];
17910 /* If the operation is not commutative, we can't do anything. */
17911 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17914 /* Highest priority is that src1 should match dst. */
17915 if (rtx_equal_p (dst, src1))
17917 if (rtx_equal_p (dst, src2))
17920 /* Next highest priority is that immediate constants come second. */
17921 if (immediate_operand (src2, mode))
17923 if (immediate_operand (src1, mode))
17926 /* Lowest priority is that memory references should come second. */
17936 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17937 destination to use for the operation. If different from the true
17938 destination in operands[0], a copy operation will be required. */
/* Legalize OPERANDS of a CODE binop in MODE so the two-address x86
   constraints hold, forcing operands into registers as needed.
   Returns the rtx to use as the destination; the caller must copy it
   to operands[0] if the two differ.  */
17941 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17944 rtx dst = operands[0];
17945 rtx src1 = operands[1];
17946 rtx src2 = operands[2];
17948 /* Canonicalize operand order. */
17949 if (ix86_swap_binary_operands_p (code, mode, operands))
17951 /* It is invalid to swap operands of different modes. */
17952 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17954 std::swap (src1, src2);
17957 /* Both source operands cannot be in memory. */
17958 if (MEM_P (src1) && MEM_P (src2))
17960 /* Optimization: Only read from memory once. */
17961 if (rtx_equal_p (src1, src2))
17963 src2 = force_reg (mode, src2);
17966 else if (rtx_equal_p (dst, src1))
17967 src2 = force_reg (mode, src2);
17969 src1 = force_reg (mode, src1);
17972 /* If the destination is memory, and we do not have matching source
17973 operands, do things in registers. */
17974 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17975 dst = gen_reg_rtx (mode);
17977 /* Source 1 cannot be a constant. */
17978 if (CONSTANT_P (src1))
17979 src1 = force_reg (mode, src1);
17981 /* Source 1 cannot be a non-matching memory. */
17982 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17983 src1 = force_reg (mode, src1);
17985 /* Improve address combine. */
17987 && GET_MODE_CLASS (mode) == MODE_INT
17989 src2 = force_reg (mode, src2);
/* Write the (possibly register-forced) sources back for the caller.  */
17991 operands[1] = src1;
17992 operands[2] = src2;
17996 /* Similarly, but assume that the destination has already been
17997 set up properly. */
/* Variant of ix86_fixup_binary_operands for callers that have already
   set up the destination: asserts no replacement destination was
   needed (fixup returned operands[0] itself).  */
18000 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18001 machine_mode mode, rtx operands[])
18003 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18004 gcc_assert (dst == operands[0]);
18007 /* Attempt to expand a binary operator. Make the expansion closer to the
18008 actual machine, then just general_operand, which will allow 3 separate
18009 memory references (one output, two input) in a single insn. */
/* Expand a two-address binary operator CODE in MODE: legalize the
   operands, emit the SET (normally wrapped in a PARALLEL with a
   FLAGS_REG clobber), and copy to the real destination if fixup
   substituted a temporary.  */
18012 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18015 rtx src1, src2, dst, op, clob;
18017 dst = ix86_fixup_binary_operands (code, mode, operands);
18018 src1 = operands[1];
18019 src2 = operands[2];
18021 /* Emit the instruction. */
18023 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18024 if (reload_in_progress)
18026 /* Reload doesn't know about the flags register, and doesn't know that
18027 it doesn't want to clobber it. We can only do this with PLUS. */
18028 gcc_assert (code == PLUS);
18031 else if (reload_completed
18033 && !rtx_equal_p (dst, src1))
18035 /* This is going to be an LEA; avoid splitting it later. */
/* Default path: attach the flags clobber so later passes know CC
   is destroyed.  */
18040 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18041 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18044 /* Fix up the destination if needed. */
18045 if (dst != operands[0])
18046 emit_move_insn (operands[0], dst);
18049 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18050 the given OPERANDS. */
/* Expand vector logical CODE (AND/IOR/XOR) in MODE.  If the sources
   are integer-vector SUBREGs of float vectors (the C idiom of casting
   __m128 to __m128i for logic ops), do the operation directly in the
   float-vector mode to avoid a domain-crossing penalty; otherwise emit
   the plain integer-mode operation.
   NOTE(review): excerpt elides lines; comments cover visible code only.  */
18053 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18056 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18057 if (GET_CODE (operands[1]) == SUBREG)
18062 else if (GET_CODE (operands[2]) == SUBREG)
18067 /* Optimize (__m128i) d | (__m128i) e and similar code
18068 when d and e are float vectors into float vector logical
18069 insn. In C/C++ without using intrinsics there is no other way
18070 to express vector logical operation on float vectors than
18071 to cast them temporarily to integer vectors. */
18073 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18074 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18075 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18076 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18077 && SUBREG_BYTE (op1) == 0
18078 && (GET_CODE (op2) == CONST_VECTOR
18079 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18080 && SUBREG_BYTE (op2) == 0))
18081 && can_create_pseudo_p ())
18084 switch (GET_MODE (SUBREG_REG (op1)))
/* Perform the operation in the float-vector mode of the SUBREG
   operands, then copy the lowpart back to the integer destination.  */
18092 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18093 if (GET_CODE (op2) == CONST_VECTOR)
18095 op2 = gen_lowpart (GET_MODE (dst), op2);
18096 op2 = force_reg (GET_MODE (dst), op2);
18101 op2 = SUBREG_REG (operands[2]);
18102 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18103 op2 = force_reg (GET_MODE (dst), op2);
18105 op1 = SUBREG_REG (op1);
18106 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18107 op1 = force_reg (GET_MODE (dst), op1);
18108 emit_insn (gen_rtx_SET (VOIDmode, dst,
18109 gen_rtx_fmt_ee (code, GET_MODE (dst),
18111 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Fallback: legalize both sources and emit the operation in MODE.  */
18117 if (!nonimmediate_operand (operands[1], mode))
18118 operands[1] = force_reg (mode, operands[1]);
18119 if (!nonimmediate_operand (operands[2], mode))
18120 operands[2] = force_reg (mode, operands[2]);
18121 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18122 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18123 gen_rtx_fmt_ee (code, mode, operands[1],
18127 /* Return TRUE or FALSE depending on whether the binary operator meets the
18128 appropriate constraints. */
/* Predicate: do OPERANDS of binop CODE in MODE satisfy x86's
   two-address constraints (at most one memory source, matching
   destination, no constant first source)?  Mirrors the checks made by
   ix86_fixup_binary_operands without modifying anything.  */
18131 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18134 rtx dst = operands[0];
18135 rtx src1 = operands[1];
18136 rtx src2 = operands[2];
18138 /* Both source operands cannot be in memory. */
18139 if (MEM_P (src1) && MEM_P (src2))
18142 /* Canonicalize operand order for commutative operators. */
18143 if (ix86_swap_binary_operands_p (code, mode, operands))
18144 std::swap (src1, src2);
18146 /* If the destination is memory, we must have a matching source operand. */
18147 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18150 /* Source 1 cannot be a constant. */
18151 if (CONSTANT_P (src1))
18154 /* Source 1 cannot be a non-matching memory. */
18155 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18156 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18157 return (code == AND
18160 || (TARGET_64BIT && mode == DImode))
18161 && satisfies_constraint_L (src2));
18166 /* Attempt to expand a unary operator. Make the expansion closer to the
18167 actual machine, then just general_operand, which will allow 2 separate
18168 memory references (one output, one input) in a single insn. */
/* Expand a unary operator CODE in MODE: legalize a memory destination
   (use a temporary unless source matches), force a memory source into
   a register otherwise, emit the SET (with a flags clobber except for
   NOT, which does not touch flags), and copy back if needed.  */
18171 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18174 bool matching_memory = false;
18175 rtx src, dst, op, clob;
18180 /* If the destination is memory, and we do not have matching source
18181 operands, do things in registers. */
18184 if (rtx_equal_p (dst, src))
18185 matching_memory = true;
18187 dst = gen_reg_rtx (mode);
18190 /* When source operand is memory, destination must match. */
18191 if (MEM_P (src) && !matching_memory)
18192 src = force_reg (mode, src);
18194 /* Emit the instruction. */
18196 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18197 if (reload_in_progress || code == NOT)
18199 /* Reload doesn't know about the flags register, and doesn't know that
18200 it doesn't want to clobber it. */
18201 gcc_assert (code == NOT);
18206 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18207 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18210 /* Fix up the destination if needed. */
18211 if (dst != operands[0])
18212 emit_move_insn (operands[0], dst);
18215 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18216 divisor are within the range [0-255]. */
/* Split a SImode/DImode div+mod into a runtime dispatch: if both
   dividend and divisor fit in 8 bits (tested by OR-ing them and
   checking bits above 0xFF), use the cheap 8-bit unsigned divide;
   otherwise fall through to the full-width signed/unsigned divmod.
   NOTE(review): excerpt elides lines (mode switch labels, braces).  */
18219 ix86_split_idivmod (machine_mode mode, rtx operands[],
18222 rtx_code_label *end_label, *qimode_label;
18223 rtx insn, div, mod;
18224 rtx scratch, tmp0, tmp1, tmp2;
18225 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18226 rtx (*gen_zero_extend) (rtx, rtx);
18227 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Select mode-specific generators (SImode vs DImode variants).  */
18232 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18233 gen_test_ccno_1 = gen_testsi_ccno_1;
18234 gen_zero_extend = gen_zero_extendqisi2;
18237 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18238 gen_test_ccno_1 = gen_testdi_ccno_1;
18239 gen_zero_extend = gen_zero_extendqidi2;
18242 gcc_unreachable ();
18245 end_label = gen_label_rtx ();
18246 qimode_label = gen_label_rtx ();
18248 scratch = gen_reg_rtx (mode);
18250 /* Use 8bit unsigned divimod if dividend and divisor are within
18251 the range [0-255]. */
18252 emit_move_insn (scratch, operands[2]);
18253 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18254 scratch, 1, OPTAB_DIRECT);
18255 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)))
18256 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18257 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18258 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18259 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18261 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
/* No prediction preference either way: 50% branch probability.  */
18262 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18263 JUMP_LABEL (insn) = qimode_label;
18265 /* Generate original signed/unsigned divimod. */
18266 div = gen_divmod4_1 (operands[0], operands[1],
18267 operands[2], operands[3]);
18270 /* Branch to the end. */
18271 emit_jump_insn (gen_jump (end_label));
18274 /* Generate 8bit unsigned divide. */
18275 emit_label (qimode_label);
18276 /* Don't use operands[0] for result of 8bit divide since not all
18277 registers support QImode ZERO_EXTRACT. */
18278 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18279 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18280 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18281 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* Build DIV/MOD (or UDIV/UMOD) rtxes used only as REG_EQUAL notes.  */
18285 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18286 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18290 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18291 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18294 /* Extract remainder from AH. */
18295 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18296 if (REG_P (operands[1]))
18297 insn = emit_move_insn (operands[1], tmp1);
18300 /* Need a new scratch register since the old one has result
18302 scratch = gen_reg_rtx (mode);
18303 emit_move_insn (scratch, tmp1);
18304 insn = emit_move_insn (operands[1], scratch);
18306 set_unique_reg_note (insn, REG_EQUAL, mod);
18308 /* Zero extend quotient from AL. */
18309 tmp1 = gen_lowpart (QImode, tmp0);
18310 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18311 set_unique_reg_note (insn, REG_EQUAL, div);
18313 emit_label (end_label);
18316 #define LEA_MAX_STALL (3)
18317 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18319 /* Increase given DISTANCE in half-cycles according to
18320 dependencies between PREV and NEXT instructions.
18321 Add 1 half-cycle if there is no dependency and
18322 go to next cycle if there is some dependecy. */
18324 static unsigned int
18325 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
/* Add 1 half-cycle when PREV and NEXT are independent; round up to the
   next full cycle (+2 after aligning to even) when NEXT uses a
   register that PREV defines, or when either insn is missing.  */
18329 if (!prev || !next)
18330 return distance + (distance & 1) + 2;
18332 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18333 return distance + 1;
18335 FOR_EACH_INSN_USE (use, next)
18336 FOR_EACH_INSN_DEF (def, prev)
18337 if (!DF_REF_IS_ARTIFICIAL (def)
18338 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18339 return distance + (distance & 1) + 2;
18341 return distance + 1;
18344 /* Function checks if instruction INSN defines register number
18345 REGNO1 or REGNO2. */
/* Return whether INSN has a non-artificial DF definition of register
   REGNO1 or REGNO2.  */
18348 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18353 FOR_EACH_INSN_DEF (def, insn)
18354 if (DF_REF_REG_DEF_P (def)
18355 && !DF_REF_IS_ARTIFICIAL (def)
18356 && (regno1 == DF_REF_REGNO (def)
18357 || regno2 == DF_REF_REGNO (def)))
18363 /* Function checks if instruction INSN uses register number
18364 REGNO as a part of address expression. */
/* Return whether INSN uses register REGNO inside a memory address
   (i.e. the DF use is a memory reference).  */
18367 insn_uses_reg_mem (unsigned int regno, rtx insn)
18371 FOR_EACH_INSN_USE (use, insn)
18372 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18378 /* Search backward for non-agu definition of register number REGNO1
18379 or register number REGNO2 in basic block starting from instruction
18380 START up to head of basic block or instruction INSN.
18382 Function puts true value into *FOUND var if definition was found
18383 and false otherwise.
18385 Distance in half-cycles between START and found instruction or head
18386 of BB is added to DISTANCE and returned. */
18389 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18390 rtx_insn *insn, int distance,
18391 rtx_insn *start, bool *found)
18393 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18394 rtx_insn *prev = start;
18395 rtx_insn *next = NULL;
/* Walk backwards from START within BB, bounded by LEA_SEARCH_THRESHOLD
   half-cycles, accumulating DISTANCE across non-jump insns.  */
18401 && distance < LEA_SEARCH_THRESHOLD)
18403 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18405 distance = increase_distance (prev, next, distance);
18406 if (insn_defines_reg (regno1, regno2, prev))
/* A defining insn that is not an LEA counts as the non-AGU
   definition we are looking for.  */
18408 if (recog_memoized (prev) < 0
18409 || get_attr_type (prev) != TYPE_LEA)
18418 if (prev == BB_HEAD (bb))
18421 prev = PREV_INSN (prev);
18427 /* Search backward for non-agu definition of register number REGNO1
18428 or register number REGNO2 in INSN's basic block until
18429 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18430 2. Reach neighbour BBs boundary, or
18431 3. Reach agu definition.
18432 Returns the distance between the non-agu definition point and INSN.
18433 If no definition point, returns -1. */
18436 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18439 basic_block bb = BLOCK_FOR_INSN (insn);
18441 bool found = false;
/* First search backwards within INSN's own basic block.  */
18443 if (insn != BB_HEAD (bb))
18444 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18445 distance, PREV_INSN (insn),
/* Not found and budget left: continue into predecessor blocks.  A
   self-loop edge means we can rescan this same block from its end;
   otherwise take the shortest positive distance over all preds.  */
18448 if (!found && distance < LEA_SEARCH_THRESHOLD)
18452 bool simple_loop = false;
18454 FOR_EACH_EDGE (e, ei, bb->preds)
18457 simple_loop = true;
18462 distance = distance_non_agu_define_in_bb (regno1, regno2,
18464 BB_END (bb), &found);
18467 int shortest_dist = -1;
18468 bool found_in_bb = false;
18470 FOR_EACH_EDGE (e, ei, bb->preds)
18473 = distance_non_agu_define_in_bb (regno1, regno2,
18479 if (shortest_dist < 0)
18480 shortest_dist = bb_dist;
18481 else if (bb_dist > 0)
18482 shortest_dist = MIN (bb_dist, shortest_dist);
18488 distance = shortest_dist;
18492 /* get_attr_type may modify recog data. We want to make sure
18493 that recog data is valid for instruction INSN, on which
18494 distance_non_agu_define is called. INSN is unchanged here. */
18495 extract_insn_cached (insn);
/* Internal distances are in half-cycles; report whole cycles.  */
18500 return distance >> 1;
18503 /* Return the distance in half-cycles between INSN and the next
18504 insn that uses register number REGNO in memory address added
18505 to DISTANCE. Return -1 if REGNO0 is set.
18507 Put true value into *FOUND if register usage was found and
18509 Put true value into *REDEFINED if register redefinition was
18510 found and false otherwise. */
18513 distance_agu_use_in_bb (unsigned int regno,
18514 rtx_insn *insn, int distance, rtx_insn *start,
18515 bool *found, bool *redefined)
18517 basic_block bb = NULL;
18518 rtx_insn *next = start;
18519 rtx_insn *prev = NULL;
18522 *redefined = false;
18524 if (start != NULL_RTX)
18526 bb = BLOCK_FOR_INSN (start);
18527 if (start != BB_HEAD (bb))
18528 /* If insn and start belong to the same bb, set prev to insn,
18529 so the call to increase_distance will increase the distance
18530 between insns by 1. */
/* Walk forwards from START, bounded by LEA_SEARCH_THRESHOLD, looking
   for either a memory-address use of REGNO (sets *FOUND) or a
   redefinition of REGNO (sets *REDEFINED) — whichever comes first.  */
18536 && distance < LEA_SEARCH_THRESHOLD)
18538 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18540 distance = increase_distance(prev, next, distance);
18541 if (insn_uses_reg_mem (regno, next))
18543 /* Return DISTANCE if OP0 is used in memory
18544 address in NEXT. */
18549 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18551 /* Return -1 if OP0 is set in NEXT. */
18559 if (next == BB_END (bb))
18562 next = NEXT_INSN (next);
18568 /* Return the distance between INSN and the next insn that uses
18569 register number REGNO0 in memory address. Return -1 if no such
18570 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18573 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18575 basic_block bb = BLOCK_FOR_INSN (insn);
18577 bool found = false;
18578 bool redefined = false;
/* First search forwards within INSN's own basic block.  */
18580 if (insn != BB_END (bb))
18581 distance = distance_agu_use_in_bb (regno0, insn, distance,
18583 &found, &redefined);
/* Not found, not redefined, and budget left: continue into successor
   blocks; a self-loop lets us rescan this block from its head, else
   take the shortest positive distance over all successors.  */
18585 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18589 bool simple_loop = false;
18591 FOR_EACH_EDGE (e, ei, bb->succs)
18594 simple_loop = true;
18599 distance = distance_agu_use_in_bb (regno0, insn,
18600 distance, BB_HEAD (bb),
18601 &found, &redefined);
18604 int shortest_dist = -1;
18605 bool found_in_bb = false;
18606 bool redefined_in_bb = false;
18608 FOR_EACH_EDGE (e, ei, bb->succs)
18611 = distance_agu_use_in_bb (regno0, insn,
18612 distance, BB_HEAD (e->dest),
18613 &found_in_bb, &redefined_in_bb);
18616 if (shortest_dist < 0)
18617 shortest_dist = bb_dist;
18618 else if (bb_dist > 0)
18619 shortest_dist = MIN (bb_dist, shortest_dist);
18625 distance = shortest_dist;
/* -1 when no use was found or the register was redefined first.  */
18629 if (!found || redefined)
/* Half-cycles to whole cycles.  */
18632 return distance >> 1;
18635 /* Define this macro to tune LEA priority vs ADD, it take effect when
18636 there is a dilemma of choicing LEA or ADD
18637 Negative value: ADD is more preferred than LEA
18639 Positive value: LEA is more preferred than ADD*/
18640 #define IX86_LEA_PRIORITY 0
18642 /* Return true if usage of lea INSN has performance advantage
18643 over a sequence of instructions. Instructions sequence has
18644 SPLIT_COST cycles higher latency than lea latency. */
18647 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18648 unsigned int regno2, int split_cost, bool has_scale)
18650 int dist_define, dist_use;
18652 /* For Silvermont if using a 2-source or 3-source LEA for
18653 non-destructive destination purposes, or due to wanting
18654 ability to use SCALE, the use of LEA is justified. */
18655 if (TARGET_SILVERMONT || TARGET_INTEL)
18659 if (split_cost < 1)
18661 if (regno0 == regno1 || regno0 == regno2)
/* Measure distances: how far back the address registers were defined
   by a non-AGU insn, and how far forward the result feeds an address.  */
18666 dist_define = distance_non_agu_define (regno1, regno2, insn);
18667 dist_use = distance_agu_use (regno0, insn);
18669 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18671 /* If there is no non AGU operand definition, no AGU
18672 operand usage and split cost is 0 then both lea
18673 and non lea variants have same priority. Currently
18674 we prefer lea for 64 bit code and non lea on 32 bit
18676 if (dist_use < 0 && split_cost == 0)
18677 return TARGET_64BIT || IX86_LEA_PRIORITY;
18682 /* With longer definitions distance lea is more preferable.
18683 Here we change it to take into account splitting cost and
18685 dist_define += split_cost + IX86_LEA_PRIORITY;
18687 /* If there is no use in memory addess then we just check
18688 that split cost exceeds AGU stall. */
18690 return dist_define > LEA_MAX_STALL;
18692 /* If this insn has both backward non-agu dependence and forward
18693 agu dependence, the one with short distance takes effect. */
18694 return dist_define >= dist_use;
18697 /* Return true if it is legal to clobber flags by INSN and
18698 false otherwise. */
/* Return true if FLAGS_REG may be clobbered at INSN: scan forward to
   the end of the block for a reader of the flags (stopping at a
   redefinition), then check FLAGS_REG is not live out of the block.  */
18701 ix86_ok_to_clobber_flags (rtx_insn *insn)
18703 basic_block bb = BLOCK_FOR_INSN (insn);
18709 if (NONDEBUG_INSN_P (insn))
18711 FOR_EACH_INSN_USE (use, insn)
18712 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18715 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18719 if (insn == BB_END (bb))
18722 insn = NEXT_INSN (insn);
18725 live = df_get_live_out(bb);
18726 return !REGNO_REG_SET_P (live, FLAGS_REG);
18729 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18730 move and add to avoid AGU stalls. */
18733 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18735 unsigned int regno0, regno1, regno2;
18737 /* Check if we need to optimize. */
/* Only split when tuning for AGU stalls and optimizing for speed.  */
18738 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18741 /* Check it is correct to split here. */
18742 if (!ix86_ok_to_clobber_flags(insn))
18745 regno0 = true_regnum (operands[0]);
18746 regno1 = true_regnum (operands[1]);
18747 regno2 = true_regnum (operands[2]);
18749 /* We need to split only adds with non destructive
18750 destination operand. */
18751 if (regno0 == regno1 || regno0 == regno2)
/* split_cost 1: the replacement sequence needs one extra mov.  */
18754 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18757 /* Return true if we should emit lea instruction instead of mov
18761 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18763 unsigned int regno0, regno1;
18765 /* Check if we need to optimize. */
18766 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18769 /* Use lea for reg to reg moves only. */
18770 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18773 regno0 = true_regnum (operands[0]);
18774 regno1 = true_regnum (operands[1]);
/* split_cost 0 and no scale: plain reg-to-reg copy candidate.  */
18776 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18779 /* Return true if we need to split lea into a sequence of
18780 instructions to avoid AGU stalls. */
/* Decide whether the LEA at INSN should be split into simpler ALU
   insns: decompose the address, estimate the latency (split_cost) of
   the replacement sequence, and compare against the LEA's AGU-stall
   model via ix86_lea_outperforms.  */
18783 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18785 unsigned int regno0, regno1, regno2;
18787 struct ix86_address parts;
18790 /* Check we need to optimize. */
18791 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18794 /* The "at least two components" test below might not catch simple
18795 move or zero extension insns if parts.base is non-NULL and parts.disp
18796 is const0_rtx as the only components in the address, e.g. if the
18797 register is %rbp or %r13. As this test is much cheaper and moves or
18798 zero extensions are the common case, do this check first. */
18799 if (REG_P (operands[1])
18800 || (SImode_address_operand (operands[1], VOIDmode)
18801 && REG_P (XEXP (operands[1], 0))))
18804 /* Check if it is OK to split here. */
18805 if (!ix86_ok_to_clobber_flags (insn))
18808 ok = ix86_decompose_address (operands[1], &parts);
18811 /* There should be at least two components in the address. */
18812 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18813 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18816 /* We should not split into add if non legitimate pic
18817 operand is used as displacement. */
18818 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18821 regno0 = true_regnum (operands[0]) ;
18822 regno1 = INVALID_REGNUM;
18823 regno2 = INVALID_REGNUM;
18826 regno1 = true_regnum (parts.base);
18828 regno2 = true_regnum (parts.index);
18832 /* Compute how many cycles we will add to execution time
18833 if split lea into a sequence of instructions. */
18834 if (parts.base || parts.index)
18836 /* Have to use mov instruction if non desctructive
18837 destination form is used. */
18838 if (regno1 != regno0 && regno2 != regno0)
18841 /* Have to add index to base if both exist. */
18842 if (parts.base && parts.index)
18845 /* Have to use shift and adds if scale is 2 or greater. */
18846 if (parts.scale > 1)
18848 if (regno0 != regno1)
18850 else if (regno2 == regno0)
18853 split_cost += parts.scale;
18856 /* Have to use add instruction with immediate if
18857 disp is non zero. */
18858 if (parts.disp && parts.disp != const0_rtx)
18861 /* Subtract the price of lea. */
/* has_scale = scale > 2: scaling that genuinely needs the LEA.  */
18865 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18869 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18870 matches destination. RTX includes clobber of FLAGS_REG. */
/* Emit the two-address insn DST = DST <CODE> SRC in MODE, as a
   PARALLEL of the SET and a FLAGS_REG clobber.  */
18873 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18878 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18879 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18881 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18884 /* Return true if regno1 def is nearest to the insn. */
/* Scan backwards from INSN to the head of its basic block; return
   whether REGNO1's definition is encountered before REGNO2's
   (i.e. REGNO1 is defined nearer to INSN).  */
18887 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18889 rtx_insn *prev = insn;
18890 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18894 while (prev && prev != start)
18896 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18898 prev = PREV_INSN (prev);
18901 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18903 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18905 prev = PREV_INSN (prev);
18908 /* None of the regs is defined in the bb. */
18912 /* Split lea instructions into a sequence of instructions
18913 which are executed on ALU to avoid AGU stalls.
18914 It is assumed that it is allowed to clobber flags register
18915 at lea position. */
/* OPERANDS[0] is the LEA destination, OPERANDS[1] the address expression;
   MODE is the mode in which the replacement arithmetic is emitted.  The
   address is decomposed into base + index*scale + disp and re-expressed
   as moves, shifts and adds via ix86_emit_binop.  */
18918 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18920 unsigned int regno0, regno1, regno2;
18921 struct ix86_address parts;
18925 ok = ix86_decompose_address (operands[1], &parts);
18928 target = gen_lowpart (mode, operands[0]);
/* regno1/regno2 track the base and index hard/pseudo register numbers;
   INVALID_REGNUM when the corresponding address part is absent.  */
18930 regno0 = true_regnum (target);
18931 regno1 = INVALID_REGNUM;
18932 regno2 = INVALID_REGNUM;
18936 parts.base = gen_lowpart (mode, parts.base);
18937 regno1 = true_regnum (parts.base);
18942 parts.index = gen_lowpart (mode, parts.index);
18943 regno2 = true_regnum (parts.index);
18947 parts.disp = gen_lowpart (mode, parts.disp);
18949 if (parts.scale > 1)
18951 /* Case r1 = r1 + ... */
18952 if (regno1 == regno0)
18954 /* If we have a case r1 = r1 + C * r2 then we
18955 should use multiplication which is very
18956 expensive. Assume cost model is wrong if we
18957 have such case here. */
18958 gcc_assert (regno2 != regno0);
/* Replace the scaling by SCALE repeated additions of the index.  */
18960 for (adds = parts.scale; adds > 0; adds--)
18961 ix86_emit_binop (PLUS, mode, target, parts.index);
18965 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18966 if (regno0 != regno2)
18967 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18969 /* Use shift for scaling. */
18970 ix86_emit_binop (ASHIFT, mode, target,
18971 GEN_INT (exact_log2 (parts.scale)));
18974 ix86_emit_binop (PLUS, mode, target, parts.base);
18976 if (parts.disp && parts.disp != const0_rtx)
18977 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* Degenerate address: displacement only.  */
18980 else if (!parts.base && !parts.index)
18982 gcc_assert(parts.disp);
18983 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18989 if (regno0 != regno2)
18990 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18992 else if (!parts.index)
18994 if (regno0 != regno1)
18995 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
/* base + index with no scale: pick the SET source depending on which
   of the two source registers already lives in the destination.  */
18999 if (regno0 == regno1)
19001 else if (regno0 == regno2)
19007 /* Find better operand for SET instruction, depending
19008 on which definition is farther from the insn. */
19009 if (find_nearest_reg_def (insn, regno1, regno2))
19010 tmp = parts.index, tmp1 = parts.base;
19012 tmp = parts.base, tmp1 = parts.index;
19014 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19016 if (parts.disp && parts.disp != const0_rtx)
19017 ix86_emit_binop (PLUS, mode, target, parts.disp);
19019 ix86_emit_binop (PLUS, mode, target, tmp1);
19023 ix86_emit_binop (PLUS, mode, target, tmp);
19026 if (parts.disp && parts.disp != const0_rtx)
19027 ix86_emit_binop (PLUS, mode, target, parts.disp);
19031 /* Return true if it is ok to optimize an ADD operation to LEA
19032 operation to avoid flag register consumation. For most processors,
19033 ADD is faster than LEA. For the processors like BONNELL, if the
19034 destination register of LEA holds an actual address which will be
19035 used soon, LEA is better and otherwise ADD is better. */
19038 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19040 unsigned int regno0 = true_regnum (operands[0]);
19041 unsigned int regno1 = true_regnum (operands[1]);
19042 unsigned int regno2 = true_regnum (operands[2]);
19044 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19045 if (regno0 != regno1 && regno0 != regno2)
/* Without the AGU-optimization tuning flag, or when optimizing for
   size, the generic answer applies (sampled lines omitted here).  */
19048 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
/* Otherwise defer to the LEA-vs-ALU cost model.  */
19051 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19054 /* Return true if destination reg of SET_BODY is shift count of
/* Recursive worker: handles SET/PARALLEL patterns on both the defining
   side (SET_BODY) and the using side (USE_BODY).  */
19058 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19064 /* Retrieve destination of SET_BODY. */
19065 switch (GET_CODE (set_body))
19068 set_dest = SET_DEST (set_body);
19069 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over every element of the defining pattern.  */
19073 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19074 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19082 /* Retrieve shift count of USE_BODY. */
19083 switch (GET_CODE (use_body))
19086 shift_rtx = XEXP (use_body, 1);
19089 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19090 if (ix86_dep_by_shift_count_body (set_body,
19091 XVECEXP (use_body, 0, i)))
/* Only genuine shift/rotate codes have a count operand to inspect.  */
19099 && (GET_CODE (shift_rtx) == ASHIFT
19100 || GET_CODE (shift_rtx) == LSHIFTRT
19101 || GET_CODE (shift_rtx) == ASHIFTRT
19102 || GET_CODE (shift_rtx) == ROTATE
19103 || GET_CODE (shift_rtx) == ROTATERT))
19105 rtx shift_count = XEXP (shift_rtx, 1);
19107 /* Return true if shift count is dest of SET_BODY. */
19108 if (REG_P (shift_count))
19110 /* Add check since it can be invoked before register
19111 allocation in pre-reload schedule. */
19112 if (reload_completed
19113 && true_regnum (set_dest) == true_regnum (shift_count))
/* Before reload, fall back to raw REGNO comparison.  */
19115 else if (REGNO(set_dest) == REGNO(shift_count))
19123 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: unwrap the insn PATTERNs and delegate to the
   pattern-level worker above.  */
19127 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19129 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19130 PATTERN (use_insn));
19133 /* Return TRUE or FALSE depending on whether the unary operator meets the
19134 appropriate constraints. */
19137 ix86_unary_operator_ok (enum rtx_code,
19141 /* If one of operands is memory, source and destination must match. */
19142 if ((MEM_P (operands[0])
19143 || MEM_P (operands[1]))
19144 && ! rtx_equal_p (operands[0], operands[1]))
19149 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19150 are ok, keeping in mind the possible movddup alternative. */
19153 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
/* Memory destination: only valid when it matches the source element
   that the interleave would store there (op1 for low, op2 for high).  */
19155 if (MEM_P (operands[0]))
19156 return rtx_equal_p (operands[0], operands[1 + high]);
/* Two memory sources need the SSE3 movddup form, i.e. both identical.  */
19157 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19158 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19162 /* Post-reload splitter for converting an SF or DFmode value in an
19163 SSE register into an unsigned SImode. */
/* operands[]: 0 = result reg, 1 = LARGE scratch, 2 = ZERO_OR_TWO31
   scratch, 3 = INPUT value, 4 = constant 2**31; the vector mode is
   taken from LARGE (V4SF or V2DF).  The algorithm subtracts 2**31
   when the input is >= 2**31, does a signed conversion, then xors
   the sign bit back in.  */
19166 ix86_split_convert_uns_si_sse (rtx operands[])
19168 machine_mode vecmode;
19169 rtx value, large, zero_or_two31, input, two31, x;
19171 large = operands[1];
19172 zero_or_two31 = operands[2];
19173 input = operands[3];
19174 two31 = operands[4];
19175 vecmode = GET_MODE (large);
19176 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19178 /* Load up the value into the low element. We must ensure that the other
19179 elements are valid floats -- zero is the easiest such value. */
19182 if (vecmode == V4SFmode)
19183 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19185 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Input already in an SSE register: zero VALUE then merge the low
   element with movss/movsd.  */
19189 input = gen_rtx_REG (vecmode, REGNO (input));
19190 emit_move_insn (value, CONST0_RTX (vecmode));
19191 if (vecmode == V4SFmode)
19192 emit_insn (gen_sse_movss (value, value, input));
19194 emit_insn (gen_sse2_movsd (value, value, input));
19197 emit_move_insn (large, two31);
19198 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* LARGE := (2**31 <= VALUE) comparison mask.  */
19200 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19201 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* ZERO_OR_TWO31 := mask ? 2**31 : 0; then VALUE -= ZERO_OR_TWO31.  */
19203 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19204 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19206 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19207 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the all-ones mask into just the sign bit (0x80000000).  */
19209 large = gen_rtx_REG (V4SImode, REGNO (large));
19210 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
/* Signed truncation to int, then xor the sign bit back in for the
   inputs that had 2**31 subtracted.  */
19212 x = gen_rtx_REG (V4SImode, REGNO (value));
19213 if (vecmode == V4SFmode)
19214 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19216 emit_insn (gen_sse2_cvttpd2dq (x, value));
19219 emit_insn (gen_xorv4si3 (value, value, large));
19222 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19223 Expects the 64-bit DImode to be supplied in a pair of integral
19224 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19225 -mfpmath=sse, !optimize_size only. */
19228 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19230 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19231 rtx int_xmm, fp_xmm;
19232 rtx biases, exponents;
/* Get the 64-bit integer INPUT into the low half of an XMM register,
   choosing the cheapest path the target supports.  */
19235 int_xmm = gen_reg_rtx (V4SImode);
19236 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19237 emit_insn (gen_movdi_to_sse (int_xmm, input));
19238 else if (TARGET_SSE_SPLIT_REGS)
19240 emit_clobber (int_xmm);
19241 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19245 x = gen_reg_rtx (V2DImode);
19246 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19247 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant vector of the two DF exponent words used as biases.  */
19250 x = gen_rtx_CONST_VECTOR (V4SImode,
19251 gen_rtvec (4, GEN_INT (0x43300000UL),
19252 GEN_INT (0x45300000UL),
19253 const0_rtx, const0_rtx));
19254 exponents = validize_mem (force_const_mem (V4SImode, x));
19256 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19257 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19259 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19260 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19261 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19262 (0x1.0p84 + double(fp_value_hi_xmm)).
19263 Note these exponents differ by 32. */
19265 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19267 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19268 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19269 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19270 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19271 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19272 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19273 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19274 biases = validize_mem (force_const_mem (V2DFmode, biases));
19275 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19277 /* Add the upper and lower DFmode values together. */
/* SSE3: one horizontal add; otherwise shuffle the high element down
   and use a plain vector add.  */
19279 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19282 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19283 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19284 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19287 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19290 /* Not used, but eases macroization of patterns. */
/* Stub so the insn pattern macroization can reference a uniform name;
   must never be reached at run time.  */
19292 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19294 gcc_unreachable ();
19297 /* Convert an unsigned SImode value into a DFmode. Only currently used
19298 for SSE, but applicable anywhere. */
/* Strategy: x - 2**31 is a signed int, convertible directly; then add
   2**31 back as a double (exactly representable in DFmode).  */
19301 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19303 REAL_VALUE_TYPE TWO31r;
/* GEN_INT (-2147483647 - 1) spells INT_MIN without overflowing.  */
19306 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19307 NULL, 1, OPTAB_DIRECT);
19309 fp = gen_reg_rtx (DFmode);
19310 emit_insn (gen_floatsidf2 (fp, x));
19312 real_ldexp (&TWO31r, &dconst1, 31);
19313 x = const_double_from_real_value (TWO31r, DFmode);
19315 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19317 emit_move_insn (target, x);
19320 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19321 32-bit mode; otherwise we have a direct convert instruction. */
/* result = (double) hi * 2**32 + (double) (unsigned) lo, where hi is
   converted signed and lo unsigned.  */
19324 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19326 REAL_VALUE_TYPE TWO32r;
19327 rtx fp_lo, fp_hi, x;
19329 fp_lo = gen_reg_rtx (DFmode);
19330 fp_hi = gen_reg_rtx (DFmode);
19332 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19334 real_ldexp (&TWO32r, &dconst1, 32);
19335 x = const_double_from_real_value (TWO32r, DFmode);
19336 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low word is unsigned — reuse the uns_sidf helper.  */
19338 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19340 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19343 emit_move_insn (target, x);
19346 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19347 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Split the 32-bit value into 16-bit halves, convert each exactly,
   then combine as hi * 2**16 + lo to avoid the signedness problem.  */
19349 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19351 REAL_VALUE_TYPE ONE16r;
19352 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19354 real_ldexp (&ONE16r, &dconst1, 16);
19355 x = const_double_from_real_value (ONE16r, SFmode);
19356 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19357 NULL, 0, OPTAB_DIRECT);
19358 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19359 NULL, 0, OPTAB_DIRECT);
19360 fp_hi = gen_reg_rtx (SFmode);
19361 fp_lo = gen_reg_rtx (SFmode);
19362 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19363 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19364 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19366 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19368 if (!rtx_equal_p (target, fp_hi))
19369 emit_move_insn (target, fp_hi);
19372 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19373 a vector of unsigned ints VAL to vector of floats TARGET. */
/* Vector analogue of the scalar uns_sisf trick: convert the low and
   high 16-bit halves separately and recombine as hi * 2**16 + lo.  */
19376 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19379 REAL_VALUE_TYPE TWO16r;
19380 machine_mode intmode = GET_MODE (val);
19381 machine_mode fltmode = GET_MODE (target);
19382 rtx (*cvt) (rtx, rtx);
19384 if (intmode == V4SImode)
19385 cvt = gen_floatv4siv4sf2;
19387 cvt = gen_floatv8siv8sf2;
/* tmp[0] = 0xffff mask, tmp[1] = low halves, tmp[2] = high halves.  */
19388 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19389 tmp[0] = force_reg (intmode, tmp[0]);
19390 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19392 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19393 NULL_RTX, 1, OPTAB_DIRECT);
19394 tmp[3] = gen_reg_rtx (fltmode);
19395 emit_insn (cvt (tmp[3], tmp[1]));
19396 tmp[4] = gen_reg_rtx (fltmode);
19397 emit_insn (cvt (tmp[4], tmp[2]));
19398 real_ldexp (&TWO16r, &dconst1, 16);
19399 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19400 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19401 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19403 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19405 if (tmp[7] != target)
19406 emit_move_insn (target, tmp[7]);
19409 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19410 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19411 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19412 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
/* Returns the adjusted value; *XORP receives the per-lane 0x80000000
   correction mask to be xor'ed into the converted integers.  */
19415 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19417 REAL_VALUE_TYPE TWO31r;
19418 rtx two31r, tmp[4];
19419 machine_mode mode = GET_MODE (val);
19420 machine_mode scalarmode = GET_MODE_INNER (mode);
19421 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19422 rtx (*cmp) (rtx, rtx, rtx, rtx);
19425 for (i = 0; i < 3; i++)
19426 tmp[i] = gen_reg_rtx (mode);
19427 real_ldexp (&TWO31r, &dconst1, 31);
19428 two31r = const_double_from_real_value (TWO31r, scalarmode);
19429 two31r = ix86_build_const_vector (mode, 1, two31r);
19430 two31r = force_reg (mode, two31r);
/* Pick the mask-compare builder matching the vector FP mode.  */
19433 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19434 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19435 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19436 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19437 default: gcc_unreachable ();
/* tmp[0] = (2**31 <= val) all-ones mask per lane.  */
19439 tmp[3] = gen_rtx_LE (mode, two31r, val);
19440 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
/* tmp[1] = mask ? 2**31 : 0 (amount to subtract before conversion).  */
19441 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
/* Derive *XORP = 0x80000000 in the masked lanes: shift the all-ones
   mask left by 31 where a full-width vector shift exists, otherwise
   AND with a 0x80000000 constant vector.  */
19443 if (intmode == V4SImode || TARGET_AVX2)
19444 *xorp = expand_simple_binop (intmode, ASHIFT,
19445 gen_lowpart (intmode, tmp[0]),
19446 GEN_INT (31), NULL_RTX, 0,
19450 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19451 two31 = ix86_build_const_vector (intmode, 1, two31);
19452 *xorp = expand_simple_binop (intmode, AND,
19453 gen_lowpart (intmode, tmp[0]),
19454 two31, NULL_RTX, 0,
19457 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19461 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19462 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR of mode MODE whose first element is VALUE; the
   remaining elements are VALUE again when VECT, else zero.  */
19466 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19470 machine_mode scalar_mode;
19493 n_elt = GET_MODE_NUNITS (mode);
19494 v = rtvec_alloc (n_elt);
19495 scalar_mode = GET_MODE_INNER (mode);
19497 RTVEC_ELT (v, 0) = value;
19499 for (i = 1; i < n_elt; ++i)
19500 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19502 return gen_rtx_CONST_VECTOR (mode, v);
/* Non-vector modes are not expected here.  */
19505 gcc_unreachable ();
19509 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19510 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19511 for an SSE register. If VECT is true, then replicate the mask for
19512 all elements of the vector register. If INVERT is true, then create
19513 a mask excluding the sign bit. */
19516 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19518 machine_mode vec_mode, imode;
/* The sign-bit constant is assembled as a lo/hi HOST_WIDE_INT pair so
   it works whether HOST_WIDE_INT is 32 or 64 bits wide.  */
19519 HOST_WIDE_INT hi, lo;
19524 /* Find the sign bit, sign extended to 2*HWI. */
19534 mode = GET_MODE_INNER (mode);
/* 32-bit element (SFmode case): sign bit is 0x80000000.  */
19536 lo = 0x80000000, hi = lo < 0;
19546 mode = GET_MODE_INNER (mode);
19548 if (HOST_BITS_PER_WIDE_INT >= 64)
19549 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19551 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19556 vec_mode = VOIDmode;
19557 if (HOST_BITS_PER_WIDE_INT >= 64)
19560 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19567 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: complement to mask out everything except the sign bit.  */
19571 lo = ~lo, hi = ~hi;
19577 mask = immed_double_const (lo, hi, imode);
19579 vec = gen_rtvec (2, v, mask);
19580 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19581 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19588 gcc_unreachable ();
19592 lo = ~lo, hi = ~hi;
19594 /* Force this value into the low part of a fp vector constant. */
19595 mask = immed_double_const (lo, hi, imode);
19596 mask = gen_lowpart (mode, mask);
/* vec_mode == VOIDmode means a scalar mask was requested.  */
19598 if (vec_mode == VOIDmode)
19599 return force_reg (mode, mask);
19601 v = ix86_build_const_vector (vec_mode, vect, mask);
19602 return force_reg (vec_mode, v);
19605 /* Generate code for floating point ABS or NEG. */
/* Emits DST := CODE (SRC) either as a bitwise mask operation (SSE /
   vector / TFmode paths) or as an x87-style unary with the flags
   clobber required by the integer fallback alternatives.  */
19608 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19611 rtx mask, set, dst, src;
19612 bool use_sse = false;
19613 bool vector_mode = VECTOR_MODE_P (mode);
19614 machine_mode vmode = mode;
19618 else if (mode == TFmode)
19620 else if (TARGET_SSE_MATH)
19622 use_sse = SSE_FLOAT_MODE_P (mode);
19623 if (mode == SFmode)
19625 else if (mode == DFmode)
19629 /* NEG and ABS performed with SSE use bitwise mask operations.
19630 Create the appropriate mask now. */
/* ABS clears the sign bit (inverted mask); NEG flips it.  */
19632 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19639 set = gen_rtx_fmt_e (code, mode, src);
19640 set = gen_rtx_SET (VOIDmode, dst, set);
/* The mask is attached via USE so post-reload splitters can find it.  */
19647 use = gen_rtx_USE (VOIDmode, mask);
19649 par = gen_rtvec (2, set, use);
/* Scalar path additionally clobbers FLAGS_REG for the integer
   alternatives of the absneg patterns.  */
19652 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19653 par = gen_rtvec (3, set, use, clob);
19655 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19661 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* operands[]: 0 = dest, 1 = magnitude source, 2 = sign source.  Emits
   either the _const pattern (magnitude known at compile time) or the
   _var pattern (two masks needed).  */
19664 ix86_expand_copysign (rtx operands[])
19666 machine_mode mode, vmode;
19667 rtx dest, op0, op1, mask, nmask;
19669 dest = operands[0];
19673 mode = GET_MODE (dest);
19675 if (mode == SFmode)
19677 else if (mode == DFmode)
/* Constant magnitude: strip its sign up front, the sign comes
   entirely from op1.  */
19682 if (GET_CODE (op0) == CONST_DOUBLE)
19684 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19686 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19687 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19689 if (mode == SFmode || mode == DFmode)
19691 if (op0 == CONST0_RTX (mode))
19692 op0 = CONST0_RTX (vmode)
19695 rtx v = ix86_build_const_vector (vmode, false, op0);
19697 op0 = force_reg (vmode, v);
19700 else if (op0 != CONST0_RTX (mode))
19701 op0 = force_reg (mode, op0);
19703 mask = ix86_build_signbit_mask (vmode, 0, 0);
19705 if (mode == SFmode)
19706 copysign_insn = gen_copysignsf3_const;
19707 else if (mode == DFmode)
19708 copysign_insn = gen_copysigndf3_const;
19710 copysign_insn = gen_copysigntf3_const;
19712 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
19716 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19718 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19719 mask = ix86_build_signbit_mask (vmode, 0, 0);
19721 if (mode == SFmode)
19722 copysign_insn = gen_copysignsf3_var;
19723 else if (mode == DFmode)
19724 copysign_insn = gen_copysigndf3_var;
19726 copysign_insn = gen_copysigntf3_var;
19728 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19732 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19733 be a constant, and so has already been expanded into a vector constant. */
/* operands[]: 0 = dest, 1 = constant magnitude vector, 3 = sign mask.
   dest = (op1_sign & mask) | constant_magnitude.  */
19736 ix86_split_copysign_const (rtx operands[])
19738 machine_mode mode, vmode;
19739 rtx dest, op0, mask, x;
19741 dest = operands[0];
19743 mask = operands[3];
19745 mode = GET_MODE (dest);
19746 vmode = GET_MODE (mask);
/* Work on DEST reinterpreted in the vector mode of the mask.  */
19748 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19749 x = gen_rtx_AND (vmode, dest, mask);
19750 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Skip the IOR when the constant magnitude is zero.  */
19752 if (op0 != CONST0_RTX (vmode))
19754 x = gen_rtx_IOR (vmode, dest, op0);
19755 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19759 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19760 so we have to do two masks. */
/* operands[]: 0 = dest, 1 = scratch, 4 = inverted sign mask (nmask),
   5 = sign mask.  dest = (op0 & nmask) | (op1 & mask), with register
   aliasing resolved per the pattern's constraint alternatives.  */
19763 ix86_split_copysign_var (rtx operands[])
19765 machine_mode mode, vmode;
19766 rtx dest, scratch, op0, op1, mask, nmask, x;
19768 dest = operands[0];
19769 scratch = operands[1];
19772 nmask = operands[4];
19773 mask = operands[5];
19775 mode = GET_MODE (dest);
19776 vmode = GET_MODE (mask);
19778 if (rtx_equal_p (op0, op1))
19780 /* Shouldn't happen often (it's useless, obviously), but when it does
19781 we'd generate incorrect code if we continue below. */
19782 emit_move_insn (dest, op0);
19786 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19788 gcc_assert (REGNO (op1) == REGNO (scratch));
19790 x = gen_rtx_AND (vmode, scratch, mask);
19791 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest currently holds the mask: use ANDN-style (~dest) & op0.  */
19794 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19795 x = gen_rtx_NOT (vmode, dest);
19796 x = gen_rtx_AND (vmode, x, op0);
19797 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19801 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19803 x = gen_rtx_AND (vmode, scratch, mask);
19805 else /* alternative 2,4 */
19807 gcc_assert (REGNO (mask) == REGNO (scratch));
19808 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19809 x = gen_rtx_AND (vmode, scratch, op1);
19811 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19813 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19815 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19816 x = gen_rtx_AND (vmode, dest, nmask);
19818 else /* alternative 3,4 */
19820 gcc_assert (REGNO (nmask) == REGNO (dest));
19822 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19823 x = gen_rtx_AND (vmode, dest, op0);
19825 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine masked sign and masked magnitude.  */
19828 x = gen_rtx_IOR (vmode, dest, scratch);
19829 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19832 /* Return TRUE or FALSE depending on whether the first SET in INSN
19833 has source and destination with matching CC modes, and that the
19834 CC mode is at least as constrained as REQ_MODE. */
19837 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19840 machine_mode set_mode;
19842 set = PATTERN (insn);
19843 if (GET_CODE (set) == PARALLEL)
19844 set = XVECEXP (set, 0, 0);
19845 gcc_assert (GET_CODE (set) == SET);
19846 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19848 set_mode = GET_MODE (SET_DEST (set));
/* Compatibility lattice: some stricter CC modes satisfy weaker
   requirements (e.g. CCmode vs const0 comparisons).  */
19852 if (req_mode != CCNOmode
19853 && (req_mode != CCmode
19854 || XEXP (SET_SRC (set), 1) != const0_rtx))
19858 if (req_mode == CCGCmode)
19862 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19866 if (req_mode == CCZmode)
19876 if (set_mode != req_mode)
19881 gcc_unreachable ();
19884 return GET_MODE (SET_SRC (set)) == set_mode;
19887 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits FLAGS := COMPARE (op0, op1) in the CC mode selected for CODE
   and returns the (CODE FLAGS 0) rtx the condition-code consumer
   should test.  */
19890 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19892 machine_mode cmpmode;
19895 cmpmode = SELECT_CC_MODE (code, op0, op1);
19896 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19898 /* This is very simple, but making the interface the same as in the
19899 FP case makes the rest of the code easier. */
19900 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19901 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19903 /* Return the test that should be put into the flags user, i.e.
19904 the bcc, scc, or cmov instruction. */
19905 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19908 /* Figure out whether to use ordered or unordered fp comparisons.
19909 Return the appropriate mode to use. */
19912 ix86_fp_compare_mode (enum rtx_code)
19914 /* ??? In order to make all comparisons reversible, we do all comparisons
19915 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19916 all forms trapping and nontrapping comparisons, we can make inequality
19917 comparisons trapping again, since it results in better code when using
19918 FCOM based compares. */
19919 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode needed to implement comparison CODE
   on OP0/OP1.  Narrower CC modes (CCZ, CCC, CCNO, ...) let later
   passes combine flag-setting arithmetic with the compare.  */
19923 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19925 machine_mode mode = GET_MODE (op0);
19927 if (SCALAR_FLOAT_MODE_P (mode))
19929 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19930 return ix86_fp_compare_mode (code);
19935 /* Only zero flag is needed. */
19936 case EQ: /* ZF=0 */
19937 case NE: /* ZF!=0 */
19939 /* Codes needing carry flag. */
19940 case GEU: /* CF=0 */
19941 case LTU: /* CF=1 */
19942 /* Detect overflow checks. They need just the carry flag. */
19943 if (GET_CODE (op0) == PLUS
19944 && rtx_equal_p (op1, XEXP (op0, 0)))
19948 case GTU: /* CF=0 & ZF=0 */
19949 case LEU: /* CF=1 | ZF=1 */
19951 /* Codes possibly doable only with sign flag when
19952 comparing against zero. */
19953 case GE: /* SF=OF or SF=0 */
19954 case LT: /* SF<>OF or SF=1 */
19955 if (op1 == const0_rtx)
19958 /* For other cases Carry flag is not required. */
19960 /* Codes doable only with sign flag when comparing
19961 against zero, but we miss jump instruction for it
19962 so we need to use relational tests against overflow
19963 that thus needs to be zero. */
19964 case GT: /* ZF=0 & SF=OF */
19965 case LE: /* ZF=1 | SF<>OF */
19966 if (op1 == const0_rtx)
19970 /* strcmp pattern do (use flags) and combine may ask us for proper
19975 gcc_unreachable ();
19979 /* Return the fixed registers used for condition codes. */
/* Targets-hook implementation: reports the flags register(s) via *P1
   and *P2 for the cc-fusion optimization in final.  */
19982 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19989 /* If two condition code modes are compatible, return a condition code
19990 mode which is compatible with both. Otherwise, return
19993 static machine_mode
19994 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
/* Non-CC classes never mix here.  */
19999 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC unify (to the stricter of the two).  */
20002 if ((m1 == CCGCmode && m2 == CCGOCmode)
20003 || (m1 == CCGOCmode && m2 == CCGCmode))
/* CCZ is implied by either of the GC/GOC modes.  */
20006 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20008 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20014 gcc_unreachable ();
20044 /* These are only compatible with themselves, which we already
20051 /* Return a comparison we can do and that it is equivalent to
20052 swap_condition (code) apart possibly from orderedness.
20053 But, never change orderedness if TARGET_IEEE_FP, returning
20054 UNKNOWN in that case if necessary. */
20056 static enum rtx_code
20057 ix86_fp_swap_condition (enum rtx_code code)
/* The four codes below would flip between ordered and unordered when
   swapped; with IEEE semantics that changes NaN behavior, so refuse.  */
20061 case GT: /* GTU - CF=0 & ZF=0 */
20062 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20063 case GE: /* GEU - CF=0 */
20064 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20065 case UNLT: /* LTU - CF=1 */
20066 return TARGET_IEEE_FP ? UNKNOWN : GT;
20067 case UNLE: /* LEU - CF=1 | ZF=1 */
20068 return TARGET_IEEE_FP ? UNKNOWN : GE;
/* All other codes swap safely.  */
20070 return swap_condition (code);
20074 /* Return cost of comparison CODE using the best strategy for performance.
20075 All following functions do use number of instructions as a cost metrics.
20076 In future this should be tweaked to compute bytes for optimize_size and
20077 take into account performance of various instructions on various CPUs. */
20080 ix86_fp_comparison_cost (enum rtx_code code)
20084 /* The cost of code using bit-twiddling on %ah. */
/* arith_cost: instruction count of the fnstsw/%ah fallback; IEEE mode
   needs extra instructions to honor NaN ordering.  */
20101 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20105 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20108 gcc_unreachable ();
/* COMI and SAHF strategies are cheaper; their cost still grows by one
   when the arithmetic fallback would need the extra NaN handling.  */
20111 switch (ix86_fp_comparison_strategy (code))
20113 case IX86_FPCMP_COMI:
20114 return arith_cost > 4 ? 3 : 2;
20115 case IX86_FPCMP_SAHF:
20116 return arith_cost > 4 ? 4 : 3;
20122 /* Return strategy to use for floating-point. We assume that fcomi is always
20123 preferrable where available, since that is also true when looking at size
20124 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20126 enum ix86_fpcmp_strategy
20127 ix86_fp_comparison_strategy (enum rtx_code)
20129 /* Do fcomi/sahf based test when profitable. */
20132 return IX86_FPCMP_COMI;
20134 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20135 return IX86_FPCMP_SAHF;
/* Fallback: fnstsw + bit tests on %ah.  */
20137 return IX86_FPCMP_ARITH;
20140 /* Swap, force into registers, or otherwise massage the two operands
20141 to a fp comparison. The operands are updated in place; the new
20142 comparison code is returned. */
20144 static enum rtx_code
20145 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20147 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20148 rtx op0 = *pop0, op1 = *pop1;
20149 machine_mode op_mode = GET_MODE (op0);
20150 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20152 /* All of the unordered compare instructions only work on registers.
20153 The same is true of the fcomi compare instructions. The XFmode
20154 compare instructions require registers except when comparing
20155 against zero or when converting operand 1 from fixed point to
20159 && (fpcmp_mode == CCFPUmode
20160 || (op_mode == XFmode
20161 && ! (standard_80387_constant_p (op0) == 1
20162 || standard_80387_constant_p (op1) == 1)
20163 && GET_CODE (op1) != FLOAT)
20164 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)
20166 op0 = force_reg (op_mode, op0);
20167 op1 = force_reg (op_mode, op1);
20171 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20172 things around if they appear profitable, otherwise force op0
20173 into a register. */
20175 if (standard_80387_constant_p (op0) == 0
20177 && ! (standard_80387_constant_p (op1) == 0
/* Swap only when an equivalent swapped condition exists (may be
   blocked for IEEE orderedness — see ix86_fp_swap_condition).  */
20180 enum rtx_code new_code = ix86_fp_swap_condition (code);
20181 if (new_code != UNKNOWN)
20183 std::swap (op0, op1);
20189 op0 = force_reg (op_mode, op0);
20191 if (CONSTANT_P (op1))
20193 int tmp = standard_80387_constant_p (op1);
/* Non-standard x87 constants must live in memory.  */
20195 op1 = validize_mem (force_const_mem (op_mode, op1));
20199 op1 = force_reg (op_mode, op1);
20202 op1 = force_reg (op_mode, op1);
20206 /* Try to rearrange the comparison to make it cheaper. */
20207 if (ix86_fp_comparison_cost (code)
20208 > ix86_fp_comparison_cost (swap_condition (code))
20209 && (REG_P (op1) || can_create_pseudo_p ()))
20211 std::swap (op0, op1);
20212 code = swap_condition (code);
20214 op0 = force_reg (op_mode, op0);
20222 /* Convert comparison codes we use to represent FP comparison to integer
20223 code that will result in proper branch. Return UNKNOWN if no such code
/* Maps an FP comparison code onto the integer condition that tests the
   same flag pattern after fnstsw/sahf or comi put FP flags in EFLAGS.  */
20227 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): elided excerpt (embedded line numbers jump); code is left
   byte-identical, comments only.  Braces and several statements between
   visible lines are not shown here.  */
20256 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20259 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20261 machine_mode fpcmp_mode, intcmp_mode;
/* Pick the CC mode for this FP comparison, then let the args-preparation
   helper canonicalize/force the operands (possibly changing CODE).  */
20264 fpcmp_mode = ix86_fp_compare_mode (code);
20265 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20267 /* Do fcomi/sahf based test when profitable. */
20268 switch (ix86_fp_comparison_strategy (code))
/* COMI strategy: the compare writes the flags register directly, so the
   integer CC mode equals the FP compare mode.  */
20270 case IX86_FPCMP_COMI:
20271 intcmp_mode = fpcmp_mode;
20272 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20273 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* SAHF strategy: same flags result, but a scratch HImode register is
   clobbered (it carries the FP status word) -- emitted as a PARALLEL of
   the flags SET plus the CLOBBER.  */
20278 case IX86_FPCMP_SAHF:
20279 intcmp_mode = fpcmp_mode;
20280 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20281 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20285 scratch = gen_reg_rtx (HImode);
20286 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20287 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
/* ARITH strategy: fetch the FP status word via fnstsw (UNSPEC_FNSTSW)
   into a scratch HImode reg, then massage the C0/C2/C3 bits in AH with
   test/and/add/cmp/xor so an integer condition can be branched on.  */
20290 case IX86_FPCMP_ARITH:
20291 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20292 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20293 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20295 scratch = gen_reg_rtx (HImode);
20296 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20298 /* In the unordered case, we have to check C2 for NaN's, which
20299 doesn't happen to work out to anything nice combination-wise.
20300 So do some bit twiddling on the value we've got in AH to come
20301 up with an appropriate set of condition codes. */
20303 intcmp_mode = CCNOmode;
/* Below: one bit-twiddling sequence per comparison code.  The masks
   (0x45, 0x44, 0x40, 0x05, 0x04, 0x01) select combinations of the FPU
   status bits in AH; TARGET_IEEE_FP selects the NaN-correct variants.
   The surrounding switch/case structure is elided in this excerpt.  */
20308 if (code == GT || !TARGET_IEEE_FP)
20310 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20315 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20316 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20317 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20318 intcmp_mode = CCmode;
20324 if (code == LT && TARGET_IEEE_FP)
20326 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20327 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20328 intcmp_mode = CCmode;
20333 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20339 if (code == GE || !TARGET_IEEE_FP)
20341 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20346 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20347 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20353 if (code == LE && TARGET_IEEE_FP)
20355 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20356 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20357 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20358 intcmp_mode = CCmode;
20363 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20369 if (code == EQ && TARGET_IEEE_FP)
20371 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20372 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20373 intcmp_mode = CCmode;
20378 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20384 if (code == NE && TARGET_IEEE_FP)
20386 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20387 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20393 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20399 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20403 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20408 gcc_unreachable ();
20416 /* Return the test that should be put into the flags user, i.e.
20417 the bcc, scc, or cmov instruction. */
20418 return gen_rtx_fmt_ee (code, VOIDmode,
20419 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a comparison to the right expander: CC-mode operands are used
   as-is, scalar FP goes through ix86_expand_fp_compare, everything else
   through ix86_expand_int_compare.  Decimal FP is asserted away.
   NOTE(review): elided excerpt; return statement and braces not visible.  */
20424 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20428 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20429 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20431 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20433 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20434 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20437 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch for CODE comparing OP0/OP1, jumping to LABEL.
   Double-word (DImode on 32-bit, TImode on 64-bit -- the wide case) compares
   are split into word-sized compare+branch sequences.
   NOTE(review): elided excerpt; the outer switch over MODE and several
   braces/declarations are not visible.  Code kept byte-identical.  */
20443 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20445 machine_mode mode = GET_MODE (op0);
/* Simple case: one compare, one conditional jump to LABEL (else fall
   through to pc).  */
20457 tmp = ix86_expand_compare (code, op0, op1);
20458 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20459 gen_rtx_LABEL_REF (VOIDmode, label),
20461 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20468 /* Expand DImode branch into multiple compare+branch. */
20471 rtx_code_label *label2;
20472 enum rtx_code code1, code2, code3;
20473 machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
20475 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20477 std::swap (op0, op1);
20478 code = swap_condition (code);
/* Split each double-word operand into lo/hi word halves.  */
20481 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20482 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20484 submode = mode == DImode ? SImode : DImode;
20486 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20487 avoid two branches. This costs one extra insn, so disable when
20488 optimizing for size. */
20490 if ((code == EQ || code == NE)
20491 && (!optimize_insn_for_size_p ()
20492 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR against a half only when it is nonzero; then OR the halves and
   recurse with a single word-mode EQ/NE against zero.  */
20497 if (hi[1] != const0_rtx)
20498 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20499 NULL_RTX, 0, OPTAB_WIDEN);
20502 if (lo[1] != const0_rtx)
20503 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20504 NULL_RTX, 0, OPTAB_WIDEN);
20506 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20507 NULL_RTX, 0, OPTAB_WIDEN);
20509 ix86_expand_branch (code, tmp, const0_rtx, label);
20513 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20514 op1 is a constant and the low word is zero, then we can just
20515 examine the high word. Similarly for low word -1 and
20516 less-or-equal-than or greater-than. */
20518 if (CONST_INT_P (hi[1]))
20521 case LT: case LTU: case GE: case GEU:
20522 if (lo[1] == const0_rtx)
20524 ix86_expand_branch (code, hi[0], hi[1], label);
20528 case LE: case LEU: case GT: case GTU:
20529 if (lo[1] == constm1_rtx)
20531 ix86_expand_branch (code, hi[0], hi[1], label);
20539 /* Otherwise, we need two or three jumps. */
20541 label2 = gen_label_rtx ();
/* code1 branches to LABEL on the high words, code2 branches to the local
   label2 (the "false" exit), code3 is the unsigned variant used on the
   low words.  */
20544 code2 = swap_condition (code);
20545 code3 = unsigned_condition (code);
20549 case LT: case GT: case LTU: case GTU:
20552 case LE: code1 = LT; code2 = GT; break;
20553 case GE: code1 = GT; code2 = LT; break;
20554 case LEU: code1 = LTU; code2 = GTU; break;
20555 case GEU: code1 = GTU; code2 = LTU; break;
20557 case EQ: code1 = UNKNOWN; code2 = NE; break;
20558 case NE: code2 = UNKNOWN; break;
20561 gcc_unreachable ();
20566 * if (hi(a) < hi(b)) goto true;
20567 * if (hi(a) > hi(b)) goto false;
20568 * if (lo(a) < lo(b)) goto true;
20572 if (code1 != UNKNOWN)
20573 ix86_expand_branch (code1, hi[0], hi[1], label);
20574 if (code2 != UNKNOWN)
20575 ix86_expand_branch (code2, hi[0], hi[1], label2);
20577 ix86_expand_branch (code3, lo[0], lo[1], label);
20579 if (code2 != UNKNOWN)
20580 emit_label (label2);
/* Default case of the mode switch (not fully visible): only CC-mode
   operands are expected to reach here.  */
20585 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
/* NOTE(review): elided excerpt; code kept byte-identical, comments only.  */
20590 /* Split branch based on floating point condition. */
20592 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20593 rtx target1, rtx target2, rtx tmp)
/* Canonicalize so the fall-through (pc) is TARGET2; if it isn't, swap the
   targets and reverse the condition (unordered-aware reversal).  */
20598 if (target2 != pc_rtx)
20600 std::swap (target1, target2);
20601 code = reverse_condition_maybe_unordered (code);
20604 condition = ix86_expand_fp_compare (code, op1, op2,
/* Emit the conditional jump and attach the branch probability note when
   one was recorded by the splitter.  */
20607 i = emit_jump_insn (gen_rtx_SET
20609 gen_rtx_IF_THEN_ELSE (VOIDmode,
20610 condition, target1, target2)));
20611 if (split_branch_probability >= 0)
20612 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Emit a setcc: materialize comparison CODE of OP0/OP1 as a QImode 0/1
   value in DEST.  The comparison rtx is retagged to QImode so the SET is
   a valid scc pattern.  NOTE(review): elided excerpt; declaration of
   `ret` and braces not visible.  */
20616 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20620 gcc_assert (GET_MODE (dest) == QImode);
20622 ret = ix86_expand_compare (code, op0, op1);
20623 PUT_MODE (ret, QImode);
20624 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
/* NOTE(review): elided excerpt (line numbers jump); code kept
   byte-identical.  Rewrites CODE/OP0/OP1 so the comparison reduces to a
   carry-flag test (LTU/GEU), which enables sbb/adc based conditional
   move sequences in callers.  */
20627 /* Expand comparison setting or clearing carry flag. Return true when
20628 successful and set pop for the operation. */
20630 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20632 machine_mode mode =
20633 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20635 /* Do not handle double-mode compares that go through special path. */
20636 if (mode == (TARGET_64BIT ? TImode : DImode))
20639 if (SCALAR_FLOAT_MODE_P (mode))
20642 rtx_insn *compare_seq;
20644 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20646 /* Shortcut: following common codes never translate
20647 into carry flag compares. */
20648 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20649 || code == ORDERED || code == UNORDERED)
20652 /* These comparisons require zero flag; swap operands so they won't. */
20653 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20654 && !TARGET_IEEE_FP)
20656 std::swap (op0, op1);
20657 code = swap_condition (code);
20660 /* Try to expand the comparison and verify that we end up with
20661 carry flag based comparison. This fails to be true only when
20662 we decide to expand comparison using arithmetic that is not
20663 too common scenario. */
20665 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20666 compare_seq = get_insns ();
/* If the FP compare produced a CCFP/CCFPU flags mode, translate the FP
   code into the integer condition actually tested on the flags.  */
20669 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20670 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20671 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20673 code = GET_CODE (compare_op);
/* Only carry-flag conditions qualify; otherwise the buffered compare
   sequence is discarded (emission path elided here).  */
20675 if (code != LTU && code != GEU)
20678 emit_insn (compare_seq);
20683 if (!INTEGRAL_MODE_P (mode))
20692 /* Convert a==0 into (unsigned)a<1. */
20695 if (op1 != const0_rtx)
20698 code = (code == EQ ? LTU : GEU);
20701 /* Convert a>b into b<a or a>=b-1. */
20704 if (CONST_INT_P (op1))
20706 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20707 /* Bail out on overflow. We still can swap operands but that
20708 would force loading of the constant into register. */
20709 if (op1 == const0_rtx
20710 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20712 code = (code == GTU ? GEU : LTU);
20716 std::swap (op0, op1);
20717 code = (code == GTU ? LTU : GEU);
20721 /* Convert a>=0 into (unsigned)a<0x80000000. */
/* NOTE(review): the DImode checks below appear to exclude the wide mode
   from the signbit trick; the 1 << (BITSIZE-1) expression is evaluated in
   host int and passed through gen_int_mode -- behavior depends on the
   elided guards, so not asserting more here.  */
20724 if (mode == DImode || op1 != const0_rtx)
20726 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20727 code = (code == LT ? GEU : LTU);
20731 if (mode == DImode || op1 != constm1_rtx)
20733 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20734 code = (code == LE ? GEU : LTU);
20740 /* Swapping operands may cause constant to appear as first operand. */
20741 if (!nonimmediate_operand (op0, VOIDmode))
20743 if (!can_create_pseudo_p ())
20745 op0 = force_reg (mode, op0);
/* Success: hand the caller the final LTU/GEU comparison rtx.  */
20747 *pop = ix86_expand_compare (code, op0, op1);
20748 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* NOTE(review): elided excerpt (embedded line numbers jump); many braces,
   labels and intermediate statements are not visible.  Code is left
   byte-identical; comments describe only visible lines.
   Expands an integer conditional move operands[0] = cond ? operands[2]
   : operands[3], trying branch-free sbb/setcc/lea sequences before
   falling back to a real cmov / IF_THEN_ELSE.  */
20753 ix86_expand_int_movcc (rtx operands[])
20755 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20756 rtx_insn *compare_seq;
20758 machine_mode mode = GET_MODE (operands[0]);
20759 bool sign_bit_compare_p = false;
20760 rtx op0 = XEXP (operands[1], 0);
20761 rtx op1 = XEXP (operands[1], 1);
20763 if (GET_MODE (op0) == TImode
20764 || (GET_MODE (op0) == DImode
/* Buffer the compare sequence; it is only emitted on paths that need the
   flags (see emit_insn (compare_seq) near the end).  */
20769 compare_op = ix86_expand_compare (code, op0, op1);
20770 compare_seq = get_insns ();
20773 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 / x > -1 / x <= -1 are sign-bit tests, handled by
   arithmetic shifts below instead of flag tricks.  */
20775 if ((op1 == const0_rtx && (code == GE || code == LT))
20776 || (op1 == constm1_rtx && (code == GT || code == LE)))
20777 sign_bit_compare_p = true;
20779 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20780 HImode insns, we'd be swallowed in word prefix ops. */
20782 if ((mode != HImode || TARGET_FAST_PREFIX)
20783 && (mode != (TARGET_64BIT ? TImode : DImode))
20784 && CONST_INT_P (operands[2])
20785 && CONST_INT_P (operands[3]))
20787 rtx out = operands[0];
20788 HOST_WIDE_INT ct = INTVAL (operands[2]);
20789 HOST_WIDE_INT cf = INTVAL (operands[3]);
20790 HOST_WIDE_INT diff;
20793 /* Sign bit compares are better done using shifts than we do by using
20795 if (sign_bit_compare_p
20796 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20798 /* Detect overlap between destination and compare sources. */
20801 if (!sign_bit_compare_p)
20804 bool fpcmp = false;
20806 compare_code = GET_CODE (compare_op);
20808 flags = XEXP (compare_op, 0);
20810 if (GET_MODE (flags) == CCFPmode
20811 || GET_MODE (flags) == CCFPUmode)
20815 = ix86_fp_compare_code_to_integer (compare_code);
20818 /* To simplify rest of code, restrict to the GEU case. */
20819 if (compare_code == LTU)
20821 std::swap (ct, cf);
20822 compare_code = reverse_condition (compare_code);
20823 code = reverse_condition (code);
/* For FP-derived flags use the unordered-aware reversal.  */
20828 PUT_CODE (compare_op,
20829 reverse_condition_maybe_unordered
20830 (GET_CODE (compare_op)));
20832 PUT_CODE (compare_op,
20833 reverse_condition (GET_CODE (compare_op)));
20837 if (reg_overlap_mentioned_p (out, op0)
20838 || reg_overlap_mentioned_p (out, op1))
20839 tmp = gen_reg_rtx (mode);
/* sbb idiom: materialize 0 / -1 from the carry flag.  */
20841 if (mode == DImode)
20842 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20844 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20845 flags, compare_op));
/* Sign-bit path: normalize GT/GE to the reversed test, then use
   emit_store_flag to get a 0/-1 mask.  */
20849 if (code == GT || code == GE)
20850 code = reverse_condition (code);
20853 std::swap (ct, cf);
20856 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
/* The following arms (guards elided) turn the 0/-1 mask into the
   ct/cf selection with PLUS/IOR/NOT/AND combinations, depending on
   the value of diff = ct - cf.  */
20869 tmp = expand_simple_binop (mode, PLUS,
20871 copy_rtx (tmp), 1, OPTAB_DIRECT);
20882 tmp = expand_simple_binop (mode, IOR,
20884 copy_rtx (tmp), 1, OPTAB_DIRECT);
20886 else if (diff == -1 && ct)
20896 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20898 tmp = expand_simple_binop (mode, PLUS,
20899 copy_rtx (tmp), GEN_INT (cf),
20900 copy_rtx (tmp), 1, OPTAB_DIRECT);
20908 * andl cf - ct, dest
20918 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20921 tmp = expand_simple_binop (mode, AND,
20923 gen_int_mode (cf - ct, mode),
20924 copy_rtx (tmp), 1, OPTAB_DIRECT);
20926 tmp = expand_simple_binop (mode, PLUS,
20927 copy_rtx (tmp), GEN_INT (ct),
20928 copy_rtx (tmp), 1, OPTAB_DIRECT);
20931 if (!rtx_equal_p (tmp, out))
20932 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
/* Constant canonicalization: if the condition can be reversed, swap
   ct/cf so later arithmetic works on the preferred orientation.  */
20939 machine_mode cmp_mode = GET_MODE (op0);
20940 enum rtx_code new_code;
20942 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20944 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20946 /* We may be reversing unordered compare to normal compare, that
20947 is not valid in general (we may convert non-trapping condition
20948 to trapping one), however on i386 we currently emit all
20949 comparisons unordered. */
20950 new_code = reverse_condition_maybe_unordered (code);
20953 new_code = ix86_reverse_condition (code, cmp_mode);
20954 if (new_code != UNKNOWN)
20956 std::swap (ct, cf);
20962 compare_code = UNKNOWN;
20963 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20964 && CONST_INT_P (op1))
20966 if (op1 == const0_rtx
20967 && (code == LT || code == GE))
20968 compare_code = code;
20969 else if (op1 == constm1_rtx)
20973 else if (code == GT)
20978 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20979 if (compare_code != UNKNOWN
20980 && GET_MODE (op0) == GET_MODE (out)
20981 && (cf == -1 || ct == -1))
20983 /* If lea code below could be used, only optimize
20984 if it results in a 2 insn sequence. */
20986 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20987 || diff == 3 || diff == 5 || diff == 9)
20988 || (compare_code == LT && ct == -1)
20989 || (compare_code == GE && cf == -1))
20992 * notl op1 (if necessary)
21000 code = reverse_condition (code);
21003 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21005 out = expand_simple_binop (mode, IOR,
21007 out, 1, OPTAB_DIRECT);
21008 if (out != operands[0])
21009 emit_move_insn (operands[0], out);
/* lea path: diff in {1,2,3,4,5,8,9} can be encoded as a single lea
   of setcc-scaled value plus displacement.  */
21016 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21017 || diff == 3 || diff == 5 || diff == 9)
21018 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21020 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21026 * lea cf(dest*(ct-cf)),dest
21030 * This also catches the degenerate setcc-only case.
21036 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21039 /* On x86_64 the lea instruction operates on Pmode, so we need
21040 to get arithmetics done in proper mode to match. */
21042 tmp = copy_rtx (out);
21046 out1 = copy_rtx (out);
21047 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21051 tmp = gen_rtx_PLUS (mode, tmp, out1);
21057 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21060 if (!rtx_equal_p (tmp, out))
21063 out = force_operand (tmp, copy_rtx (out));
21065 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21067 if (!rtx_equal_p (out, operands[0]))
21068 emit_move_insn (operands[0], copy_rtx (out));
21074 * General case: Jumpful:
21075 * xorl dest,dest cmpl op1, op2
21076 * cmpl op1, op2 movl ct, dest
21077 * setcc dest jcc 1f
21078 * decl dest movl cf, dest
21079 * andl (cf-ct),dest 1:
21082 * Size 20. Size 14.
21084 * This is reasonably steep, but branch mispredict costs are
21085 * high on modern cpus, so consider failing only if optimizing
21089 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21090 && BRANCH_COST (optimize_insn_for_speed_p (),
21095 machine_mode cmp_mode = GET_MODE (op0);
21096 enum rtx_code new_code;
21098 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21100 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21102 /* We may be reversing unordered compare to normal compare,
21103 that is not valid in general (we may convert non-trapping
21104 condition to trapping one), however on i386 we currently
21105 emit all comparisons unordered. */
21106 new_code = reverse_condition_maybe_unordered (code);
21110 new_code = ix86_reverse_condition (code, cmp_mode);
21111 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21112 compare_code = reverse_condition (compare_code);
21115 if (new_code != UNKNOWN)
21123 if (compare_code != UNKNOWN)
21125 /* notl op1 (if needed)
21130 For x < 0 (resp. x <= -1) there will be no notl,
21131 so if possible swap the constants to get rid of the
21133 True/false will be -1/0 while code below (store flag
21134 followed by decrement) is 0/-1, so the constants need
21135 to be exchanged once more. */
21137 if (compare_code == GE || !cf)
21139 code = reverse_condition (code);
21143 std::swap (ct, cf);
21145 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21149 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21151 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21153 copy_rtx (out), 1, OPTAB_DIRECT);
21156 out = expand_simple_binop (mode, AND, copy_rtx (out),
21157 gen_int_mode (cf - ct, mode),
21158 copy_rtx (out), 1, OPTAB_DIRECT);
21160 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21161 copy_rtx (out), 1, OPTAB_DIRECT);
21162 if (!rtx_equal_p (out, operands[0]))
21163 emit_move_insn (operands[0], copy_rtx (out));
21169 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21171 /* Try a few things more with specific constants and a variable. */
21174 rtx var, orig_out, out, tmp;
21176 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21179 /* If one of the two operands is an interesting constant, load a
21180 constant with the above and mask it in with a logical operation. */
21182 if (CONST_INT_P (operands[2]))
21185 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21186 operands[3] = constm1_rtx, op = and_optab;
21187 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21188 operands[3] = const0_rtx, op = ior_optab;
21192 else if (CONST_INT_P (operands[3]))
21195 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21196 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): `operands[3] != const0_rtx` below is vacuously true
   (operands[3] is known to be -1 here); by symmetry with the mirror
   branch at original line 21187 this looks like it was meant to test
   operands[2].  Harmless as written -- left unchanged, but worth
   confirming upstream.  */
21197 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21198 operands[2] = const0_rtx, op = ior_optab;
21205 orig_out = operands[0];
21206 tmp = gen_reg_rtx (mode);
21209 /* Recurse to get the constant loaded. */
21210 if (ix86_expand_int_movcc (operands) == 0)
21213 /* Mask in the interesting variable. */
21214 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21216 if (!rtx_equal_p (out, orig_out))
21217 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21223 * For comparison with above,
/* Fallback: real cmov.  Force the arms into cmov-compatible operands,
   emit the buffered compare, then the IF_THEN_ELSE set.  */
21233 if (! nonimmediate_operand (operands[2], mode))
21234 operands[2] = force_reg (mode, operands[2]);
21235 if (! nonimmediate_operand (operands[3], mode))
21236 operands[3] = force_reg (mode, operands[3]);
21238 if (! register_operand (operands[2], VOIDmode)
21240 || ! register_operand (operands[3], VOIDmode)))
21241 operands[2] = force_reg (mode, operands[2]);
21244 && ! register_operand (operands[3], VOIDmode))
21245 operands[3] = force_reg (mode, operands[3]);
21247 emit_insn (compare_seq);
21248 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21249 gen_rtx_IF_THEN_ELSE (mode,
21250 compare_op, operands[2],
/* NOTE(review): elided excerpt; switch skeleton and case labels between
   the visible comments are not shown.  Code kept byte-identical.  */
21255 /* Swap, force into registers, or otherwise massage the two operands
21256 to an sse comparison with a mask result. Thus we differ a bit from
21257 ix86_prepare_fp_compare_args which expects to produce a flags result.
21259 The DEST operand exists to help determine whether to commute commutative
21260 operators. The POP0/POP1 operands are updated in place. The new
21261 comparison code is returned, or UNKNOWN if not implementable. */
21263 static enum rtx_code
21264 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21265 rtx *pop0, rtx *pop1)
21271 /* AVX supports all the needed comparisons. */
21274 /* We have no LTGT as an operator. We could implement it with
21275 NE & ORDERED, but this requires an extra temporary. It's
21276 not clear that it's worth it. */
21283 /* These are supported directly. */
21290 /* AVX has 3 operand comparisons, no need to swap anything. */
21293 /* For commutative operators, try to canonicalize the destination
21294 operand to be first in the comparison - this helps reload to
21295 avoid extra moves. */
21296 if (!dest || !rtx_equal_p (dest, *pop1))
21304 /* These are not supported directly before AVX, and furthermore
21305 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21306 comparison operands to transform into something that is
21308 std::swap (*pop0, *pop1);
21309 code = swap_condition (code);
21313 gcc_unreachable ();
/* NOTE(review): elided excerpt; code kept byte-identical, comments only.  */
21319 /* Detect conditional moves that exactly match min/max operational
21320 semantics. Note that this is IEEE safe, as long as we don't
21321 interchange the operands.
21323 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21324 and TRUE if the operation is successful and instructions are emitted. */
21327 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21328 rtx cmp_op1, rtx if_true, rtx if_false)
/* UNGE is LT with the arms exchanged (guards for other codes elided).  */
21336 else if (code == UNGE)
21337 std::swap (if_true, if_false);
/* The compare operands must be exactly the two cmov arms; their order
   determines min vs max.  */
21341 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21343 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21348 mode = GET_MODE (dest);
21350 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21351 but MODE may be a vector mode and thus not appropriate. */
21352 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict-IEEE path: wrap the pair in an UNSPEC so operand order (and
   hence NaN/signed-zero behavior of min/max insns) is preserved.  */
21354 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21357 if_true = force_reg (mode, if_true);
21358 v = gen_rtvec (2, if_true, if_false);
21359 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed-math path: plain SMIN/SMAX rtx.  */
21363 code = is_min ? SMIN : SMAX;
21364 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21367 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
/* NOTE(review): elided excerpt; case labels/braces between visible lines
   are not shown.  Code kept byte-identical.  */
21371 /* Expand an sse vector comparison. Return the register with the result. */
21374 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21375 rtx op_true, rtx op_false)
21377 machine_mode mode = GET_MODE (dest);
21378 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21380 /* In general case result of comparison can differ from operands' type. */
21381 machine_mode cmp_mode;
21383 /* In AVX512F the result of comparison is an integer mask. */
21384 bool maskcmp = false;
/* 64-byte operands => AVX512 mask compare: the result mode is an integer
   mode with one bit per element.  */
21387 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21389 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21390 gcc_assert (cmp_mode != BLKmode);
21395 cmp_mode = cmp_ops_mode;
21398 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21399 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21400 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
/* Use a fresh result register if DEST overlaps any input (first guard
   condition elided).  */
21403 || reg_overlap_mentioned_p (dest, op_true)
21404 || reg_overlap_mentioned_p (dest, op_false))
21405 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21407 /* Compare patterns for int modes are unspec in AVX512F only. */
21408 if (maskcmp && (code == GT || code == EQ))
21410 rtx (*gen)(rtx, rtx, rtx);
21412 switch (cmp_ops_mode)
21415 gcc_assert (TARGET_AVX512BW);
21416 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21419 gcc_assert (TARGET_AVX512BW);
21420 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21423 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21426 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21434 emit_insn (gen (dest, cmp_op0, cmp_op1));
/* Generic path: emit the comparison rtx directly; convert when the
   compare mode differs from the destination mode (non-mask case).  */
21438 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21440 if (cmp_mode != mode && !maskcmp)
21442 x = force_reg (cmp_ops_mode, x);
21443 convert_move (dest, x, false);
21446 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* NOTE(review): elided excerpt; switch skeleton/case labels between the
   visible assignments are not shown.  Code kept byte-identical.  */
21451 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21452 operations. This is used for both scalar and vector conditional moves. */
21455 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21457 machine_mode mode = GET_MODE (dest);
21458 machine_mode cmpmode = GET_MODE (cmp);
21460 /* In AVX512F the result of comparison is an integer mask. */
21461 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
/* Special case: all-ones/zero arms -- the mask itself is the answer.  */
21465 if (vector_all_ones_operand (op_true, mode)
21466 && rtx_equal_p (op_false, CONST0_RTX (mode))
21469 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
/* false arm is zero: dest = cmp & true.  */
21471 else if (op_false == CONST0_RTX (mode)
21474 op_true = force_reg (mode, op_true);
21475 x = gen_rtx_AND (mode, cmp, op_true);
21476 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* true arm is zero: dest = ~cmp & false.  */
21478 else if (op_true == CONST0_RTX (mode)
21481 op_false = force_reg (mode, op_false);
21482 x = gen_rtx_NOT (mode, cmp);
21483 x = gen_rtx_AND (mode, x, op_false);
21484 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* integral, true arm all-ones: dest = cmp | false.  */
21486 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21489 op_false = force_reg (mode, op_false);
21490 x = gen_rtx_IOR (mode, cmp, op_false);
21491 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* XOP: direct IF_THEN_ELSE (vpcmov).  */
21493 else if (TARGET_XOP
21496 op_true = force_reg (mode, op_true);
21498 if (!nonimmediate_operand (op_false, mode))
21499 op_false = force_reg (mode, op_false);
21501 emit_insn (gen_rtx_SET (mode, dest,
21502 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Blend path: pick a blendv/blendm generator per mode (SSE4.1 / AVX /
   AVX2 / AVX512); QI-vector cases go via a V16QI/V32QI lowpart view.  */
21508 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21511 if (!nonimmediate_operand (op_true, mode))
21512 op_true = force_reg (mode, op_true);
21514 op_false = force_reg (mode, op_false);
21520 gen = gen_sse4_1_blendvps;
21524 gen = gen_sse4_1_blendvpd;
21532 gen = gen_sse4_1_pblendvb;
21533 if (mode != V16QImode)
21534 d = gen_reg_rtx (V16QImode);
21535 op_false = gen_lowpart (V16QImode, op_false);
21536 op_true = gen_lowpart (V16QImode, op_true);
21537 cmp = gen_lowpart (V16QImode, cmp);
21542 gen = gen_avx_blendvps256;
21546 gen = gen_avx_blendvpd256;
21554 gen = gen_avx2_pblendvb;
21555 if (mode != V32QImode)
21556 d = gen_reg_rtx (V32QImode);
21557 op_false = gen_lowpart (V32QImode, op_false);
21558 op_true = gen_lowpart (V32QImode, op_true);
21559 cmp = gen_lowpart (V32QImode, cmp);
21564 gen = gen_avx512bw_blendmv64qi;
21567 gen = gen_avx512bw_blendmv32hi;
21570 gen = gen_avx512f_blendmv16si;
21573 gen = gen_avx512f_blendmv8di;
21576 gen = gen_avx512f_blendmv8df;
21579 gen = gen_avx512f_blendmv16sf;
21588 emit_insn (gen (d, op_false, op_true, cmp));
21590 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* Fallback: classic and/andnot/or mask merge through temporaries.  */
21594 op_true = force_reg (mode, op_true);
21596 t2 = gen_reg_rtx (mode);
21598 t3 = gen_reg_rtx (mode);
21602 x = gen_rtx_AND (mode, op_true, cmp);
21603 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21605 x = gen_rtx_NOT (mode, cmp);
21606 x = gen_rtx_AND (mode, x, op_false);
21607 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21609 x = gen_rtx_IOR (mode, t3, t2);
21610 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* NOTE(review): elided excerpt; code kept byte-identical, comments only.  */
21615 /* Expand a floating-point conditional move. Return true if successful. */
21618 ix86_expand_fp_movcc (rtx operands[])
21620 machine_mode mode = GET_MODE (operands[0]);
21621 enum rtx_code code = GET_CODE (operands[1]);
21622 rtx tmp, compare_op;
21623 rtx op0 = XEXP (operands[1], 0);
21624 rtx op1 = XEXP (operands[1], 1);
/* SSE scalar math path: build a mask compare then blend, instead of the
   x87 fcmov.  */
21626 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21628 machine_mode cmode;
21630 /* Since we've no cmove for sse registers, don't force bad register
21631 allocation just to gain access to it. Deny movcc when the
21632 comparison mode doesn't match the move mode. */
21633 cmode = GET_MODE (op0);
21634 if (cmode == VOIDmode)
21635 cmode = GET_MODE (op1);
21639 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21640 if (code == UNKNOWN)
/* min/max shortcut first; otherwise mask compare + movcc blend.  */
21643 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21644 operands[2], operands[3]))
21647 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21648 operands[2], operands[3]);
21649 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21653 if (GET_MODE (op0) == TImode
21654 || (GET_MODE (op0) == DImode
21658 /* The floating point conditional move instructions don't directly
21659 support conditions resulting from a signed integer comparison. */
21661 compare_op = ix86_expand_compare (code, op0, op1);
/* If fcmov cannot use this condition, reduce it to a setcc byte tested
   against zero (NE), which fcmov does support.  */
21662 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21664 tmp = gen_reg_rtx (QImode);
21665 ix86_expand_setcc (tmp, code, op0, op1);
21667 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21670 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21671 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21672 operands[2], operands[3])));
/* NOTE(review): elided excerpt; case labels between visible lines are not
   shown.  Code kept byte-identical.  */
21677 /* Expand a floating-point vector conditional move; a vcond operation
21678 rather than a movcc operation. */
21681 ix86_expand_fp_vcond (rtx operands[])
21683 enum rtx_code code = GET_CODE (operands[3]);
21686 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21687 &operands[4], &operands[5]);
21688 if (code == UNKNOWN)
21691 switch (GET_CODE (operands[3]))
/* LTGT-style case: combine an ORDERED mask with an NE selection
   (exact case labels elided).  */
21694 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21695 operands[5], operands[0], operands[0]);
21696 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21697 operands[5], operands[1], operands[2]);
/* UNEQ-style case: UNORDERED mask combined with an EQ selection.  */
21701 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21702 operands[5], operands[0], operands[0]);
21703 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21704 operands[5], operands[1], operands[2]);
21708 gcc_unreachable ();
21710 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21712 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Directly-supported codes: try min/max first, else compare + blend.  */
21716 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21717 operands[5], operands[1], operands[2]))
21720 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21721 operands[1], operands[2]);
21722 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* NOTE(review): elided excerpt (embedded line numbers jump); case labels
   and several guards are not visible.  Code kept byte-identical.  */
21726 /* Expand a signed/unsigned integral vector conditional move. */
21729 ix86_expand_int_vcond (rtx operands[])
21731 machine_mode data_mode = GET_MODE (operands[0]);
21732 machine_mode mode = GET_MODE (operands[4]);
21733 enum rtx_code code = GET_CODE (operands[3]);
21734 bool negate = false;
21737 cop0 = operands[4];
21738 cop1 = operands[5];
21740 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21741 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21742 if ((code == LT || code == GE)
21743 && data_mode == mode
21744 && cop1 == CONST0_RTX (mode)
21745 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21746 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21747 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21748 && (GET_MODE_SIZE (data_mode) == 16
21749 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21751 rtx negop = operands[2 - (code == LT)];
21752 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
/* x<0 ? 1 : 0  => logical shift right by (elt bits - 1).  */
21753 if (negop == CONST1_RTX (data_mode))
21755 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21756 operands[0], 1, OPTAB_DIRECT);
21757 if (res != operands[0])
21758 emit_move_insn (operands[0], res);
/* x<0 ? -1 : 0 => arithmetic shift right (no DImode element here).  */
21761 else if (GET_MODE_INNER (data_mode) != DImode
21762 && vector_all_ones_operand (negop, data_mode))
21764 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21765 operands[0], 0, OPTAB_DIRECT);
21766 if (res != operands[0])
21767 emit_move_insn (operands[0], res);
21772 if (!nonimmediate_operand (cop1, mode))
21773 cop1 = force_reg (mode, cop1);
21774 if (!general_operand (operands[1], data_mode))
21775 operands[1] = force_reg (data_mode, operands[1]);
21776 if (!general_operand (operands[2], data_mode))
21777 operands[2] = force_reg (data_mode, operands[2]);
21779 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21781 && (mode == V16QImode || mode == V8HImode
21782 || mode == V4SImode || mode == V2DImode))
21786 /* Canonicalize the comparison to EQ, GT, GTU. */
/* NE/LE-family codes are reduced by reversing or swapping; `negate`
   records that the arms must be exchanged (case labels elided).  */
21797 code = reverse_condition (code);
21803 code = reverse_condition (code);
21809 std::swap (cop0, cop1);
21810 code = swap_condition (code);
21814 gcc_unreachable ();
21817 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21818 if (mode == V2DImode)
21823 /* SSE4.1 supports EQ. */
21824 if (!TARGET_SSE4_1)
21830 /* SSE4.2 supports GT/GTU. */
21831 if (!TARGET_SSE4_2)
21836 gcc_unreachable ();
21840 /* Unsigned parallel compare is not supported by the hardware.
21841 Play some tricks to turn this into a signed comparison
21845 cop0 = force_reg (mode, cop0);
/* Bias trick: subtract the sign-bit mask from both operands so a signed
   GT gives the unsigned ordering.  */
21857 rtx (*gen_sub3) (rtx, rtx, rtx);
21861 case V16SImode: gen_sub3 = gen_subv16si3; break;
21862 case V8DImode: gen_sub3 = gen_subv8di3; break;
21863 case V8SImode: gen_sub3 = gen_subv8si3; break;
21864 case V4DImode: gen_sub3 = gen_subv4di3; break;
21865 case V4SImode: gen_sub3 = gen_subv4si3; break;
21866 case V2DImode: gen_sub3 = gen_subv2di3; break;
21868 gcc_unreachable ();
21870 /* Subtract (-(INT MAX) - 1) from both operands to make
21872 mask = ix86_build_signbit_mask (mode, true, false);
21873 t1 = gen_reg_rtx (mode);
21874 emit_insn (gen_sub3 (t1, cop0, mask));
21876 t2 = gen_reg_rtx (mode);
21877 emit_insn (gen_sub3 (t2, cop1, mask));
21891 /* Perform a parallel unsigned saturating subtraction. */
21892 x = gen_reg_rtx (mode);
21893 emit_insn (gen_rtx_SET (VOIDmode, x,
21894 gen_rtx_US_MINUS (mode, cop0, cop1)));
21897 cop1 = CONST0_RTX (mode);
21903 gcc_unreachable ();
21908 /* Allow the comparison to be done in one mode, but the movcc to
21909 happen in another mode. */
21910 if (data_mode == mode)
21912 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21913 operands[1+negate], operands[2-negate]);
21917 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21918 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21919 operands[1+negate], operands[2-negate]);
21920 if (GET_MODE (x) == mode)
21921 x = gen_lowpart (data_mode, x);
/* `negate` flips the arm selection established during canonicalization.  */
21924 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21925 operands[2-negate]);
21929 /* AVX512F does support 64-byte integer vector operations,
21930 thus the longest vector we are faced with is V64QImode. */
21931 #define MAX_VECT_LEN 64
/* Descriptor for a (possibly constant) vector permutation that the
   various expand_vec_perm_* strategies try to implement.  */
21933 struct expand_vec_perm_d
/* Destination register and the one or two source operands.  */
21935 rtx target, op0, op1;
/* Element selector: perm[i] is the source element index placed into
   result element i (indices >= nelt select from op1).  */
21936 unsigned char perm[MAX_VECT_LEN];
/* Vector mode of the operands.  */
21937 machine_mode vmode;
/* Number of elements in the vectors (at most MAX_VECT_LEN).  */
21938 unsigned char nelt;
/* True when op0 and op1 are the same operand (one-input shuffle).  */
21939 bool one_operand_p;
/* Try to expand a permutation using the AVX-512 two-source variable
   permute instructions (vpermi2var patterns).  Called from both the
   constant and the non-constant permutation expanders: when D is
   non-NULL the operands and the selector come from *D, otherwise from
   TARGET/OP0/MASK/OP1.  Selects a generator according to the vector
   mode and the available ISA (AVX512F/BW/VL/VBMI); for floating-point
   modes the selector uses the corresponding integer mode (MASKMODE).  */
21944 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21945 struct expand_vec_perm_d *d)
21947 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21948 expander, so args are either in d, or in op0, op1 etc. */
21949 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21950 machine_mode maskmode = mode;
/* Generator for the vpermi2var insn; stays NULL if the mode/ISA
   combination has no such instruction.  */
21951 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
/* Dispatch on MODE: pick the vpermi2var generator gated on the ISA
   flag that provides it (the case labels are elided in this excerpt).  */
21956 if (TARGET_AVX512VL && TARGET_AVX512BW)
21957 gen = gen_avx512vl_vpermi2varv8hi3;
21960 if (TARGET_AVX512VL && TARGET_AVX512BW)
21961 gen = gen_avx512vl_vpermi2varv16hi3;
21964 if (TARGET_AVX512VBMI)
21965 gen = gen_avx512bw_vpermi2varv64qi3;
21968 if (TARGET_AVX512BW)
21969 gen = gen_avx512bw_vpermi2varv32hi3;
21972 if (TARGET_AVX512VL)
21973 gen = gen_avx512vl_vpermi2varv4si3;
21976 if (TARGET_AVX512VL)
21977 gen = gen_avx512vl_vpermi2varv8si3;
21980 if (TARGET_AVX512F)
21981 gen = gen_avx512f_vpermi2varv16si3;
21984 if (TARGET_AVX512VL)
/* FP modes take an integer-mode selector.  */
21986 gen = gen_avx512vl_vpermi2varv4sf3;
21987 maskmode = V4SImode;
21991 if (TARGET_AVX512VL)
21993 gen = gen_avx512vl_vpermi2varv8sf3;
21994 maskmode = V8SImode;
21998 if (TARGET_AVX512F)
22000 gen = gen_avx512f_vpermi2varv16sf3;
22001 maskmode = V16SImode;
22005 if (TARGET_AVX512VL)
22006 gen = gen_avx512vl_vpermi2varv2di3;
22009 if (TARGET_AVX512VL)
22010 gen = gen_avx512vl_vpermi2varv4di3;
22013 if (TARGET_AVX512F)
22014 gen = gen_avx512f_vpermi2varv8di3;
22017 if (TARGET_AVX512VL)
22019 gen = gen_avx512vl_vpermi2varv2df3;
22020 maskmode = V2DImode;
22024 if (TARGET_AVX512VL)
22026 gen = gen_avx512vl_vpermi2varv4df3;
22027 maskmode = V4DImode;
22031 if (TARGET_AVX512F)
22033 gen = gen_avx512f_vpermi2varv8df3;
22034 maskmode = V8DImode;
22044 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22045 expander, so args are either in d, or in op0, op1 etc. */
22049 target = d->target;
/* Build the constant selector vector from d->perm.  */
22052 for (int i = 0; i < d->nelt; ++i)
22053 vec[i] = GEN_INT (d->perm[i]);
22054 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
/* Emit the two-source variable permute; the selector must live in a
   register for the vpermi2var patterns.  */
22057 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1))<span></span>;
22061 /* Expand a variable vector permutation. */
/* OPERANDS[0] = target, OPERANDS[1] = first source, OPERANDS[2] =
   second source, OPERANDS[3] = variable (register) selector.  Emits a
   sequence that stores into OPERANDS[0] the permutation of the two
   sources selected element-wise by the mask.  Strategy, in order:
   AVX-512 vpermi2var; AVX2 vpermd/vpermps (re-coding V4DI/V4DF/V16HI
   masks as needed); AVX2 vpshufb lane tricks for 256-bit byte modes;
   XOP vpperm or SSSE3 pshufb for 128-bit modes, merging two one-input
   shuffles with a compare-based blend when both sources are needed.  */
22064 ix86_expand_vec_perm (rtx operands[])
22066 rtx target = operands[0];
22067 rtx op0 = operands[1];
22068 rtx op1 = operands[2];
22069 rtx mask = operands[3];
/* Scratch registers and constant-vector builders used by the various
   strategies below.  */
22070 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22071 machine_mode mode = GET_MODE (op0);
22072 machine_mode maskmode = GET_MODE (mask);
/* A one-input shuffle permits cheaper single-source instructions.  */
22074 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22076 /* Number of elements in the vector. */
22077 w = GET_MODE_NUNITS (mode);
/* E is the element size in bytes.  */
22078 e = GET_MODE_UNIT_SIZE (mode);
22079 gcc_assert (w <= 64);
/* First choice: the AVX-512 two-source variable permute.  */
22081 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22086 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22088 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22089 an constant shuffle operand. With a tiny bit of effort we can
22090 use VPERMD instead. A re-interpretation stall for V4DFmode is
22091 unfortunate but there's no avoiding it.
22092 Similarly for V16HImode we don't have instructions for variable
22093 shuffling, while for V32QImode we can use after preparing suitable
22094 masks vpshufb; vpshufb; vpermq; vpor. */
22096 if (mode == V16HImode)
22098 maskmode = mode = V32QImode;
22104 maskmode = mode = V8SImode;
22108 t1 = gen_reg_rtx (maskmode);
22110 /* Replicate the low bits of the V4DImode mask into V8SImode:
22112 t1 = { A A B B C C D D }. */
22113 for (i = 0; i < w / 2; ++i)
22114 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22115 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22116 vt = force_reg (maskmode, vt);
22117 mask = gen_lowpart (maskmode, mask);
22118 if (maskmode == V8SImode)
22119 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22121 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22123 /* Multiply the shuffle indicies by two. */
22124 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22127 /* Add one to the odd shuffle indicies:
22128 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22129 for (i = 0; i < w / 2; ++i)
22131 vec[i * 2] = const0_rtx;
22132 vec[i * 2 + 1] = const1_rtx;
22134 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22135 vt = validize_mem (force_const_mem (maskmode, vt));
22136 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22139 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22140 operands[3] = mask = t1;
22141 target = gen_reg_rtx (mode);
22142 op0 = gen_lowpart (mode, op0);
22143 op1 = gen_lowpart (mode, op1);
22149 /* The VPERMD and VPERMPS instructions already properly ignore
22150 the high bits of the shuffle elements. No need for us to
22151 perform an AND ourselves. */
22152 if (one_operand_shuffle)
22154 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
/* TARGET may be a fresh pseudo in a different mode; copy the
   result back through a lowpart.  */
22155 if (target != operands[0])
22156 emit_move_insn (operands[0],
22157 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-operand case: permute each source, then merge below.  */
22161 t1 = gen_reg_rtx (V8SImode);
22162 t2 = gen_reg_rtx (V8SImode);
22163 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22164 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22170 mask = gen_lowpart (V8SImode, mask);
22171 if (one_operand_shuffle)
22172 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22175 t1 = gen_reg_rtx (V8SFmode);
22176 t2 = gen_reg_rtx (V8SFmode);
22177 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22178 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22184 /* By combining the two 128-bit input vectors into one 256-bit
22185 input vector, we can use VPERMD and VPERMPS for the full
22186 two-operand shuffle. */
22187 t1 = gen_reg_rtx (V8SImode);
22188 t2 = gen_reg_rtx (V8SImode);
22189 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22190 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22191 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22192 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22196 t1 = gen_reg_rtx (V8SFmode);
22197 t2 = gen_reg_rtx (V8SImode);
22198 mask = gen_lowpart (V4SImode, mask);
22199 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22200 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22201 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22202 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* V32QI path: vpshufb only shuffles within 128-bit lanes, so build
   two masks (same-lane and cross-lane) and combine.  */
22206 t1 = gen_reg_rtx (V32QImode);
22207 t2 = gen_reg_rtx (V32QImode);
22208 t3 = gen_reg_rtx (V32QImode);
22209 vt2 = GEN_INT (-128);
22210 for (i = 0; i < 32; i++)
22212 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22213 vt = force_reg (V32QImode, vt);
22214 for (i = 0; i < 32; i++)
22215 vec[i] = i < 16 ? vt2 : const0_rtx;
22216 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22217 vt2 = force_reg (V32QImode, vt2);
22218 /* From mask create two adjusted masks, which contain the same
22219 bits as mask in the low 7 bits of each vector element.
22220 The first mask will have the most significant bit clear
22221 if it requests element from the same 128-bit lane
22222 and MSB set if it requests element from the other 128-bit lane.
22223 The second mask will have the opposite values of the MSB,
22224 and additionally will have its 128-bit lanes swapped.
22225 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22226 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22227 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22228 stands for other 12 bytes. */
22229 /* The bit whether element is from the same lane or the other
22230 lane is bit 4, so shift it up by 3 to the MSB position. */
22231 t5 = gen_reg_rtx (V4DImode);
22232 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22234 /* Clear MSB bits from the mask just in case it had them set. */
22235 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22236 /* After this t1 will have MSB set for elements from other lane. */
22237 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22238 /* Clear bits other than MSB. */
22239 emit_insn (gen_andv32qi3 (t1, t1, vt));
22240 /* Or in the lower bits from mask into t3. */
22241 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22242 /* And invert MSB bits in t1, so MSB is set for elements from the same
22244 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22245 /* Swap 128-bit lanes in t3. */
22246 t6 = gen_reg_rtx (V4DImode);
22247 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22248 const2_rtx, GEN_INT (3),
22249 const0_rtx, const1_rtx));
22250 /* And or in the lower bits from mask into t1. */
22251 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22252 if (one_operand_shuffle)
22254 /* Each of these shuffles will put 0s in places where
22255 element from the other 128-bit lane is needed, otherwise
22256 will shuffle in the requested value. */
22257 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22258 gen_lowpart (V32QImode, t6)));
22259 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22260 /* For t3 the 128-bit lanes are swapped again. */
22261 t7 = gen_reg_rtx (V4DImode);
22262 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22263 const2_rtx, GEN_INT (3),
22264 const0_rtx, const1_rtx));
22265 /* And oring both together leads to the result. */
22266 emit_insn (gen_iorv32qi3 (target, t1,
22267 gen_lowpart (V32QImode, t7)));
22268 if (target != operands[0])
22269 emit_move_insn (operands[0],
22270 gen_lowpart (GET_MODE (operands[0]), target));
22274 t4 = gen_reg_rtx (V32QImode);
22275 /* Similarly to the above one_operand_shuffle code,
22276 just for repeated twice for each operand. merge_two:
22277 code will merge the two results together. */
22278 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22279 gen_lowpart (V32QImode, t6)));
22280 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22281 gen_lowpart (V32QImode, t6)));
22282 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22283 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22284 t7 = gen_reg_rtx (V4DImode);
22285 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22286 const2_rtx, GEN_INT (3),
22287 const0_rtx, const1_rtx));
22288 t8 = gen_reg_rtx (V4DImode);
22289 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22290 const2_rtx, GEN_INT (3),
22291 const0_rtx, const1_rtx));
22292 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22293 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* From here on, only 128-bit vectors remain.  */
22299 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22306 /* The XOP VPPERM insn supports three inputs. By ignoring the
22307 one_operand_shuffle special case, we avoid creating another
22308 set of constant vectors in memory. */
22309 one_operand_shuffle = false;
22311 /* mask = mask & {2*w-1, ...} */
22312 vt = GEN_INT (2*w - 1);
22316 /* mask = mask & {w-1, ...} */
22317 vt = GEN_INT (w - 1);
22320 for (i = 0; i < w; i++)
22322 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22323 mask = expand_simple_binop (maskmode, AND, mask, vt,
22324 NULL_RTX, 0, OPTAB_DIRECT);
22326 /* For non-QImode operations, convert the word permutation control
22327 into a byte permutation control. */
22328 if (mode != V16QImode)
/* Scale word indices to byte offsets: index *= element size.  */
22330 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22331 GEN_INT (exact_log2 (e)),
22332 NULL_RTX, 0, OPTAB_DIRECT);
22334 /* Convert mask to vector of chars. */
22335 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22337 /* Replicate each of the input bytes into byte positions:
22338 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22339 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22340 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22341 for (i = 0; i < 16; ++i)
22342 vec[i] = GEN_INT (i/e * e);
22343 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22344 vt = validize_mem (force_const_mem (V16QImode, vt));
22346 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22348 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22350 /* Convert it into the byte positions by doing
22351 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22352 for (i = 0; i < 16; ++i)
22353 vec[i] = GEN_INT (i % e);
22354 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22355 vt = validize_mem (force_const_mem (V16QImode, vt));
22356 emit_insn (gen_addv16qi3 (mask, mask, vt));
22359 /* The actual shuffle operations all operate on V16QImode. */
22360 op0 = gen_lowpart (V16QImode, op0);
22361 op1 = gen_lowpart (V16QImode, op1);
22365 if (GET_MODE (target) != V16QImode)
22366 target = gen_reg_rtx (V16QImode);
/* XOP VPPERM handles both sources in one instruction.  */
22367 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22368 if (target != operands[0])
22369 emit_move_insn (operands[0],
22370 gen_lowpart (GET_MODE (operands[0]), target));
22372 else if (one_operand_shuffle)
22374 if (GET_MODE (target) != V16QImode)
22375 target = gen_reg_rtx (V16QImode);
22376 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22377 if (target != operands[0])
22378 emit_move_insn (operands[0],
22379 gen_lowpart (GET_MODE (operands[0]), target));
22386 /* Shuffle the two input vectors independently. */
22387 t1 = gen_reg_rtx (V16QImode);
22388 t2 = gen_reg_rtx (V16QImode);
22389 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22390 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
/* merge_two: blend T1 and T2 based on whether each selector element
   addressed the second source (bit W set in the original mask).  */
22393 /* Then merge them together. The key is whether any given control
22394 element contained a bit set that indicates the second word. */
22395 mask = operands[3];
22397 if (maskmode == V2DImode && !TARGET_SSE4_1)
22399 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22400 more shuffle to convert the V2DI input mask into a V4SI
22401 input mask. At which point the masking that expand_int_vcond
22402 will work as desired. */
22403 rtx t3 = gen_reg_rtx (V4SImode);
22404 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22405 const0_rtx, const0_rtx,
22406 const2_rtx, const2_rtx));
22408 maskmode = V4SImode;
22412 for (i = 0; i < w; i++)
22414 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22415 vt = force_reg (maskmode, vt);
22416 mask = expand_simple_binop (maskmode, AND, mask, vt,
22417 NULL_RTX, 0, OPTAB_DIRECT);
22419 if (GET_MODE (target) != mode)
22420 target = gen_reg_rtx (mode);
/* Use a vector conditional move (vcond) to select between the two
   shuffled results.  */
22422 xops[1] = gen_lowpart (mode, t2);
22423 xops[2] = gen_lowpart (mode, t1);
22424 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22427 ok = ix86_expand_int_vcond (xops);
22429 if (target != operands[0])
22430 emit_move_insn (operands[0],
22431 gen_lowpart (GET_MODE (operands[0]), target));
22435 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22436 true if we should do zero extension, else sign extension. HIGH_P is
22437 true if we want the N/2 high elements, else the low elements. */
/* DEST receives the widened elements; SRC is the narrow source vector.
   With SSE4.1+/AVX the pmovzx/pmovsx-style extend patterns are used;
   otherwise the elements are interleaved with zeros (or a computed
   sign mask) via punpckl/punpckh.  */
22440 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22442 machine_mode imode = GET_MODE (src);
/* Extend path: UNPACK widens the low half; EXTRACT (for >=256-bit
   sources) pulls out the requested half first.  */
22447 rtx (*unpack)(rtx, rtx);
22448 rtx (*extract)(rtx, rtx) = NULL;
22449 machine_mode halfmode = BLKmode;
/* Dispatch on IMODE (case labels elided in this excerpt): pick the
   zero/sign extend generator and, for wide sources, the half-extract.  */
22455 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22457 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22458 halfmode = V32QImode;
22460 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22464 unpack = gen_avx2_zero_extendv16qiv16hi2;
22466 unpack = gen_avx2_sign_extendv16qiv16hi2;
22467 halfmode = V16QImode;
22469 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22473 unpack = gen_avx512f_zero_extendv16hiv16si2;
22475 unpack = gen_avx512f_sign_extendv16hiv16si2;
22476 halfmode = V16HImode;
22478 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22482 unpack = gen_avx2_zero_extendv8hiv8si2;
22484 unpack = gen_avx2_sign_extendv8hiv8si2;
22485 halfmode = V8HImode;
22487 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22491 unpack = gen_avx512f_zero_extendv8siv8di2;
22493 unpack = gen_avx512f_sign_extendv8siv8di2;
22494 halfmode = V8SImode;
22496 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22500 unpack = gen_avx2_zero_extendv4siv4di2;
22502 unpack = gen_avx2_sign_extendv4siv4di2;
22503 halfmode = V4SImode;
22505 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22509 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22511 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22515 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22517 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22521 unpack = gen_sse4_1_zero_extendv2siv2di2;
22523 unpack = gen_sse4_1_sign_extendv2siv2di2;
22526 gcc_unreachable ();
22529 if (GET_MODE_SIZE (imode) >= 32)
/* 256/512-bit source: extract the requested half, then extend it.  */
22531 tmp = gen_reg_rtx (halfmode);
22532 emit_insn (extract (tmp, src));
22536 /* Shift higher 8 bytes to lower 8 bytes. */
22537 tmp = gen_reg_rtx (V1TImode);
22538 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22540 tmp = gen_lowpart (imode, tmp);
22545 emit_insn (unpack (dest, tmp));
/* Pre-SSE4.1 path: use interleave (punpck) with a zero or sign vector.  */
22549 rtx (*unpack)(rtx, rtx, rtx);
22555 unpack = gen_vec_interleave_highv16qi;
22557 unpack = gen_vec_interleave_lowv16qi;
22561 unpack = gen_vec_interleave_highv8hi;
22563 unpack = gen_vec_interleave_lowv8hi;
22567 unpack = gen_vec_interleave_highv4si;
22569 unpack = gen_vec_interleave_lowv4si;
22572 gcc_unreachable ();
/* Zero extension interleaves with zero ...  */
22576 tmp = force_reg (imode, CONST0_RTX (imode));
/* ... sign extension interleaves with (0 > src), i.e. the sign mask.  */
22578 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22579 src, pc_rtx, pc_rtx);
22581 rtx tmp2 = gen_reg_rtx (imode);
22582 emit_insn (unpack (tmp2, src, tmp));
22583 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22587 /* Expand conditional increment or decrement using adc/sbb instructions.
22588 The default case using setcc followed by the conditional move can be
22589 done by generic code. */
/* OPERANDS[0] = destination, OPERANDS[1] = comparison, OPERANDS[2] =
   base value, OPERANDS[3] = +1 or -1 increment.  Only increments of
   +/-1 are handled here; anything else bails out to generic code.  */
22591 ix86_expand_int_addcc (rtx operands[])
22593 enum rtx_code code = GET_CODE (operands[1]);
/* Generator for the adc/sbb pattern chosen below.  */
22595 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22597 rtx val = const0_rtx;
22598 bool fpcmp = false;
22600 rtx op0 = XEXP (operands[1], 0);
22601 rtx op1 = XEXP (operands[1], 1);
/* Only +/-1 can be folded into the carry of an adc/sbb.  */
22603 if (operands[3] != const1_rtx
22604 && operands[3] != constm1_rtx)
/* Rewrite the comparison into one that sets the carry flag.  */
22606 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22608 code = GET_CODE (compare_op);
22610 flags = XEXP (compare_op, 0);
22612 if (GET_MODE (flags) == CCFPmode
22613 || GET_MODE (flags) == CCFPUmode)
/* FP comparison: map the FP condition to its integer-flag form.  */
22616 code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition, using the unordered-aware reversal for FP
   comparisons.  */
22623 PUT_CODE (compare_op,
22624 reverse_condition_maybe_unordered
22625 (GET_CODE (compare_op)));
22627 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22630 mode = GET_MODE (operands[0]);
22632 /* Construct either adc or sbb insn. */
/* sbb subtracts the carry; adc adds it.  Which one realizes the
   requested +/-1 depends on the condition's polarity.  */
22633 if ((code == LTU) == (operands[3] == constm1_rtx))
/* Mode dispatch (case labels elided): pick the matching sbb pattern.  */
22638 insn = gen_subqi3_carry;
22641 insn = gen_subhi3_carry;
22644 insn = gen_subsi3_carry;
22647 insn = gen_subdi3_carry;
22650 gcc_unreachable ();
/* Otherwise the matching adc pattern.  */
22658 insn = gen_addqi3_carry;
22661 insn = gen_addhi3_carry;
22664 insn = gen_addsi3_carry;
22667 insn = gen_adddi3_carry;
22670 gcc_unreachable ();
22673 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22679 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22680 but works for floating point parameters and nonoffsetable memories.
22681 For pushes, it returns just stack offsets; the values will be saved
22682 in the right order. Maximally three parts are generated. */
/* Fills PARTS[0..size-1] with word-sized pieces of OPERAND (mode MODE)
   and returns the number of parts (2..4).  */
22685 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
/* Part count: 32-bit target splits into 4-byte words (XFmode is 3
   words); 64-bit target splits into 8-byte words.  */
22690 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22692 size = (GET_MODE_SIZE (mode) + 4) / 8;
22694 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22695 gcc_assert (size >= 2 && size <= 4);
22697 /* Optimize constant pool reference to immediates. This is used by fp
22698 moves, that force all constants to memory to allow combining. */
22699 if (MEM_P (operand) && MEM_READONLY_P (operand))
22701 rtx tmp = maybe_get_pool_constant (operand);
22706 if (MEM_P (operand) && !offsettable_memref_p (operand))
22708 /* The only non-offsetable memories we handle are pushes. */
22709 int ok = push_operand (operand, VOIDmode);
/* For a push, every part is the same pre-dec stack reference; the
   caller emits the pushes in the right order.  */
22713 operand = copy_rtx (operand);
22714 PUT_MODE (operand, word_mode);
22715 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22719 if (GET_CODE (operand) == CONST_VECTOR)
22721 machine_mode imode = int_mode_for_mode (mode);
22722 /* Caution: if we looked through a constant pool memory above,
22723 the operand may actually have a different mode now. That's
22724 ok, since we want to pun this all the way back to an integer. */
22725 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22726 gcc_assert (operand != NULL);
/* 32-bit target: split into SImode pieces.  */
22732 if (mode == DImode)
22733 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22738 if (REG_P (operand))
/* Hard-register split is only meaningful after reload.  */
22740 gcc_assert (reload_completed);
22741 for (i = 0; i < size; i++)
22742 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22744 else if (offsettable_memref_p (operand))
22746 operand = adjust_address (operand, SImode, 0);
22747 parts[0] = operand;
22748 for (i = 1; i < size; i++)
22749 parts[i] = adjust_address (operand, SImode, 4 * i);
22751 else if (GET_CODE (operand) == CONST_DOUBLE)
/* FP constant: convert to target representation and emit each
   32-bit word as an immediate.  */
22756 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22760 real_to_target (l, &r, mode);
22761 parts[3] = gen_int_mode (l[3], SImode);
22762 parts[2] = gen_int_mode (l[2], SImode);
22765 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22766 long double may not be 80-bit. */
22767 real_to_target (l, &r, mode);
22768 parts[2] = gen_int_mode (l[2], SImode);
22771 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22774 gcc_unreachable ();
22776 parts[1] = gen_int_mode (l[1], SImode);
22777 parts[0] = gen_int_mode (l[0], SImode);
22780 gcc_unreachable ();
/* 64-bit target: split into DImode pieces.  */
22785 if (mode == TImode)
22786 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22787 if (mode == XFmode || mode == TFmode)
/* XFmode's upper part is only 2 bytes wide, so keep it SImode.  */
22789 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22790 if (REG_P (operand))
22792 gcc_assert (reload_completed);
22793 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22794 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22796 else if (offsettable_memref_p (operand))
22798 operand = adjust_address (operand, DImode, 0);
22799 parts[0] = operand;
22800 parts[1] = adjust_address (operand, upper_mode, 8);
22802 else if (GET_CODE (operand) == CONST_DOUBLE)
22807 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22808 real_to_target (l, &r, mode);
22810 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22811 if (HOST_BITS_PER_WIDE_INT >= 64)
/* Combine the two 32-bit target words into one 64-bit immediate,
   using two 31-bit shifts to avoid an undefined shift by 32.  */
22814 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22815 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22818 parts[0] = immed_double_const (l[0], l[1], DImode);
22820 if (upper_mode == SImode)
22821 parts[1] = gen_int_mode (l[2], SImode);
22822 else if (HOST_BITS_PER_WIDE_INT >= 64)
22825 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22826 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22829 parts[1] = immed_double_const (l[2], l[3], DImode);
22832 gcc_unreachable ();
22839 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22840 Return false when normal moves are needed; true when all required
22841 insns have been emitted. Operands 2-4 contain the input values
22842 in the correct order; operands 5-7 contain the output values. */
22845 ix86_split_long_move (rtx operands[])
22850 int collisions = 0;
22851 machine_mode mode = GET_MODE (operands[0]);
/* Per-part flag: does destination part I overlap the source address?  */
22852 bool collisionparts[4];
22854 /* The DFmode expanders may ask us to move double.
22855 For 64bit target this is single move. By hiding the fact
22856 here we simplify i386.md splitters. */
22857 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22859 /* Optimize constant pool reference to immediates. This is used by
22860 fp moves, that force all constants to memory to allow combining. */
22862 if (MEM_P (operands[1])
22863 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22864 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22865 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22866 if (push_operand (operands[0], VOIDmode))
22868 operands[0] = copy_rtx (operands[0]);
22869 PUT_MODE (operands[0], word_mode);
/* Single 8-byte move on 64-bit targets: emit it directly.  */
22872 operands[0] = gen_lowpart (DImode, operands[0]);
22873 operands[1] = gen_lowpart (DImode, operands[1]);
22874 emit_move_insn (operands[0], operands[1]);
22878 /* The only non-offsettable memory we handle is push. */
22879 if (push_operand (operands[0], VOIDmode))
22882 gcc_assert (!MEM_P (operands[0])
22883 || offsettable_memref_p (operands[0]));
/* Decompose both operands into word-sized parts.  */
22885 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22886 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22888 /* When emitting push, take care for source operands on the stack. */
22889 if (push && MEM_P (operands[1])
22890 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22892 rtx src_base = XEXP (part[1][nparts - 1], 0);
22894 /* Compensate for the stack decrement by 4. */
22895 if (!TARGET_64BIT && nparts == 3
22896 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22897 src_base = plus_constant (Pmode, src_base, 4);
22899 /* src_base refers to the stack pointer and is
22900 automatically decreased by emitted push. */
22901 for (i = 0; i < nparts; i++)
22902 part[1][i] = change_address (part[1][i],
22903 GET_MODE (part[1][i]), src_base);
22906 /* We need to do copy in the right order in case an address register
22907 of the source overlaps the destination. */
22908 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22912 for (i = 0; i < nparts; i++)
22915 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22916 if (collisionparts[i])
22920 /* Collision in the middle part can be handled by reordering. */
22921 if (collisions == 1 && nparts == 3 && collisionparts [1])
22923 std::swap (part[0][1], part[0][2]);
22924 std::swap (part[1][1], part[1][2]);
22926 else if (collisions == 1
22928 && (collisionparts [1] || collisionparts [2]))
22930 if (collisionparts [1])
22932 std::swap (part[0][1], part[0][2]);
22933 std::swap (part[1][1], part[1][2]);
22937 std::swap (part[0][2], part[0][3]);
22938 std::swap (part[1][2], part[1][3]);
22942 /* If there are more collisions, we can't handle it by reordering.
22943 Do an lea to the last part and use only one colliding move. */
22944 else if (collisions > 1)
22950 base = part[0][nparts - 1];
22952 /* Handle the case when the last part isn't valid for lea.
22953 Happens in 64-bit mode storing the 12-byte XFmode. */
22954 if (GET_MODE (base) != Pmode)
22955 base = gen_rtx_REG (Pmode, REGNO (base));
/* Load the source address into BASE and rewrite all source parts
   as constant offsets from it.  */
22957 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22958 part[1][0] = replace_equiv_address (part[1][0], base);
22959 for (i = 1; i < nparts; i++)
22961 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22962 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: emit the high parts first (stack grows downward).  */
22973 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
/* XFmode occupies only 12 bytes of its 16-byte slot; pre-adjust
   the stack pointer by the missing 4 bytes.  */
22974 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22975 stack_pointer_rtx, GEN_INT (-4)));
22976 emit_move_insn (part[0][2], part[1][2]);
22978 else if (nparts == 4)
22980 emit_move_insn (part[0][3], part[1][3]);
22981 emit_move_insn (part[0][2], part[1][2]);
22986 /* In 64bit mode we don't have 32bit push available. In case this is
22987 register, it is OK - we will just use larger counterpart. We also
22988 retype memory - these comes from attempt to avoid REX prefix on
22989 moving of second half of TFmode value. */
22990 if (GET_MODE (part[1][1]) == SImode)
22992 switch (GET_CODE (part[1][1]))
22995 part[1][1] = adjust_address (part[1][1], DImode, 0);
22999 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23003 gcc_unreachable ();
23006 if (GET_MODE (part[1][0]) == SImode)
23007 part[1][0] = part[1][1];
23010 emit_move_insn (part[0][1], part[1][1]);
23011 emit_move_insn (part[0][0], part[1][0]);
23015 /* Choose correct order to not overwrite the source before it is copied. */
23016 if ((REG_P (part[0][0])
23017 && REG_P (part[1][1])
23018 && (REGNO (part[0][0]) == REGNO (part[1][1])
23020 && REGNO (part[0][0]) == REGNO (part[1][2]))
23022 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23024 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reverse order: copy high-to-low.  */
23026 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23028 operands[2 + i] = part[0][j];
23029 operands[6 + i] = part[1][j];
23034 for (i = 0; i < nparts; i++)
23036 operands[2 + i] = part[0][i];
23037 operands[6 + i] = part[1][i];
23041 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23042 if (optimize_insn_for_size_p ())
/* Reuse a register that already holds the same constant for later
   parts, saving immediate bytes.  */
23044 for (j = 0; j < nparts - 1; j++)
23045 if (CONST_INT_P (operands[6 + j])
23046 && operands[6 + j] != const0_rtx
23047 && REG_P (operands[2 + j]))
23048 for (i = j; i < nparts - 1; i++)
23049 if (CONST_INT_P (operands[7 + i])
23050 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23051 operands[7 + i] = operands[2 + j];
23054 for (i = 0; i < nparts; i++)
23055 emit_move_insn (operands[2 + i], operands[6 + i]);
23060 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23061 left shift by a constant, either using a single shift or
23062 a sequence of add instructions. */
/* OPERAND is one half of the double-word value being shifted; COUNT is
   the constant shift amount; MODE is the double-word mode being split,
   so the half-word operations use the opposite-width generators below
   (MODE == DImode means the halves are SImode).  */
23065 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23067 rtx (*insn)(rtx, rtx, rtx);
/* Use repeated self-adds (x+x == x<<1) when that is cheaper than a
   shift-by-constant and we are not optimizing for size.  */
23070 || (count * ix86_cost->add <= ix86_cost->shift_const
23071 && !optimize_insn_for_size_p ()))
23073 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23074 while (count-- > 0)
23075 emit_insn (insn (operand, operand, operand));
23079 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23080 emit_insn (insn (operand, operand, GEN_INT (count)));
23085 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23087 rtx (*gen_ashl3)(rtx, rtx, rtx);
23088 rtx (*gen_shld)(rtx, rtx, rtx);
23089 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23091 rtx low[2], high[2];
23094 if (CONST_INT_P (operands[2]))
23096 split_double_mode (mode, operands, 2, low, high);
23097 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23099 if (count >= half_width)
23101 emit_move_insn (high[0], low[1]);
23102 emit_move_insn (low[0], const0_rtx);
23104 if (count > half_width)
23105 ix86_expand_ashl_const (high[0], count - half_width, mode);
23109 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23111 if (!rtx_equal_p (operands[0], operands[1]))
23112 emit_move_insn (operands[0], operands[1]);
23114 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23115 ix86_expand_ashl_const (low[0], count, mode);
23120 split_double_mode (mode, operands, 1, low, high);
23122 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23124 if (operands[1] == const1_rtx)
23126 /* Assuming we've chosen a QImode capable registers, then 1 << N
23127 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23128 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23130 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23132 ix86_expand_clear (low[0]);
23133 ix86_expand_clear (high[0]);
23134 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23136 d = gen_lowpart (QImode, low[0]);
23137 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23138 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23139 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23141 d = gen_lowpart (QImode, high[0]);
23142 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23143 s = gen_rtx_NE (QImode, flags, const0_rtx);
23144 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23147 /* Otherwise, we can get the same results by manually performing
23148 a bit extract operation on bit 5/6, and then performing the two
23149 shifts. The two methods of getting 0/1 into low/high are exactly
23150 the same size. Avoiding the shift in the bit extract case helps
23151 pentium4 a bit; no one else seems to care much either way. */
23154 machine_mode half_mode;
23155 rtx (*gen_lshr3)(rtx, rtx, rtx);
23156 rtx (*gen_and3)(rtx, rtx, rtx);
23157 rtx (*gen_xor3)(rtx, rtx, rtx);
23158 HOST_WIDE_INT bits;
23161 if (mode == DImode)
23163 half_mode = SImode;
23164 gen_lshr3 = gen_lshrsi3;
23165 gen_and3 = gen_andsi3;
23166 gen_xor3 = gen_xorsi3;
23171 half_mode = DImode;
23172 gen_lshr3 = gen_lshrdi3;
23173 gen_and3 = gen_anddi3;
23174 gen_xor3 = gen_xordi3;
23178 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23179 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23181 x = gen_lowpart (half_mode, operands[2]);
23182 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23184 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23185 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23186 emit_move_insn (low[0], high[0]);
23187 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23190 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23191 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23195 if (operands[1] == constm1_rtx)
23197 /* For -1 << N, we can avoid the shld instruction, because we
23198 know that we're shifting 0...31/63 ones into a -1. */
23199 emit_move_insn (low[0], constm1_rtx);
23200 if (optimize_insn_for_size_p ())
23201 emit_move_insn (high[0], low[0]);
23203 emit_move_insn (high[0], constm1_rtx);
23207 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23209 if (!rtx_equal_p (operands[0], operands[1]))
23210 emit_move_insn (operands[0], operands[1]);
23212 split_double_mode (mode, operands, 1, low, high);
23213 emit_insn (gen_shld (high[0], low[0], operands[2]));
23216 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23218 if (TARGET_CMOVE && scratch)
23220 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23221 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23223 ix86_expand_clear (scratch);
23224 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23228 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23229 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23231 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23236 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23238 rtx (*gen_ashr3)(rtx, rtx, rtx)
23239 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23240 rtx (*gen_shrd)(rtx, rtx, rtx);
23241 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23243 rtx low[2], high[2];
23246 if (CONST_INT_P (operands[2]))
23248 split_double_mode (mode, operands, 2, low, high);
23249 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23251 if (count == GET_MODE_BITSIZE (mode) - 1)
23253 emit_move_insn (high[0], high[1]);
23254 emit_insn (gen_ashr3 (high[0], high[0],
23255 GEN_INT (half_width - 1)));
23256 emit_move_insn (low[0], high[0]);
23259 else if (count >= half_width)
23261 emit_move_insn (low[0], high[1]);
23262 emit_move_insn (high[0], low[0]);
23263 emit_insn (gen_ashr3 (high[0], high[0],
23264 GEN_INT (half_width - 1)));
23266 if (count > half_width)
23267 emit_insn (gen_ashr3 (low[0], low[0],
23268 GEN_INT (count - half_width)));
23272 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23274 if (!rtx_equal_p (operands[0], operands[1]))
23275 emit_move_insn (operands[0], operands[1]);
23277 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23278 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23283 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23285 if (!rtx_equal_p (operands[0], operands[1]))
23286 emit_move_insn (operands[0], operands[1]);
23288 split_double_mode (mode, operands, 1, low, high);
23290 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23291 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23293 if (TARGET_CMOVE && scratch)
23295 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23296 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23298 emit_move_insn (scratch, high[0]);
23299 emit_insn (gen_ashr3 (scratch, scratch,
23300 GEN_INT (half_width - 1)));
23301 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23306 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23307 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23309 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23315 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23317 rtx (*gen_lshr3)(rtx, rtx, rtx)
23318 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23319 rtx (*gen_shrd)(rtx, rtx, rtx);
23320 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23322 rtx low[2], high[2];
23325 if (CONST_INT_P (operands[2]))
23327 split_double_mode (mode, operands, 2, low, high);
23328 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23330 if (count >= half_width)
23332 emit_move_insn (low[0], high[1]);
23333 ix86_expand_clear (high[0]);
23335 if (count > half_width)
23336 emit_insn (gen_lshr3 (low[0], low[0],
23337 GEN_INT (count - half_width)));
23341 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23343 if (!rtx_equal_p (operands[0], operands[1]))
23344 emit_move_insn (operands[0], operands[1]);
23346 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23347 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23352 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23354 if (!rtx_equal_p (operands[0], operands[1]))
23355 emit_move_insn (operands[0], operands[1]);
23357 split_double_mode (mode, operands, 1, low, high);
23359 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23360 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23362 if (TARGET_CMOVE && scratch)
23364 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23365 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23367 ix86_expand_clear (scratch);
23368 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23373 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23374 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23376 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23381 /* Predict just emitted jump instruction to be taken with probability PROB. */
23383 predict_jump (int prob)
23385 rtx insn = get_last_insn ();
23386 gcc_assert (JUMP_P (insn));
23387 add_int_reg_note (insn, REG_BR_PROB, prob);
23390 /* Helper function for the string operations below. Dest VARIABLE whether
23391 it is aligned to VALUE bytes. If true, jump to the label. */
23392 static rtx_code_label *
23393 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23395 rtx_code_label *label = gen_label_rtx ();
23396 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23397 if (GET_MODE (variable) == DImode)
23398 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23400 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23401 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23404 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23406 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23410 /* Adjust COUNTER by the VALUE. */
23412 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23414 rtx (*gen_add)(rtx, rtx, rtx)
23415 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23417 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23420 /* Zero extend possibly SImode EXP to Pmode register. */
23422 ix86_zero_extend_to_Pmode (rtx exp)
23424 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23427 /* Divide COUNTREG by SCALE. */
23429 scale_counter (rtx countreg, int scale)
23435 if (CONST_INT_P (countreg))
23436 return GEN_INT (INTVAL (countreg) / scale);
23437 gcc_assert (REG_P (countreg));
23439 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23440 GEN_INT (exact_log2 (scale)),
23441 NULL, 1, OPTAB_DIRECT);
23445 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23446 DImode for constant loop counts. */
23448 static machine_mode
23449 counter_mode (rtx count_exp)
23451 if (GET_MODE (count_exp) != VOIDmode)
23452 return GET_MODE (count_exp);
23453 if (!CONST_INT_P (count_exp))
23455 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23460 /* Copy the address to a Pmode register. This is used for x32 to
23461 truncate DImode TLS address to a SImode register. */
23464 ix86_copy_addr_to_reg (rtx addr)
23467 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23469 reg = copy_addr_to_reg (addr);
23470 REG_POINTER (reg) = 1;
23475 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23476 reg = copy_to_mode_reg (DImode, addr);
23477 REG_POINTER (reg) = 1;
23478 return gen_rtx_SUBREG (SImode, reg, 0);
23482 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
23483 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
23484 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23485 memory by VALUE (supposed to be in MODE).
23487 The size is rounded down to whole number of chunk size moved at once.
23488 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
23492 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23493 rtx destptr, rtx srcptr, rtx value,
23494 rtx count, machine_mode mode, int unroll,
23495 int expected_size, bool issetmem)
23497 rtx_code_label *out_label, *top_label;
23499 machine_mode iter_mode = counter_mode (count);
23500 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23501 rtx piece_size = GEN_INT (piece_size_n);
23502 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23506 top_label = gen_label_rtx ();
23507 out_label = gen_label_rtx ();
23508 iter = gen_reg_rtx (iter_mode);
23510 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23511 NULL, 1, OPTAB_DIRECT);
23512 /* Those two should combine. */
23513 if (piece_size == const1_rtx)
23515 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23517 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23519 emit_move_insn (iter, const0_rtx);
23521 emit_label (top_label);
23523 tmp = convert_modes (Pmode, iter_mode, iter, true);
23525 /* This assert could be relaxed - in this case we'll need to compute
23526 smallest power of two, containing in PIECE_SIZE_N and pass it to
23528 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23529 destmem = offset_address (destmem, tmp, piece_size_n);
23530 destmem = adjust_address (destmem, mode, 0);
23534 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23535 srcmem = adjust_address (srcmem, mode, 0);
23537 /* When unrolling for chips that reorder memory reads and writes,
23538 we can save registers by using single temporary.
23539 Also using 4 temporaries is overkill in 32bit mode. */
23540 if (!TARGET_64BIT && 0)
23542 for (i = 0; i < unroll; i++)
23547 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23549 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23551 emit_move_insn (destmem, srcmem);
23557 gcc_assert (unroll <= 4);
23558 for (i = 0; i < unroll; i++)
23560 tmpreg[i] = gen_reg_rtx (mode);
23564 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23566 emit_move_insn (tmpreg[i], srcmem);
23568 for (i = 0; i < unroll; i++)
23573 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23575 emit_move_insn (destmem, tmpreg[i]);
23580 for (i = 0; i < unroll; i++)
23584 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23585 emit_move_insn (destmem, value);
23588 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23589 true, OPTAB_LIB_WIDEN);
23591 emit_move_insn (iter, tmp);
23593 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23595 if (expected_size != -1)
23597 expected_size /= GET_MODE_SIZE (mode) * unroll;
23598 if (expected_size == 0)
23600 else if (expected_size > REG_BR_PROB_BASE)
23601 predict_jump (REG_BR_PROB_BASE - 1);
23603 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23606 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23607 iter = ix86_zero_extend_to_Pmode (iter);
23608 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23609 true, OPTAB_LIB_WIDEN);
23610 if (tmp != destptr)
23611 emit_move_insn (destptr, tmp);
23614 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23615 true, OPTAB_LIB_WIDEN);
23617 emit_move_insn (srcptr, tmp);
23619 emit_label (out_label);
23622 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23623 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23624 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23625 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
23626 ORIG_VALUE is the original value passed to memset to fill the memory with.
23627 Other arguments have same meaning as for previous function. */
23630 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23631 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23633 machine_mode mode, bool issetmem)
23638 HOST_WIDE_INT rounded_count;
23640 /* If possible, it is shorter to use rep movs.
23641 TODO: Maybe it is better to move this logic to decide_alg. */
23642 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23643 && (!issetmem || orig_value == const0_rtx))
23646 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23647 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23649 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23650 GET_MODE_SIZE (mode)));
23651 if (mode != QImode)
23653 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23654 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23655 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23658 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23659 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23661 rounded_count = (INTVAL (count)
23662 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23663 destmem = shallow_copy_rtx (destmem);
23664 set_mem_size (destmem, rounded_count);
23666 else if (MEM_SIZE_KNOWN_P (destmem))
23667 clear_mem_size (destmem);
23671 value = force_reg (mode, gen_lowpart (mode, value));
23672 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23676 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23677 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23678 if (mode != QImode)
23680 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23681 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23682 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23685 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23686 if (CONST_INT_P (count))
23688 rounded_count = (INTVAL (count)
23689 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23690 srcmem = shallow_copy_rtx (srcmem);
23691 set_mem_size (srcmem, rounded_count);
23695 if (MEM_SIZE_KNOWN_P (srcmem))
23696 clear_mem_size (srcmem);
23698 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23703 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23705 SRC is passed by pointer to be updated on return.
23706 Return value is updated DST. */
23708 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23709 HOST_WIDE_INT size_to_move)
23711 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23712 enum insn_code code;
23713 machine_mode move_mode;
23716 /* Find the widest mode in which we could perform moves.
23717 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23718 it until move of such size is supported. */
23719 piece_size = 1 << floor_log2 (size_to_move);
23720 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23721 code = optab_handler (mov_optab, move_mode);
23722 while (code == CODE_FOR_nothing && piece_size > 1)
23725 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23726 code = optab_handler (mov_optab, move_mode);
23729 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23730 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23731 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23733 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23734 move_mode = mode_for_vector (word_mode, nunits);
23735 code = optab_handler (mov_optab, move_mode);
23736 if (code == CODE_FOR_nothing)
23738 move_mode = word_mode;
23739 piece_size = GET_MODE_SIZE (move_mode);
23740 code = optab_handler (mov_optab, move_mode);
23743 gcc_assert (code != CODE_FOR_nothing);
23745 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23746 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23748 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23749 gcc_assert (size_to_move % piece_size == 0);
23750 adjust = GEN_INT (piece_size);
23751 for (i = 0; i < size_to_move; i += piece_size)
23753 /* We move from memory to memory, so we'll need to do it via
23754 a temporary register. */
23755 tempreg = gen_reg_rtx (move_mode);
23756 emit_insn (GEN_FCN (code) (tempreg, src));
23757 emit_insn (GEN_FCN (code) (dst, tempreg));
23759 emit_move_insn (destptr,
23760 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23761 emit_move_insn (srcptr,
23762 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23764 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23766 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23770 /* Update DST and SRC rtx. */
23775 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23777 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23778 rtx destptr, rtx srcptr, rtx count, int max_size)
23781 if (CONST_INT_P (count))
23783 HOST_WIDE_INT countval = INTVAL (count);
23784 HOST_WIDE_INT epilogue_size = countval % max_size;
23787 /* For now MAX_SIZE should be a power of 2. This assert could be
23788 relaxed, but it'll require a bit more complicated epilogue
23790 gcc_assert ((max_size & (max_size - 1)) == 0);
23791 for (i = max_size; i >= 1; i >>= 1)
23793 if (epilogue_size & i)
23794 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23800 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23801 count, 1, OPTAB_DIRECT);
23802 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23803 count, QImode, 1, 4, false);
23807 /* When there are stringops, we can cheaply increase dest and src pointers.
23808 Otherwise we save code size by maintaining offset (zero is readily
23809 available from preceding rep operation) and using x86 addressing modes.
23811 if (TARGET_SINGLE_STRINGOP)
23815 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23816 src = change_address (srcmem, SImode, srcptr);
23817 dest = change_address (destmem, SImode, destptr);
23818 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23819 emit_label (label);
23820 LABEL_NUSES (label) = 1;
23824 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23825 src = change_address (srcmem, HImode, srcptr);
23826 dest = change_address (destmem, HImode, destptr);
23827 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23828 emit_label (label);
23829 LABEL_NUSES (label) = 1;
23833 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23834 src = change_address (srcmem, QImode, srcptr);
23835 dest = change_address (destmem, QImode, destptr);
23836 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23837 emit_label (label);
23838 LABEL_NUSES (label) = 1;
23843 rtx offset = force_reg (Pmode, const0_rtx);
23848 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23849 src = change_address (srcmem, SImode, srcptr);
23850 dest = change_address (destmem, SImode, destptr);
23851 emit_move_insn (dest, src);
23852 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23853 true, OPTAB_LIB_WIDEN);
23855 emit_move_insn (offset, tmp);
23856 emit_label (label);
23857 LABEL_NUSES (label) = 1;
23861 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23862 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23863 src = change_address (srcmem, HImode, tmp);
23864 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23865 dest = change_address (destmem, HImode, tmp);
23866 emit_move_insn (dest, src);
23867 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23868 true, OPTAB_LIB_WIDEN);
23870 emit_move_insn (offset, tmp);
23871 emit_label (label);
23872 LABEL_NUSES (label) = 1;
23876 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23877 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23878 src = change_address (srcmem, QImode, tmp);
23879 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23880 dest = change_address (destmem, QImode, tmp);
23881 emit_move_insn (dest, src);
23882 emit_label (label);
23883 LABEL_NUSES (label) = 1;
23888 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23889 with value PROMOTED_VAL.
23890 SRC is passed by pointer to be updated on return.
23891 Return value is updated DST. */
23893 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23894 HOST_WIDE_INT size_to_move)
23896 rtx dst = destmem, adjust;
23897 enum insn_code code;
23898 machine_mode move_mode;
23901 /* Find the widest mode in which we could perform moves.
23902 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23903 it until move of such size is supported. */
23904 move_mode = GET_MODE (promoted_val);
23905 if (move_mode == VOIDmode)
23906 move_mode = QImode;
23907 if (size_to_move < GET_MODE_SIZE (move_mode))
23909 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23910 promoted_val = gen_lowpart (move_mode, promoted_val);
23912 piece_size = GET_MODE_SIZE (move_mode);
23913 code = optab_handler (mov_optab, move_mode);
23914 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23916 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23918 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23919 gcc_assert (size_to_move % piece_size == 0);
23920 adjust = GEN_INT (piece_size);
23921 for (i = 0; i < size_to_move; i += piece_size)
23923 if (piece_size <= GET_MODE_SIZE (word_mode))
23925 emit_insn (gen_strset (destptr, dst, promoted_val));
23926 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23931 emit_insn (GEN_FCN (code) (dst, promoted_val));
23933 emit_move_insn (destptr,
23934 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23936 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23940 /* Update DST rtx. */
23943 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23945 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23946 rtx count, int max_size)
23949 expand_simple_binop (counter_mode (count), AND, count,
23950 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23951 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23952 gen_lowpart (QImode, value), count, QImode,
23953 1, max_size / 2, true);
23956 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23958 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23959 rtx count, int max_size)
23963 if (CONST_INT_P (count))
23965 HOST_WIDE_INT countval = INTVAL (count);
23966 HOST_WIDE_INT epilogue_size = countval % max_size;
23969 /* For now MAX_SIZE should be a power of 2. This assert could be
23970 relaxed, but it'll require a bit more complicated epilogue
23972 gcc_assert ((max_size & (max_size - 1)) == 0);
23973 for (i = max_size; i >= 1; i >>= 1)
23975 if (epilogue_size & i)
23977 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23978 destmem = emit_memset (destmem, destptr, vec_value, i);
23980 destmem = emit_memset (destmem, destptr, value, i);
23987 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23992 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23995 dest = change_address (destmem, DImode, destptr);
23996 emit_insn (gen_strset (destptr, dest, value));
23997 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23998 emit_insn (gen_strset (destptr, dest, value));
24002 dest = change_address (destmem, SImode, destptr);
24003 emit_insn (gen_strset (destptr, dest, value));
24004 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24005 emit_insn (gen_strset (destptr, dest, value));
24006 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24007 emit_insn (gen_strset (destptr, dest, value));
24008 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24009 emit_insn (gen_strset (destptr, dest, value));
24011 emit_label (label);
24012 LABEL_NUSES (label) = 1;
24016 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24019 dest = change_address (destmem, DImode, destptr);
24020 emit_insn (gen_strset (destptr, dest, value));
24024 dest = change_address (destmem, SImode, destptr);
24025 emit_insn (gen_strset (destptr, dest, value));
24026 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24027 emit_insn (gen_strset (destptr, dest, value));
24029 emit_label (label);
24030 LABEL_NUSES (label) = 1;
24034 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24035 dest = change_address (destmem, SImode, destptr);
24036 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24037 emit_label (label);
24038 LABEL_NUSES (label) = 1;
24042 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24043 dest = change_address (destmem, HImode, destptr);
24044 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24045 emit_label (label);
24046 LABEL_NUSES (label) = 1;
24050 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24051 dest = change_address (destmem, QImode, destptr);
24052 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24053 emit_label (label);
24054 LABEL_NUSES (label) = 1;
24058 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
24059 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
24060 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24062 Return value is updated DESTMEM. */
24064 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24065 rtx destptr, rtx srcptr, rtx value,
24066 rtx vec_value, rtx count, int align,
24067 int desired_alignment, bool issetmem)
24070 for (i = 1; i < desired_alignment; i <<= 1)
24074 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24077 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24078 destmem = emit_memset (destmem, destptr, vec_value, i);
24080 destmem = emit_memset (destmem, destptr, value, i);
24083 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24084 ix86_adjust_counter (count, i);
24085 emit_label (label);
24086 LABEL_NUSES (label) = 1;
24087 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24093 /* Test if COUNT&SIZE is nonzero and if so, expand movme
24094 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24095 and jump to DONE_LABEL. */
24097 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24098 rtx destptr, rtx srcptr,
24099 rtx value, rtx vec_value,
24100 rtx count, int size,
24101 rtx done_label, bool issetmem)
24103 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24104 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24108 /* If we do not have vector value to copy, we must reduce size. */
24113 if (GET_MODE (value) == VOIDmode && size > 8)
24115 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24116 mode = GET_MODE (value);
24119 mode = GET_MODE (vec_value), value = vec_value;
24123 /* Choose appropriate vector mode. */
24125 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24126 else if (size >= 16)
24127 mode = TARGET_SSE ? V16QImode : DImode;
24128 srcmem = change_address (srcmem, mode, srcptr);
24130 destmem = change_address (destmem, mode, destptr);
24131 modesize = GEN_INT (GET_MODE_SIZE (mode));
24132 gcc_assert (GET_MODE_SIZE (mode) <= size);
24133 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24136 emit_move_insn (destmem, gen_lowpart (mode, value));
24139 emit_move_insn (destmem, srcmem);
24140 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24142 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24145 destmem = offset_address (destmem, count, 1);
24146 destmem = offset_address (destmem, GEN_INT (-2 * size),
24147 GET_MODE_SIZE (mode));
24150 srcmem = offset_address (srcmem, count, 1);
24151 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24152 GET_MODE_SIZE (mode));
24154 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24157 emit_move_insn (destmem, gen_lowpart (mode, value));
24160 emit_move_insn (destmem, srcmem);
24161 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24163 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24165 emit_jump_insn (gen_jump (done_label));
24168 emit_label (label);
24169 LABEL_NUSES (label) = 1;
24172 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
24173 and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
24174 bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
24175 proceed with an loop copying SIZE bytes at once. Do moves in MODE.
24176 DONE_LABEL is a label after the whole copying sequence. The label is created
24177 on demand if *DONE_LABEL is NULL.
24178 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24179 bounds after the initial copies.
24181 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
24182 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
24183 we will dispatch to a library call for large blocks.
24185 In pseudocode we do:
24189 Assume that SIZE is 4. Bigger sizes are handled analogously
24192 copy 4 bytes from SRCPTR to DESTPTR
24193 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24198 copy 1 byte from SRCPTR to DESTPTR
24201 copy 2 bytes from SRCPTR to DESTPTR
24202 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24207 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24208 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24210 OLD_DESPTR = DESTPTR;
24211 Align DESTPTR up to DESIRED_ALIGN
24212 SRCPTR += DESTPTR - OLD_DESTPTR
24213 COUNT -= DEST_PTR - OLD_DESTPTR
24215 Round COUNT down to multiple of SIZE
24216 << optional caller supplied zero size guard is here >>
24217 << optional caller supplied dynamic check is here >>
24218 << caller supplied main copy loop is here >>
24223 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24224 rtx *destptr, rtx *srcptr,
24226 rtx value, rtx vec_value,
24228 rtx_code_label **done_label,
24232 unsigned HOST_WIDE_INT *min_size,
24233 bool dynamic_check,
24236 rtx_code_label *loop_label = NULL, *label;
24239 int prolog_size = 0;
24242 /* Choose the proper value to copy. */
24243 if (issetmem && VECTOR_MODE_P (mode))
24244 mode_value = vec_value;
24246 mode_value = value;
24247 gcc_assert (GET_MODE_SIZE (mode) <= size);
24249 /* See if block is big or small, handle small blocks. */
24250 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24253 loop_label = gen_label_rtx ();
24256 *done_label = gen_label_rtx ();
24258 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24262 /* Handle sizes > 3. */
24263 for (;size2 > 2; size2 >>= 1)
24264 expand_small_movmem_or_setmem (destmem, srcmem,
24268 size2, *done_label, issetmem);
24269 /* Nothing to copy? Jump to DONE_LABEL if so */
24270 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24273 /* Do a byte copy. */
24274 destmem = change_address (destmem, QImode, *destptr);
24276 emit_move_insn (destmem, gen_lowpart (QImode, value));
24279 srcmem = change_address (srcmem, QImode, *srcptr);
24280 emit_move_insn (destmem, srcmem);
24283 /* Handle sizes 2 and 3. */
24284 label = ix86_expand_aligntest (*count, 2, false);
24285 destmem = change_address (destmem, HImode, *destptr);
24286 destmem = offset_address (destmem, *count, 1);
24287 destmem = offset_address (destmem, GEN_INT (-2), 2);
24289 emit_move_insn (destmem, gen_lowpart (HImode, value));
24292 srcmem = change_address (srcmem, HImode, *srcptr);
24293 srcmem = offset_address (srcmem, *count, 1);
24294 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24295 emit_move_insn (destmem, srcmem);
24298 emit_label (label);
24299 LABEL_NUSES (label) = 1;
24300 emit_jump_insn (gen_jump (*done_label));
24304 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24305 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24307 /* Start memcpy for COUNT >= SIZE. */
24310 emit_label (loop_label);
24311 LABEL_NUSES (loop_label) = 1;
24314 /* Copy first desired_align bytes. */
24316 srcmem = change_address (srcmem, mode, *srcptr);
24317 destmem = change_address (destmem, mode, *destptr);
24318 modesize = GEN_INT (GET_MODE_SIZE (mode));
24319 for (n = 0; prolog_size < desired_align - align; n++)
24322 emit_move_insn (destmem, mode_value);
24325 emit_move_insn (destmem, srcmem);
24326 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24328 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24329 prolog_size += GET_MODE_SIZE (mode);
24333 /* Copy last SIZE bytes. */
24334 destmem = offset_address (destmem, *count, 1);
24335 destmem = offset_address (destmem,
24336 GEN_INT (-size - prolog_size),
24339 emit_move_insn (destmem, mode_value);
24342 srcmem = offset_address (srcmem, *count, 1);
24343 srcmem = offset_address (srcmem,
24344 GEN_INT (-size - prolog_size),
24346 emit_move_insn (destmem, srcmem);
24348 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24350 destmem = offset_address (destmem, modesize, 1);
24352 emit_move_insn (destmem, mode_value);
24355 srcmem = offset_address (srcmem, modesize, 1);
24356 emit_move_insn (destmem, srcmem);
24360 /* Align destination. */
24361 if (desired_align > 1 && desired_align > align)
24363 rtx saveddest = *destptr;
24365 gcc_assert (desired_align <= size);
24366 /* Align destptr up, place it to new register. */
24367 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24368 GEN_INT (prolog_size),
24369 NULL_RTX, 1, OPTAB_DIRECT);
24370 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24371 REG_POINTER (*destptr) = 1;
24372 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24373 GEN_INT (-desired_align),
24374 *destptr, 1, OPTAB_DIRECT);
24375 /* See how many bytes we skipped. */
24376 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24378 saveddest, 1, OPTAB_DIRECT);
24379 /* Adjust srcptr and count. */
24381 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24382 saveddest, *srcptr, 1, OPTAB_DIRECT);
24383 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24384 saveddest, *count, 1, OPTAB_DIRECT);
24385 /* We copied at most size + prolog_size. */
24386 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24387 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24391 /* Our loops always round down the block size, but for dispatch to library
24392 we need precise value. */
24394 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24395 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24399 gcc_assert (prolog_size == 0);
24400 /* Decrease count, so we won't end up copying last word twice. */
24401 if (!CONST_INT_P (*count))
24402 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24403 constm1_rtx, *count, 1, OPTAB_DIRECT);
24405 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24407 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24412 /* This function is like the previous one, except here we know how many bytes
24413 need to be copied. That allows us to update alignment not only of DST, which
24414 is returned, but also of SRC, which is passed as a pointer for that
24417 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24418 rtx srcreg, rtx value, rtx vec_value,
24419 int desired_align, int align_bytes,
24423 rtx orig_dst = dst;
24424 rtx orig_src = NULL;
24425 int piece_size = 1;
24426 int copied_bytes = 0;
24430 gcc_assert (srcp != NULL);
24435 for (piece_size = 1;
24436 piece_size <= desired_align && copied_bytes < align_bytes;
24439 if (align_bytes & piece_size)
24443 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24444 dst = emit_memset (dst, destreg, vec_value, piece_size);
24446 dst = emit_memset (dst, destreg, value, piece_size);
24449 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24450 copied_bytes += piece_size;
24453 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24454 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24455 if (MEM_SIZE_KNOWN_P (orig_dst))
24456 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24460 int src_align_bytes = get_mem_align_offset (src, desired_align
24462 if (src_align_bytes >= 0)
24463 src_align_bytes = desired_align - src_align_bytes;
24464 if (src_align_bytes >= 0)
24466 unsigned int src_align;
24467 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24469 if ((src_align_bytes & (src_align - 1))
24470 == (align_bytes & (src_align - 1)))
24473 if (src_align > (unsigned int) desired_align)
24474 src_align = desired_align;
24475 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24476 set_mem_align (src, src_align * BITS_PER_UNIT);
24478 if (MEM_SIZE_KNOWN_P (orig_src))
24479 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24486 /* Return true if ALG can be used in current context.
24487 Assume we expand memset if MEMSET is true. */
24489 alg_usable_p (enum stringop_alg alg, bool memset)
24491 if (alg == no_stringop)
24493 if (alg == vector_loop)
24494 return TARGET_SSE || TARGET_AVX;
24495 /* Algorithms using the rep prefix want at least edi and ecx;
24496 additionally, memset wants eax and memcpy wants esi. Don't
24497 consider such algorithms if the user has appropriated those
24498 registers for their own purposes. */
24499 if (alg == rep_prefix_1_byte
24500 || alg == rep_prefix_4_byte
24501 || alg == rep_prefix_8_byte)
24502 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24503 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24507 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24508 static enum stringop_alg
24509 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24510 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24511 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24513 const struct stringop_algs * algs;
24514 bool optimize_for_speed;
24516 const struct processor_costs *cost;
24518 bool any_alg_usable_p = false;
24521 *dynamic_check = -1;
24523 /* Even if the string operation call is cold, we still might spend a lot
24524 of time processing large blocks. */
24525 if (optimize_function_for_size_p (cfun)
24526 || (optimize_insn_for_size_p ()
24528 || (expected_size != -1 && expected_size < 256))))
24529 optimize_for_speed = false;
24531 optimize_for_speed = true;
24533 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24535 algs = &cost->memset[TARGET_64BIT != 0];
24537 algs = &cost->memcpy[TARGET_64BIT != 0];
24539 /* See maximal size for user defined algorithm. */
24540 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24542 enum stringop_alg candidate = algs->size[i].alg;
24543 bool usable = alg_usable_p (candidate, memset);
24544 any_alg_usable_p |= usable;
24546 if (candidate != libcall && candidate && usable)
24547 max = algs->size[i].max;
24550 /* If expected size is not known but max size is small enough
24551 so inline version is a win, set expected size into
24553 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24554 && expected_size == -1)
24555 expected_size = min_size / 2 + max_size / 2;
24557 /* If the user specified the algorithm, honor it if possible. */
24558 if (ix86_stringop_alg != no_stringop
24559 && alg_usable_p (ix86_stringop_alg, memset))
24560 return ix86_stringop_alg;
24561 /* rep; movq or rep; movl is the smallest variant. */
24562 else if (!optimize_for_speed)
24565 if (!count || (count & 3) || (memset && !zero_memset))
24566 return alg_usable_p (rep_prefix_1_byte, memset)
24567 ? rep_prefix_1_byte : loop_1_byte;
24569 return alg_usable_p (rep_prefix_4_byte, memset)
24570 ? rep_prefix_4_byte : loop;
24572 /* Very tiny blocks are best handled via the loop, REP is expensive to
24574 else if (expected_size != -1 && expected_size < 4)
24575 return loop_1_byte;
24576 else if (expected_size != -1)
24578 enum stringop_alg alg = libcall;
24579 bool alg_noalign = false;
24580 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24582 /* We get here if the algorithms that were not libcall-based
24583 were rep-prefix based and we are unable to use rep prefixes
24584 based on global register usage. Break out of the loop and
24585 use the heuristic below. */
24586 if (algs->size[i].max == 0)
24588 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24590 enum stringop_alg candidate = algs->size[i].alg;
24592 if (candidate != libcall && alg_usable_p (candidate, memset))
24595 alg_noalign = algs->size[i].noalign;
24597 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24598 last non-libcall inline algorithm. */
24599 if (TARGET_INLINE_ALL_STRINGOPS)
24601 /* When the current size is best to be copied by a libcall,
24602 but we are still forced to inline, run the heuristic below
24603 that will pick code for medium sized blocks. */
24604 if (alg != libcall)
24606 *noalign = alg_noalign;
24609 else if (!any_alg_usable_p)
24612 else if (alg_usable_p (candidate, memset))
24614 *noalign = algs->size[i].noalign;
24620 /* When asked to inline the call anyway, try to pick meaningful choice.
24621 We look for maximal size of block that is faster to copy by hand and
24622 take blocks of at most of that size guessing that average size will
24623 be roughly half of the block.
24625 If this turns out to be bad, we might simply specify the preferred
24626 choice in ix86_costs. */
24627 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24628 && (algs->unknown_size == libcall
24629 || !alg_usable_p (algs->unknown_size, memset)))
24631 enum stringop_alg alg;
24633 /* If there aren't any usable algorithms, then recursing on
24634 smaller sizes isn't going to find anything. Just return the
24635 simple byte-at-a-time copy loop. */
24636 if (!any_alg_usable_p)
24638 /* Pick something reasonable. */
24639 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24640 *dynamic_check = 128;
24641 return loop_1_byte;
24645 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24646 zero_memset, dynamic_check, noalign);
24647 gcc_assert (*dynamic_check == -1);
24648 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24649 *dynamic_check = max;
24651 gcc_assert (alg != libcall);
24654 return (alg_usable_p (algs->unknown_size, memset)
24655 ? algs->unknown_size : libcall);
24658 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24659 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24661 decide_alignment (int align,
24662 enum stringop_alg alg,
24664 machine_mode move_mode)
24666 int desired_align = 0;
24668 gcc_assert (alg != no_stringop);
24670 if (alg == libcall)
24672 if (move_mode == VOIDmode)
24675 desired_align = GET_MODE_SIZE (move_mode);
24676 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
24677 copying whole cacheline at once. */
24678 if (TARGET_PENTIUMPRO
24679 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24684 if (desired_align < align)
24685 desired_align = align;
24686 if (expected_size != -1 && expected_size < 4)
24687 desired_align = align;
24689 return desired_align;
24693 /* Helper function for memcpy. For QImode value 0xXY produce
24694 0xXYXYXYXY of the width specified by MODE. This is essentially
24695 a * 0x10101010, but we can do slightly better than
24696 synth_mult by unwinding the sequence by hand on CPUs with
24699 promote_duplicated_reg (machine_mode mode, rtx val)
24701 machine_mode valmode = GET_MODE (val);
24703 int nops = mode == DImode ? 3 : 2;
24705 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24706 if (val == const0_rtx)
24707 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24708 if (CONST_INT_P (val))
24710 HOST_WIDE_INT v = INTVAL (val) & 255;
24714 if (mode == DImode)
24715 v |= (v << 16) << 16;
24716 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24719 if (valmode == VOIDmode)
24721 if (valmode != QImode)
24722 val = gen_lowpart (QImode, val);
24723 if (mode == QImode)
24725 if (!TARGET_PARTIAL_REG_STALL)
24727 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24728 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24729 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24730 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24732 rtx reg = convert_modes (mode, QImode, val, true);
24733 tmp = promote_duplicated_reg (mode, const1_rtx);
24734 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24739 rtx reg = convert_modes (mode, QImode, val, true);
24741 if (!TARGET_PARTIAL_REG_STALL)
24742 if (mode == SImode)
24743 emit_insn (gen_movsi_insv_1 (reg, reg));
24745 emit_insn (gen_movdi_insv_1 (reg, reg));
24748 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24749 NULL, 1, OPTAB_DIRECT);
24751 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24753 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24754 NULL, 1, OPTAB_DIRECT);
24755 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24756 if (mode == SImode)
24758 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24759 NULL, 1, OPTAB_DIRECT);
24760 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24765 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24766 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24767 alignment from ALIGN to DESIRED_ALIGN. */
24769 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24775 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24776 promoted_val = promote_duplicated_reg (DImode, val);
24777 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24778 promoted_val = promote_duplicated_reg (SImode, val);
24779 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24780 promoted_val = promote_duplicated_reg (HImode, val);
24782 promoted_val = val;
24784 return promoted_val;
24787 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24788 operations when profitable. The code depends upon architecture, block size
24789 and alignment, but always has one of the following overall structures:
24791 Aligned move sequence:
24793 1) Prologue guard: Conditional that jumps up to epilogues for small
24794 blocks that can be handled by epilogue alone. This is faster
24795 but also needed for correctness, since prologue assume the block
24796 is larger than the desired alignment.
24798 Optional dynamic check for size and libcall for large
24799 blocks is emitted here too, with -minline-stringops-dynamically.
24801 2) Prologue: copy first few bytes in order to get destination
24802 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24803 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24804 copied. We emit either a jump tree on power of two sized
24805 blocks, or a byte loop.
24807 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24808 with specified algorithm.
24810 4) Epilogue: code copying tail of the block that is too small to be
24811 handled by main body (or up to size guarded by prologue guard).
24813 Misaligned move sequence
24815 1) misaligned move prologue/epilogue containing:
24816 a) Prologue handling small memory blocks and jumping to done_label
24817 (skipped if blocks are known to be large enough)
24818 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24819 needed by single possibly misaligned move
24820 (skipped if alignment is not needed)
24821 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24823 2) Zero size guard dispatching to done_label, if needed
24825 3) dispatch to library call, if needed,
24827 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24828 with specified algorithm. */
24830 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24831 rtx align_exp, rtx expected_align_exp,
24832 rtx expected_size_exp, rtx min_size_exp,
24833 rtx max_size_exp, rtx probable_max_size_exp,
24838 rtx_code_label *label = NULL;
24840 rtx_code_label *jump_around_label = NULL;
24841 HOST_WIDE_INT align = 1;
24842 unsigned HOST_WIDE_INT count = 0;
24843 HOST_WIDE_INT expected_size = -1;
24844 int size_needed = 0, epilogue_size_needed;
24845 int desired_align = 0, align_bytes = 0;
24846 enum stringop_alg alg;
24847 rtx promoted_val = NULL;
24848 rtx vec_promoted_val = NULL;
24849 bool force_loopy_epilogue = false;
24851 bool need_zero_guard = false;
24853 machine_mode move_mode = VOIDmode;
24854 int unroll_factor = 1;
24855 /* TODO: Once value ranges are available, fill in proper data. */
24856 unsigned HOST_WIDE_INT min_size = 0;
24857 unsigned HOST_WIDE_INT max_size = -1;
24858 unsigned HOST_WIDE_INT probable_max_size = -1;
24859 bool misaligned_prologue_used = false;
24861 if (CONST_INT_P (align_exp))
24862 align = INTVAL (align_exp);
24863 /* i386 can do misaligned access on reasonably increased cost. */
24864 if (CONST_INT_P (expected_align_exp)
24865 && INTVAL (expected_align_exp) > align)
24866 align = INTVAL (expected_align_exp);
24867 /* ALIGN is the minimum of destination and source alignment, but we care here
24868 just about destination alignment. */
24870 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24871 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24873 if (CONST_INT_P (count_exp))
24875 min_size = max_size = probable_max_size = count = expected_size
24876 = INTVAL (count_exp);
24877 /* When COUNT is 0, there is nothing to do. */
24884 min_size = INTVAL (min_size_exp);
24886 max_size = INTVAL (max_size_exp);
24887 if (probable_max_size_exp)
24888 probable_max_size = INTVAL (probable_max_size_exp);
24889 if (CONST_INT_P (expected_size_exp))
24890 expected_size = INTVAL (expected_size_exp);
24893 /* Make sure we don't need to care about overflow later on. */
24894 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24897 /* Step 0: Decide on preferred algorithm, desired alignment and
24898 size of chunks to be copied by main loop. */
24899 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24901 issetmem && val_exp == const0_rtx,
24902 &dynamic_check, &noalign);
24903 if (alg == libcall)
24905 gcc_assert (alg != no_stringop);
24907 /* For now vector-version of memset is generated only for memory zeroing, as
24908 creating of promoted vector value is very cheap in this case. */
24909 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24910 alg = unrolled_loop;
24913 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24914 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24916 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24919 move_mode = word_mode;
24925 gcc_unreachable ();
24927 need_zero_guard = true;
24928 move_mode = QImode;
24931 need_zero_guard = true;
24933 case unrolled_loop:
24934 need_zero_guard = true;
24935 unroll_factor = (TARGET_64BIT ? 4 : 2);
24938 need_zero_guard = true;
24940 /* Find the widest supported mode. */
24941 move_mode = word_mode;
24942 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24943 != CODE_FOR_nothing)
24944 move_mode = GET_MODE_WIDER_MODE (move_mode);
24946 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24947 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24948 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24950 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24951 move_mode = mode_for_vector (word_mode, nunits);
24952 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24953 move_mode = word_mode;
24955 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24957 case rep_prefix_8_byte:
24958 move_mode = DImode;
24960 case rep_prefix_4_byte:
24961 move_mode = SImode;
24963 case rep_prefix_1_byte:
24964 move_mode = QImode;
24967 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24968 epilogue_size_needed = size_needed;
24970 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24971 if (!TARGET_ALIGN_STRINGOPS || noalign)
24972 align = desired_align;
24974 /* Step 1: Prologue guard. */
24976 /* Alignment code needs count to be in register. */
24977 if (CONST_INT_P (count_exp) && desired_align > align)
24979 if (INTVAL (count_exp) > desired_align
24980 && INTVAL (count_exp) > size_needed)
24983 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24984 if (align_bytes <= 0)
24987 align_bytes = desired_align - align_bytes;
24989 if (align_bytes == 0)
24990 count_exp = force_reg (counter_mode (count_exp), count_exp);
24992 gcc_assert (desired_align >= 1 && align >= 1);
24994 /* Misaligned move sequences handle both prologue and epilogue at once.
24995 Default code generation results in a smaller code for large alignments
24996 and also avoids redundant job when sizes are known precisely. */
24997 misaligned_prologue_used
24998 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24999 && MAX (desired_align, epilogue_size_needed) <= 32
25000 && desired_align <= epilogue_size_needed
25001 && ((desired_align > align && !align_bytes)
25002 || (!count && epilogue_size_needed > 1)));
25004 /* Do the cheap promotion to allow better CSE across the
25005 main loop and epilogue (ie one load of the big constant in the
25007 For now the misaligned move sequences do not have fast path
25008 without broadcasting. */
25009 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25011 if (alg == vector_loop)
25013 gcc_assert (val_exp == const0_rtx);
25014 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25015 promoted_val = promote_duplicated_reg_to_size (val_exp,
25016 GET_MODE_SIZE (word_mode),
25017 desired_align, align);
25021 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25022 desired_align, align);
25025 /* Misaligned move sequences handle both prologues and epilogues at once.
25026 Default code generation results in smaller code for large alignments and
25027 also avoids redundant job when sizes are known precisely. */
25028 if (misaligned_prologue_used)
25030 /* Misaligned move prologue handled small blocks by itself. */
25031 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25032 (dst, src, &destreg, &srcreg,
25033 move_mode, promoted_val, vec_promoted_val,
25035 &jump_around_label,
25036 desired_align < align
25037 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25038 desired_align, align, &min_size, dynamic_check, issetmem);
25040 src = change_address (src, BLKmode, srcreg);
25041 dst = change_address (dst, BLKmode, destreg);
25042 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25043 epilogue_size_needed = 0;
25044 if (need_zero_guard && !min_size)
25046 /* It is possible that we copied enough so the main loop will not
25048 gcc_assert (size_needed > 1);
25049 if (jump_around_label == NULL_RTX)
25050 jump_around_label = gen_label_rtx ();
25051 emit_cmp_and_jump_insns (count_exp,
25052 GEN_INT (size_needed),
25053 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25054 if (expected_size == -1
25055 || expected_size < (desired_align - align) / 2 + size_needed)
25056 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25058 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25061 /* Ensure that alignment prologue won't copy past end of block. */
25062 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25064 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25065 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25066 Make sure it is power of 2. */
25067 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25069 /* To improve performance of small blocks, we jump around the VAL
25070 promoting mode. This mean that if the promoted VAL is not constant,
25071 we might not use it in the epilogue and have to use byte
25073 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25074 force_loopy_epilogue = true;
25075 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25076 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25078 /* If main algorithm works on QImode, no epilogue is needed.
25079 For small sizes just don't align anything. */
25080 if (size_needed == 1)
25081 desired_align = align;
25086 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25088 label = gen_label_rtx ();
25089 emit_cmp_and_jump_insns (count_exp,
25090 GEN_INT (epilogue_size_needed),
25091 LTU, 0, counter_mode (count_exp), 1, label);
25092 if (expected_size == -1 || expected_size < epilogue_size_needed)
25093 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25095 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25099 /* Emit code to decide on runtime whether library call or inline should be
25101 if (dynamic_check != -1)
25103 if (!issetmem && CONST_INT_P (count_exp))
25105 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25107 emit_block_move_via_libcall (dst, src, count_exp, false);
25108 count_exp = const0_rtx;
25114 rtx_code_label *hot_label = gen_label_rtx ();
25115 if (jump_around_label == NULL_RTX)
25116 jump_around_label = gen_label_rtx ();
25117 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25118 LEU, 0, counter_mode (count_exp),
25120 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25122 set_storage_via_libcall (dst, count_exp, val_exp, false);
25124 emit_block_move_via_libcall (dst, src, count_exp, false);
25125 emit_jump (jump_around_label);
25126 emit_label (hot_label);
25130 /* Step 2: Alignment prologue. */
25131 /* Do the expensive promotion once we branched off the small blocks. */
25132 if (issetmem && !promoted_val)
25133 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25134 desired_align, align);
25136 if (desired_align > align && !misaligned_prologue_used)
25138 if (align_bytes == 0)
25140 /* Except for the first move in prologue, we no longer know
25141 constant offset in aliasing info. It doesn't seem worth
25142 the pain to maintain it for the first move, so throw away
25144 dst = change_address (dst, BLKmode, destreg);
25146 src = change_address (src, BLKmode, srcreg);
25147 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25148 promoted_val, vec_promoted_val,
25149 count_exp, align, desired_align,
25151 /* At most desired_align - align bytes are copied. */
25152 if (min_size < (unsigned)(desired_align - align))
25155 min_size -= desired_align - align;
25159 /* If we know how many bytes need to be stored before dst is
25160 sufficiently aligned, maintain aliasing info accurately. */
25161 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25169 count_exp = plus_constant (counter_mode (count_exp),
25170 count_exp, -align_bytes);
25171 count -= align_bytes;
25172 min_size -= align_bytes;
25173 max_size -= align_bytes;
25175 if (need_zero_guard
25177 && (count < (unsigned HOST_WIDE_INT) size_needed
25178 || (align_bytes == 0
25179 && count < ((unsigned HOST_WIDE_INT) size_needed
25180 + desired_align - align))))
25182 /* It is possible that we copied enough so the main loop will not
25184 gcc_assert (size_needed > 1);
25185 if (label == NULL_RTX)
25186 label = gen_label_rtx ();
25187 emit_cmp_and_jump_insns (count_exp,
25188 GEN_INT (size_needed),
25189 LTU, 0, counter_mode (count_exp), 1, label);
25190 if (expected_size == -1
25191 || expected_size < (desired_align - align) / 2 + size_needed)
25192 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25194 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25197 if (label && size_needed == 1)
25199 emit_label (label);
25200 LABEL_NUSES (label) = 1;
25202 epilogue_size_needed = 1;
25204 promoted_val = val_exp;
25206 else if (label == NULL_RTX && !misaligned_prologue_used)
25207 epilogue_size_needed = size_needed;
25209 /* Step 3: Main loop. */
25216 gcc_unreachable ();
25219 case unrolled_loop:
25220 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25221 count_exp, move_mode, unroll_factor,
25222 expected_size, issetmem);
25225 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25226 vec_promoted_val, count_exp, move_mode,
25227 unroll_factor, expected_size, issetmem);
25229 case rep_prefix_8_byte:
25230 case rep_prefix_4_byte:
25231 case rep_prefix_1_byte:
25232 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25233 val_exp, count_exp, move_mode, issetmem);
25236 /* Adjust properly the offset of src and dest memory for aliasing. */
25237 if (CONST_INT_P (count_exp))
25240 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25241 (count / size_needed) * size_needed);
25242 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25243 (count / size_needed) * size_needed);
25248 src = change_address (src, BLKmode, srcreg);
25249 dst = change_address (dst, BLKmode, destreg);
25252 /* Step 4: Epilogue to copy the remaining bytes. */
25256 /* When the main loop is done, COUNT_EXP might hold original count,
25257 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25258 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25259 bytes. Compensate if needed. */
25261 if (size_needed < epilogue_size_needed)
25264 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25265 GEN_INT (size_needed - 1), count_exp, 1,
25267 if (tmp != count_exp)
25268 emit_move_insn (count_exp, tmp);
25270 emit_label (label);
25271 LABEL_NUSES (label) = 1;
25274 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25276 if (force_loopy_epilogue)
25277 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25278 epilogue_size_needed);
25282 expand_setmem_epilogue (dst, destreg, promoted_val,
25283 vec_promoted_val, count_exp,
25284 epilogue_size_needed);
25286 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25287 epilogue_size_needed);
25290 if (jump_around_label)
25291 emit_label (jump_around_label);
25296 /* Expand the appropriate insns for doing strlen if not just doing
25299 out = result, initialized with the start address
25300 align_rtx = alignment of the address.
25301 scratch = scratch register, initialized with the startaddress when
25302 not aligned, otherwise undefined
25304 This is just the body. It needs the initializations mentioned above and
25305 some address computing at the end. These things are done in i386.md. */
/* Emit an unrolled strlen body: probe up to 3 leading bytes one at a time
   until OUT is 4-byte aligned, then scan a word per iteration using the
   (x + 0xFEFEFEFF) & ~x & 0x80808080 zero-byte trick, and finally back OUT
   up to point exactly at the terminating NUL.
   NOTE(review): this extract is elided -- braces, some labels and the
   surrounding control flow are missing; comments only were added.  */
25308 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25312 rtx_code_label *align_2_label = NULL;
25313 rtx_code_label *align_3_label = NULL;
25314 rtx_code_label *align_4_label = gen_label_rtx ();
25315 rtx_code_label *end_0_label = gen_label_rtx ();
25317 rtx tmpreg = gen_reg_rtx (SImode);
25318 rtx scratch = gen_reg_rtx (SImode);
/* A non-constant align_rtx presumably leaves ALIGN at its default --
   the initializer is outside this extract; TODO confirm.  */
25322 if (CONST_INT_P (align_rtx))
25323 align = INTVAL (align_rtx);
25325 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25327 /* Is there a known alignment and is it less than 4? */
25330 rtx scratch1 = gen_reg_rtx (Pmode);
25331 emit_move_insn (scratch1, out);
25332 /* Is there a known alignment and is it not 2? */
25335 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25336 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25338 /* Leave just the 3 lower bits. */
25339 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25340 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> two bytes to
   check, 3 -> one byte, otherwise fall through for three bytes.  */
25342 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25343 Pmode, 1, align_4_label);
25344 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25345 Pmode, 1, align_2_label);
25346 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25347 Pmode, 1, align_3_label);
25351 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25352 check if is aligned to 4 - byte. */
25354 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25355 NULL_RTX, 0, OPTAB_WIDEN);
25357 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25358 Pmode, 1, align_4_label);
25361 mem = change_address (src, QImode, out);
25363 /* Now compare the bytes. */
25365 /* Compare the first n unaligned byte on a byte per byte basis. */
25366 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25367 QImode, 1, end_0_label);
25369 /* Increment the address. */
25370 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25372 /* Not needed with an alignment of 2 */
25375 emit_label (align_2_label);
25377 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25380 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25382 emit_label (align_3_label);
25385 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25388 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25391 /* Generate loop to check 4 bytes at a time. It is not a good idea to
25392 align this loop. It gives only huge programs, but does not help to
25394 emit_label (align_4_label);
25396 mem = change_address (src, SImode, out);
25397 emit_move_insn (scratch, mem);
25398 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25400 /* This formula yields a nonzero result iff one of the bytes is zero.
25401 This saves three branches inside loop and many cycles. */
25403 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25404 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25405 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25406 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25407 gen_int_mode (0x80808080, SImode)));
25408 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free epilogue variant: use conditional moves to step OUT past
   the word's leading nonzero bytes without a jump.  */
25413 rtx reg = gen_reg_rtx (SImode);
25414 rtx reg2 = gen_reg_rtx (Pmode);
25415 emit_move_insn (reg, tmpreg);
25416 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25418 /* If zero is not in the first two bytes, move two bytes forward. */
25419 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25420 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25421 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25422 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25423 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25426 /* Emit lea manually to avoid clobbering of flags. */
25427 emit_insn (gen_rtx_SET (SImode, reg2,
25428 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25430 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25431 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25432 emit_insn (gen_rtx_SET (VOIDmode, out,
25433 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching epilogue variant (no cmov): test, jump, shift.  */
25439 rtx_code_label *end_2_label = gen_label_rtx ();
25440 /* Is zero in the first two bytes? */
25442 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25443 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25444 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25445 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25446 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25448 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25449 JUMP_LABEL (tmp) = end_2_label;
25451 /* Not in the first two. Move two bytes forward. */
25452 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25453 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25455 emit_label (end_2_label);
25459 /* Avoid branch in fixing the byte. */
25460 tmpreg = gen_lowpart (QImode, tmpreg);
25461 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25462 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25463 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
/* OUT overshot by up to 3+carry bytes; sbb it back into place.  */
25464 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25466 emit_label (end_0_label);
25469 /* Expand strlen. */
/* Expand strlen(SRC) into OUT.  Two strategies are visible here: the
   unrolled word-at-a-time scan (ix86_expand_strlensi_unroll_1) and the
   classic repnz-scasb sequence via UNSPEC_SCAS.  Returns a success flag
   (return statements are outside this elided extract -- TODO confirm).  */
25472 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25474 rtx addr, scratch1, scratch2, scratch3, scratch4;
25476 /* The generic case of strlen expander is long. Avoid it's
25477 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
25479 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25480 && !TARGET_INLINE_ALL_STRINGOPS
25481 && !optimize_insn_for_size_p ()
25482 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25485 addr = force_reg (Pmode, XEXP (src, 0));
25486 scratch1 = gen_reg_rtx (Pmode);
/* Unrolled variant: only when scanning for NUL ('\0') and optimizing
   for speed.  */
25488 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25489 && !optimize_insn_for_size_p ())
25491 /* Well it seems that some optimizer does not combine a call like
25492 foo(strlen(bar), strlen(bar));
25493 when the move and the subtraction is done here. It does calculate
25494 the length just once when these instructions are done inside of
25495 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25496 often used and I use one fewer register for the lifetime of
25497 output_strlen_unroll() this is better. */
25499 emit_move_insn (out, addr);
25501 ix86_expand_strlensi_unroll_1 (out, src, align);
25503 /* strlensi_unroll_1 returns the address of the zero at the end of
25504 the string, like memchr(), so compute the length by subtracting
25505 the start address. */
25506 emit_insn (ix86_gen_sub3 (out, out, addr));
/* repnz-scasb variant below: clobbers eax/ecx/edi implicitly.  */
25512 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25513 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25516 scratch2 = gen_reg_rtx (Pmode);
25517 scratch3 = gen_reg_rtx (Pmode);
25518 scratch4 = force_reg (Pmode, constm1_rtx);
25520 emit_move_insn (scratch3, addr);
25521 eoschar = force_reg (QImode, eoschar);
25523 src = replace_equiv_address_nv (src, scratch3);
25525 /* If .md starts supporting :P, this can be done in .md. */
25526 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25527 scratch4), UNSPEC_SCAS);
25528 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves a negative count; length = ~count - 1.  */
25529 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25530 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25535 /* For given symbol (function) construct code to compute address of it's PLT
25536 entry in large x86-64 PIC model. */
/* Materialize the PLT entry address of SYMBOL for the large x86-64 PIC
   code model: tmp = PLTOFF(symbol) + PIC register.  Only valid when
   Pmode is DImode and the code model is CM_LARGE_PIC (asserted below).
   Returns TMP (the return statement is outside this elided extract).  */
25538 construct_plt_address (rtx symbol)
25542 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25543 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25544 gcc_assert (Pmode == DImode);
25546 tmp = gen_reg_rtx (Pmode);
25547 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25549 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25550 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a (sibling) call to FNADDR, optionally assigning the result to
   RETVAL and popping POP bytes of stack on return.  Builds the call
   pattern plus any stack-pop SET into a PARALLEL, records PIC/AL uses
   and MS-ABI clobbers on the call's USAGE list, and emits it.
   NOTE(review): extract is elided; some conditions and declarations
   (e.g. vec[], callarg2 handling) are missing lines.  */
25555 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25557 rtx pop, bool sibcall)
25560 rtx use = NULL, call;
25561 unsigned int vec_len = 0;
25563 if (pop == const0_rtx)
/* 64-bit ABIs never use a callee-pop amount here.  */
25565 gcc_assert (!TARGET_64BIT || !pop);
25567 if (TARGET_MACHO && !TARGET_64BIT)
25570 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25571 fnaddr = machopic_indirect_call_target (fnaddr);
25576 /* Static functions and indirect calls don't need the pic register. */
25579 || (ix86_cmodel == CM_LARGE_PIC
25580 && DEFAULT_ABI != MS_ABI))
25581 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25582 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25584 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25585 if (ix86_use_pseudo_pic_reg ())
25586 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25587 pic_offset_table_rtx);
25591 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25592 parameters passed in vector registers. */
25594 && (INTVAL (callarg2) > 0
25595 || (INTVAL (callarg2) == 0
25596 && (TARGET_SSE || !flag_skip_rax_setup))))
/* AL carries the number of vector registers used by a varargs call.  */
25598 rtx al = gen_rtx_REG (QImode, AX_REG);
25599 emit_move_insn (al, callarg2);
25600 use_reg (&use, al);
25603 if (ix86_cmodel == CM_LARGE_PIC
25606 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25607 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25608 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25610 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25611 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25613 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25614 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25617 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25621 /* We should add bounds as destination register in case
25622 pointer with bounds may be returned. */
25623 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25625 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25626 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25627 if (GET_CODE (retval) == PARALLEL)
25629 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25630 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25631 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25632 retval = chkp_join_splitted_slot (retval, par);
25636 retval = gen_rtx_PARALLEL (VOIDmode,
25637 gen_rtvec (3, retval, b0, b1));
25638 chkp_put_regs_to_expr_list (retval);
25642 call = gen_rtx_SET (VOIDmode, retval, call);
25644 vec[vec_len++] = call;
/* Fold the callee-pop stack adjustment into the same PARALLEL.  */
25648 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25649 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25650 vec[vec_len++] = pop;
25653 if (TARGET_64BIT_MS_ABI
25654 && (!callarg2 || INTVAL (callarg2) != -2)
25656 int const cregs_size
25657 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25660 for (i = 0; i < cregs_size; i++)
25662 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25663 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25665 clobber_reg (&use, gen_rtx_REG (mode, regno));
25670 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25671 call = emit_call_insn (call);
25673 CALL_INSN_FUNCTION_USAGE (call) = use;
25678 /* Output the assembly for a call instruction. */
/* Output the assembly text for call/sibcall INSN whose target operand is
   CALL_OP.  Chooses jmp vs call and direct (%P0) vs indirect (%A0)
   templates, and on SEH targets decides whether a trailing nop is needed
   so the unwinder does not scan past the call into the next function.  */
25681 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25683 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25684 bool seh_nop_p = false;
25687 if (SIBLING_CALL_P (insn))
25690 xasm = "%!jmp\t%P0";
25691 /* SEH epilogue detection requires the indirect branch case
25692 to include REX.W. */
25693 else if (TARGET_SEH)
25694 xasm = "%!rex.W jmp %A0";
25696 xasm = "%!jmp\t%A0";
25698 output_asm_insn (xasm, &call_op);
25702 /* SEH unwinding can require an extra nop to be emitted in several
25703 circumstances. Determine if we have one of those. */
25708 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25710 /* If we get to another real insn, we don't need the nop. */
25714 /* If we get to the epilogue note, prevent a catch region from
25715 being adjacent to the standard epilogue sequence. If non-
25716 call-exceptions, we'll have done this during epilogue emission. */
25717 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25718 && !flag_non_call_exceptions
25719 && !can_throw_internal (insn))
25726 /* If we didn't find a real insn following the call, prevent the
25727 unwinder from looking into the next function. */
25733 xasm = "%!call\t%P0";
25735 xasm = "%!call\t%A0";
25737 output_asm_insn (xasm, &call_op);
25745 /* Clear stack slot assignments remembered from previous functions.
25746 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and zero-initialize the per-function machine_function record
   (INIT_EXPANDERS hook).  -1 marks use_fast_prologue_epilogue_nregs as
   not-yet-computed; the call ABI defaults to the global ix86_abi.  */
25749 static struct machine_function *
25750 ix86_init_machine_status (void)
25752 struct machine_function *f;
25754 f = ggc_cleared_alloc<machine_function> ();
25755 f->use_fast_prologue_epilogue_nregs = -1;
25756 f->call_abi = ix86_abi;
25761 /* Return a MEM corresponding to a stack slot with mode MODE.
25762 Allocate a new slot if necessary.
25764 The RTL for a function can have several slots available: N is
25765 which slot to use. */
/* Return a MEM for stack slot (MODE, N), reusing a previously-created
   slot from the ix86_stack_locals cache when one exists, otherwise
   allocating a new one and prepending it to the list.  copy_rtx is used
   so callers can safely modify the returned MEM.  */
25768 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25770 struct stack_local_entry *s;
25772 gcc_assert (n < MAX_386_STACK_LOCALS);
25774 for (s = ix86_stack_locals; s; s = s->next)
25775 if (s->mode == mode && s->n == n)
25776 return validize_mem (copy_rtx (s->rtl));
25778 s = ggc_alloc<stack_local_entry> ();
25781 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25783 s->next = ix86_stack_locals;
25784 ix86_stack_locals = s;
25785 return validize_mem (copy_rtx (s->rtl));
/* TARGET_INSTANTIATE_DECLS hook: replace virtual registers with hard
   registers in every cached stack-local slot's RTL.  */
25789 ix86_instantiate_decls (void)
25791 struct stack_local_entry *s;
25793 for (s = ix86_stack_locals; s; s = s->next)
25794 if (s->rtl != NULL_RTX)
25795 instantiate_decl_rtl (s->rtl);
25798 /* Check whether x86 address PARTS is a pc-relative address. */
/* Return whether decomposed address PARTS is (or will be printed as) a
   %rip-relative reference: a bare displacement (no base, no index) that
   is a label, a non-TLS symbol, or one of the PC-relative unspecs.  */
25801 rip_relative_addr_p (struct ix86_address *parts)
25803 rtx base, index, disp;
25805 base = parts->base;
25806 index = parts->index;
25807 disp = parts->disp;
25809 if (disp && !base && !index)
25815 if (GET_CODE (disp) == CONST)
25816 symbol = XEXP (disp, 0);
/* Strip an additive constant offset: (symbol + const_int).  */
25817 if (GET_CODE (symbol) == PLUS
25818 && CONST_INT_P (XEXP (symbol, 1)))
25819 symbol = XEXP (symbol, 0);
25821 if (GET_CODE (symbol) == LABEL_REF
25822 || (GET_CODE (symbol) == SYMBOL_REF
25823 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25824 || (GET_CODE (symbol) == UNSPEC
25825 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25826 || XINT (symbol, 1) == UNSPEC_PCREL
25827 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25834 /* Calculate the length of the memory address in the instruction encoding.
25835 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25836 or other prefixes. We never generate addr32 prefix for LEA insn. */
/* Compute the encoded length in bytes of memory address ADDR: segment
   override, addr32 prefix, displacement size and SIB requirements --
   excluding modrm/opcode bytes.  LEA selects the lea-specific rules
   (lea never takes an addr32 prefix).  */
25839 memory_address_length (rtx addr, bool lea)
25841 struct ix86_address parts;
25842 rtx base, index, disp;
/* Autoincrement forms have a fixed, known length (value elided here).  */
25846 if (GET_CODE (addr) == PRE_DEC
25847 || GET_CODE (addr) == POST_INC
25848 || GET_CODE (addr) == PRE_MODIFY
25849 || GET_CODE (addr) == POST_MODIFY)
25852 ok = ix86_decompose_address (addr, &parts);
25855 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25857 /* If this is not LEA instruction, add the length of addr32 prefix. */
25858 if (TARGET_64BIT && !lea
25859 && (SImode_address_operand (addr, VOIDmode)
25860 || (parts.base && GET_MODE (parts.base) == SImode)
25861 || (parts.index && GET_MODE (parts.index) == SImode)))
25865 index = parts.index;
25868 if (base && GET_CODE (base) == SUBREG)
25869 base = SUBREG_REG (base);
25870 if (index && GET_CODE (index) == SUBREG)
25871 index = SUBREG_REG (index);
25873 gcc_assert (base == NULL_RTX || REG_P (base));
25874 gcc_assert (index == NULL_RTX || REG_P (index));
25877 - esp as the base always wants an index,
25878 - ebp as the base always wants a displacement,
25879 - r12 as the base always wants an index,
25880 - r13 as the base always wants a displacement. */
25882 /* Register Indirect. */
25883 if (base && !index && !disp)
25885 /* esp (for its index) and ebp (for its displacement) need
25886 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25888 if (base == arg_pointer_rtx
25889 || base == frame_pointer_rtx
25890 || REGNO (base) == SP_REG
25891 || REGNO (base) == BP_REG
25892 || REGNO (base) == R12_REG
25893 || REGNO (base) == R13_REG)
25897 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25898 is not disp32, but disp32(%rip), so for disp32
25899 SIB byte is needed, unless print_operand_address
25900 optimizes it into disp32(%rip) or (%rip) is implied
25902 else if (disp && !base && !index)
25905 if (rip_relative_addr_p (&parts))
25910 /* Find the length of the displacement constant. */
/* satisfies_constraint_K == signed 8-bit, i.e. a disp8 encoding.  */
25913 if (base && satisfies_constraint_K (disp))
25918 /* ebp always wants a displacement. Similarly r13. */
25919 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25922 /* An index requires the two-byte modrm form.... */
25924 /* ...like esp (or r12), which always wants an index. */
25925 || base == arg_pointer_rtx
25926 || base == frame_pointer_rtx
25927 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25934 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25935 is set, expect that insn have 8bit immediate alternative. */
/* Default for the "length_immediate" insn attribute: size in bytes of
   INSN's constant operand.  With SHORTFORM, an imm8-sign-extended
   alternative exists, so values in [-128, 127] (after truncation to the
   operand mode) count as one byte.  */
25937 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25941 extract_insn_cached (insn);
25942 for (i = recog_data.n_operands - 1; i >= 0; --i)
25943 if (CONSTANT_P (recog_data.operand[i]))
25945 enum attr_mode mode = get_attr_mode (insn);
25948 if (shortform && CONST_INT_P (recog_data.operand[i]))
25950 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand mode before the signed-8-bit range test.  */
25957 ival = trunc_int_for_mode (ival, HImode);
25960 ival = trunc_int_for_mode (ival, SImode);
25965 if (IN_RANGE (ival, -128, 127))
25982 /* Immediates for DImode instructions are encoded
25983 as 32bit sign extended values. */
25988 fatal_insn ("unknown insn mode", insn);
25994 /* Compute default value for "length_address" attribute. */
/* Default for the "length_address" insn attribute: address-encoding
   length of INSN's (first non-ignored) memory operand, or of the lea
   source address for TYPE_LEA insns.  */
25996 ix86_attr_length_address_default (rtx_insn *insn)
26000 if (get_attr_type (insn) == TYPE_LEA)
26002 rtx set = PATTERN (insn), addr;
26004 if (GET_CODE (set) == PARALLEL)
26005 set = XVECEXP (set, 0, 0);
26007 gcc_assert (GET_CODE (set) == SET);
26009 addr = SET_SRC (set);
26011 return memory_address_length (addr, true);
26014 extract_insn_cached (insn);
26015 for (i = recog_data.n_operands - 1; i >= 0; --i)
26016 if (MEM_P (recog_data.operand[i]))
26018 constrain_operands_cached (insn, reload_completed);
26019 if (which_alternative != -1)
/* Walk the constraint string to the chosen alternative so that
   operands constrained 'X' (ignored) can be skipped.  */
26021 const char *constraints = recog_data.constraints[i];
26022 int alt = which_alternative;
26024 while (*constraints == '=' || *constraints == '+')
26027 while (*constraints++ != ',')
26029 /* Skip ignored operands. */
26030 if (*constraints == 'X')
26033 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26038 /* Compute default value for "length_vex" attribute. It includes
26039 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Default for the "length_vex" insn attribute: VEX prefix (2 or 3
   bytes) plus the one opcode byte.  The 3-byte form is required for a
   non-0f opcode map, VEX.W, or any use of REX.W/REX.X/REX.B-extended
   registers; 32-bit mode can always use the 2-byte form.  */
26042 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26047 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
26048 byte VEX prefix. */
26049 if (!has_0f_opcode || has_vex_w)
26052 /* We can always use 2 byte VEX prefix in 32bit. */
26056 extract_insn_cached (insn);
26058 for (i = recog_data.n_operands - 1; i >= 0; --i)
26059 if (REG_P (recog_data.operand[i]))
26061 /* REX.W bit uses 3 byte VEX prefix. */
26062 if (GET_MODE (recog_data.operand[i]) == DImode
26063 && GENERAL_REG_P (recog_data.operand[i]))
26068 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26069 if (MEM_P (recog_data.operand[i])
26070 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26077 /* Return the maximum number of instructions a cpu can issue. */
/* TARGET_SCHED_ISSUE_RATE hook: instructions-per-cycle for the tuned
   CPU, grouped by microarchitecture.  (Switch head and the per-group
   return statements fall in elided lines of this extract.)  */
26080 ix86_issue_rate (void)
26084 case PROCESSOR_PENTIUM:
26085 case PROCESSOR_BONNELL:
26086 case PROCESSOR_SILVERMONT:
26087 case PROCESSOR_KNL:
26088 case PROCESSOR_INTEL:
26090 case PROCESSOR_BTVER2:
26091 case PROCESSOR_PENTIUM4:
26092 case PROCESSOR_NOCONA:
26095 case PROCESSOR_PENTIUMPRO:
26096 case PROCESSOR_ATHLON:
26098 case PROCESSOR_AMDFAM10:
26099 case PROCESSOR_GENERIC:
26100 case PROCESSOR_BTVER1:
26103 case PROCESSOR_BDVER1:
26104 case PROCESSOR_BDVER2:
26105 case PROCESSOR_BDVER3:
26106 case PROCESSOR_BDVER4:
26107 case PROCESSOR_CORE2:
26108 case PROCESSOR_NEHALEM:
26109 case PROCESSOR_SANDYBRIDGE:
26110 case PROCESSOR_HASWELL:
26118 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26119 by DEP_INSN and nothing set by DEP_INSN. */
26122 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26126 /* Simplify the test for uninteresting insns. */
26127 if (insn_type != TYPE_SETCC
26128 && insn_type != TYPE_ICMOV
26129 && insn_type != TYPE_FCMOV
26130 && insn_type != TYPE_IBR)
26133 if ((set = single_set (dep_insn)) != 0)
26135 set = SET_DEST (set);
26138 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26139 && XVECLEN (PATTERN (dep_insn), 0) == 2
26140 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26141 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26143 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26144 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26149 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26152 /* This test is true if the dependent insn reads the flags but
26153 not any other potentially set register. */
26154 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26157 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26163 /* Return true iff USE_INSN has a memory address with operands set by
/* Return true iff USE_INSN's (first) memory operand has an address that
   SET_INSN modifies -- an address-generation-interlock dependency.  */
26167 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26170 extract_insn_cached (use_insn);
26171 for (i = recog_data.n_operands - 1; i >= 0; --i)
26172 if (MEM_P (recog_data.operand[i]))
26174 rtx addr = XEXP (recog_data.operand[i], 0);
26175 return modified_in_p (addr, set_insn) != 0;
26180 /* Helper function for exact_store_load_dependency.
26181 Return true if addr is found in insn. */
/* Helper function for exact_store_load_dependency.
   Recursively walk the rtx INSN and return true if ADDR occurs
   (rtx_equal_p) anywhere inside it.  */
26183 exact_dependency_1 (rtx addr, rtx insn)
26185 enum rtx_code code;
26186 const char *format_ptr;
26189 code = GET_CODE (insn);
26193 if (rtx_equal_p (addr, insn))
/* Generic recursion over the rtx format string: 'e' = sub-expression,
   'E' = vector of sub-expressions.  */
26208 format_ptr = GET_RTX_FORMAT (code);
26209 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26211 switch (*format_ptr++)
26214 if (exact_dependency_1 (addr, XEXP (insn, i)))
26218 for (j = 0; j < XVECLEN (insn, i); j++)
26219 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26227 /* Return true if there exists exact dependency for store & load, i.e.
26228 the same memory address is used in them. */
/* Return true if STORE's destination memory address appears in LOAD's
   source, i.e. the pair forms an exact store->load forwarding chain.  */
26230 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26234 set1 = single_set (store);
26237 if (!MEM_P (SET_DEST (set1)))
26239 set2 = single_set (load);
26242 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's COST of the
   dependency LINK between DEP_INSN (producer) and INSN (consumer),
   per tuned microarchitecture (the `switch (ix86_tune)` head falls in
   an elided line of this extract).  */
26248 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26250 enum attr_type insn_type, dep_insn_type;
26251 enum attr_memory memory;
26253 int dep_insn_code_number;
26255 /* Anti and output dependencies have zero cost on all CPUs. */
26256 if (REG_NOTE_KIND (link) != 0)
26259 dep_insn_code_number = recog_memoized (dep_insn);
26261 /* If we can't recognize the insns, we can't really do anything. */
26262 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26265 insn_type = get_attr_type (insn);
26266 dep_insn_type = get_attr_type (dep_insn);
26270 case PROCESSOR_PENTIUM:
26271 /* Address Generation Interlock adds a cycle of latency. */
26272 if (insn_type == TYPE_LEA)
26274 rtx addr = PATTERN (insn);
26276 if (GET_CODE (addr) == PARALLEL)
26277 addr = XVECEXP (addr, 0, 0);
26279 gcc_assert (GET_CODE (addr) == SET);
26281 addr = SET_SRC (addr);
26282 if (modified_in_p (addr, dep_insn))
26285 else if (ix86_agi_dependent (dep_insn, insn))
26288 /* ??? Compares pair with jump/setcc. */
26289 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26292 /* Floating point stores require value to be ready one cycle earlier. */
26293 if (insn_type == TYPE_FMOV
26294 && get_attr_memory (insn) == MEMORY_STORE
26295 && !ix86_agi_dependent (dep_insn, insn))
26299 case PROCESSOR_PENTIUMPRO:
26300 /* INT->FP conversion is expensive. */
26301 if (get_attr_fp_int_src (dep_insn))
26304 /* There is one cycle extra latency between an FP op and a store. */
26305 if (insn_type == TYPE_FMOV
26306 && (set = single_set (dep_insn)) != NULL_RTX
26307 && (set2 = single_set (insn)) != NULL_RTX
26308 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26309 && MEM_P (SET_DEST (set2)))
26312 memory = get_attr_memory (insn);
26314 /* Show ability of reorder buffer to hide latency of load by executing
26315 in parallel with previous instruction in case
26316 previous instruction is not needed to compute the address. */
26317 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26318 && !ix86_agi_dependent (dep_insn, insn))
26320 /* Claim moves to take one cycle, as core can issue one load
26321 at time and the next load can start cycle later. */
26322 if (dep_insn_type == TYPE_IMOV
26323 || dep_insn_type == TYPE_FMOV)
26331 /* The esp dependency is resolved before
26332 the instruction is really finished. */
26333 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26334 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26337 /* INT->FP conversion is expensive. */
26338 if (get_attr_fp_int_src (dep_insn))
26341 memory = get_attr_memory (insn);
26343 /* Show ability of reorder buffer to hide latency of load by executing
26344 in parallel with previous instruction in case
26345 previous instruction is not needed to compute the address. */
26346 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26347 && !ix86_agi_dependent (dep_insn, insn))
26349 /* Claim moves to take one cycle, as core can issue one load
26350 at time and the next load can start cycle later. */
26351 if (dep_insn_type == TYPE_IMOV
26352 || dep_insn_type == TYPE_FMOV)
26361 case PROCESSOR_AMDFAM10:
26362 case PROCESSOR_BDVER1:
26363 case PROCESSOR_BDVER2:
26364 case PROCESSOR_BDVER3:
26365 case PROCESSOR_BDVER4:
26366 case PROCESSOR_BTVER1:
26367 case PROCESSOR_BTVER2:
26368 case PROCESSOR_GENERIC:
26369 /* Stack engine allows to execute push&pop instructions in parallel. */
26370 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26371 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26375 case PROCESSOR_ATHLON:
26377 memory = get_attr_memory (insn);
26379 /* Show ability of reorder buffer to hide latency of load by executing
26380 in parallel with previous instruction in case
26381 previous instruction is not needed to compute the address. */
26382 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26383 && !ix86_agi_dependent (dep_insn, insn))
26385 enum attr_unit unit = get_attr_unit (insn);
26388 /* Because of the difference between the length of integer and
26389 floating unit pipeline preparation stages, the memory operands
26390 for floating point are cheaper.
26392 ??? For Athlon it the difference is most probably 2. */
26393 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26396 loadcost = TARGET_ATHLON ? 2 : 0;
26398 if (cost >= loadcost)
26405 case PROCESSOR_CORE2:
26406 case PROCESSOR_NEHALEM:
26407 case PROCESSOR_SANDYBRIDGE:
26408 case PROCESSOR_HASWELL:
26409 /* Stack engine allows to execute push&pop instructions in parallel. */
26410 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26411 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26414 memory = get_attr_memory (insn);
26416 /* Show ability of reorder buffer to hide latency of load by executing
26417 in parallel with previous instruction in case
26418 previous instruction is not needed to compute the address. */
26419 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26420 && !ix86_agi_dependent (dep_insn, insn))
26429 case PROCESSOR_SILVERMONT:
26430 case PROCESSOR_KNL:
26431 case PROCESSOR_INTEL:
26432 if (!reload_completed)
26435 /* Increase cost of integer loads. */
26436 memory = get_attr_memory (dep_insn);
26437 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26439 enum attr_unit unit = get_attr_unit (dep_insn);
26440 if (unit == UNIT_INTEGER && cost == 1)
26442 if (memory == MEMORY_LOAD)
26446 /* Increase cost of ld/st for short int types only
26447 because of store forwarding issue. */
26448 rtx set = single_set (dep_insn);
26449 if (set && (GET_MODE (SET_DEST (set)) == QImode
26450 || GET_MODE (SET_DEST (set)) == HImode))
26452 /* Increase cost of store/load insn if exact
26453 dependence exists and it is load insn. */
26454 enum attr_memory insn_memory = get_attr_memory (insn);
26455 if (insn_memory == MEMORY_LOAD
26456 && exact_store_load_dependency (dep_insn, insn))
26470 /* How many alternative schedules to try. This should be as wide as the
26471 scheduling freedom in the DFA, but no wider. Making this value too
26472 large results extra work for the scheduler. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook: how many
   alternative schedules to try per cycle; per-CPU values (the switch
   head and several return values fall in elided lines).  */
26475 ia32_multipass_dfa_lookahead (void)
26479 case PROCESSOR_PENTIUM:
26482 case PROCESSOR_PENTIUMPRO:
26486 case PROCESSOR_BDVER1:
26487 case PROCESSOR_BDVER2:
26488 case PROCESSOR_BDVER3:
26489 case PROCESSOR_BDVER4:
26490 /* We use lookahead value 4 for BD both before and after reload
26491 schedules. Plan is to have value 8 included for O3. */
26494 case PROCESSOR_CORE2:
26495 case PROCESSOR_NEHALEM:
26496 case PROCESSOR_SANDYBRIDGE:
26497 case PROCESSOR_HASWELL:
26498 case PROCESSOR_BONNELL:
26499 case PROCESSOR_SILVERMONT:
26500 case PROCESSOR_KNL:
26501 case PROCESSOR_INTEL:
26502 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26503 as many instructions can be executed on a cycle, i.e.,
26504 issue_rate. I wonder why tuning for many CPUs does not do this. */
26505 if (reload_completed)
26506 return ix86_issue_rate ();
26507 /* Don't use lookahead for pre-reload schedule to save compile time. */
26515 /* Return true if target platform supports macro-fusion. */
/* TARGET_SCHED_MACRO_FUSION_P hook: macro-fusion is supported exactly
   when the tuning enables compare-and-branch fusion.  */
26518 ix86_macro_fusion_p ()
26520 return TARGET_FUSE_CMP_AND_BRANCH;
26523 /* Check whether current microarchitecture support macro fusion
26524 for insn pair "CONDGEN + CONDJMP". Refer to
26525 "Intel Architectures Optimization Reference Manual". */
/* TARGET_SCHED_MACRO_FUSION_PAIR_P hook: return whether the pair
   CONDGEN (cmp/test/inc/dec/alu) + CONDJMP can macro-fuse on the tuned
   CPU, per the Intel optimization manual's restrictions (no MEM-IMM
   compare, no RIP-relative operand, flag-usage limits, etc.).  */
26528 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26531 enum rtx_code ccode;
26532 rtx compare_set = NULL_RTX, test_if, cond;
26533 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26535 if (!any_condjump_p (condjmp))
26538 if (get_attr_type (condgen) != TYPE_TEST
26539 && get_attr_type (condgen) != TYPE_ICMP
26540 && get_attr_type (condgen) != TYPE_INCDEC
26541 && get_attr_type (condgen) != TYPE_ALU)
26544 compare_set = single_set (condgen);
26545 if (compare_set == NULL_RTX
26546 && !TARGET_FUSE_ALU_AND_BRANCH)
/* Multi-SET producer: pick out the COMPARE SET and the ALU SET.  */
26549 if (compare_set == NULL_RTX)
26552 rtx pat = PATTERN (condgen);
26553 for (i = 0; i < XVECLEN (pat, 0); i++)
26554 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26556 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26557 if (GET_CODE (set_src) == COMPARE)
26558 compare_set = XVECEXP (pat, 0, i);
26560 alu_set = XVECEXP (pat, 0, i);
26563 if (compare_set == NULL_RTX)
26565 src = SET_SRC (compare_set);
26566 if (GET_CODE (src) != COMPARE)
26569 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26571 if ((MEM_P (XEXP (src, 0))
26572 && CONST_INT_P (XEXP (src, 1)))
26573 || (MEM_P (XEXP (src, 1))
26574 && CONST_INT_P (XEXP (src, 0))))
26577 /* No fusion for RIP-relative address. */
26578 if (MEM_P (XEXP (src, 0)))
26579 addr = XEXP (XEXP (src, 0), 0);
26580 else if (MEM_P (XEXP (src, 1)))
26581 addr = XEXP (XEXP (src, 1), 0);
26584 ix86_address parts;
26585 int ok = ix86_decompose_address (addr, &parts);
26588 if (rip_relative_addr_p (&parts))
26592 test_if = SET_SRC (pc_set (condjmp));
26593 cond = XEXP (test_if, 0);
26594 ccode = GET_CODE (cond);
26595 /* Check whether conditional jump use Sign or Overflow Flags. */
26596 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26603 /* Return true for TYPE_TEST and TYPE_ICMP. */
26604 if (get_attr_type (condgen) == TYPE_TEST
26605 || get_attr_type (condgen) == TYPE_ICMP)
26608 /* The following is the case that macro-fusion for alu + jmp. */
26609 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26612 /* No fusion for alu op with memory destination operand. */
26613 dest = SET_DEST (alu_set);
26617 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26619 if (get_attr_type (condgen) == TYPE_INCDEC
26629 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26630 execution. It is applied if
26631 (1) IMUL instruction is on the top of list;
26632 (2) There exists the only producer of independent IMUL instruction in
26634 Return index of IMUL producer if it was found and -1 otherwise. */
/* NOTE(review): extraction gaps — some returns/braces are missing here;
   verify against upstream i386.c before relying on exact control flow. */
26636 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26639 rtx set, insn1, insn2;
26640 sd_iterator_def sd_it;
/* This heuristic is applied for Bonnell (Atom) only.  */
26645 if (!TARGET_BONNELL)
26648 /* Check that IMUL instruction is on the top of ready list. */
26649 insn = ready[n_ready - 1];
26650 set = single_set (insn);
26653 if (!(GET_CODE (SET_SRC (set)) == MULT
26654 && GET_MODE (SET_SRC (set)) == SImode))
26657 /* Search for producer of independent IMUL instruction. */
26658 for (i = n_ready - 2; i >= 0; i--)
26661 if (!NONDEBUG_INSN_P (insn))
26663 /* Skip IMUL instruction. */
26664 insn2 = PATTERN (insn);
26665 if (GET_CODE (insn2) == PARALLEL)
26666 insn2 = XVECEXP (insn2, 0, 0);
26667 if (GET_CODE (insn2) == SET
26668 && GET_CODE (SET_SRC (insn2)) == MULT
26669 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk forward dependencies of the candidate producer looking for
   an SImode IMUL consumer.  */
26672 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26675 con = DEP_CON (dep);
26676 if (!NONDEBUG_INSN_P (con))
26678 insn1 = PATTERN (con);
26679 if (GET_CODE (insn1) == PARALLEL)
26680 insn1 = XVECEXP (insn1, 0, 0);
26682 if (GET_CODE (insn1) == SET
26683 && GET_CODE (SET_SRC (insn1)) == MULT
26684 && GET_MODE (SET_SRC (insn1)) == SImode)
26686 sd_iterator_def sd_it1;
26688 /* Check if there is no other dependee for IMUL. */
26690 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26693 pro = DEP_PRO (dep1);
26694 if (!NONDEBUG_INSN_P (pro))
26709 /* Try to find the best candidate on the top of ready list if two insns
26710 have the same priority - candidate is best if its dependees were
26711 scheduled earlier. Applied for Silvermont only.
26712 Return true if top 2 insns must be interchanged. */
/* NOTE(review): extraction gaps — some lines (returns/braces) are missing. */
26714 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26716 rtx_insn *top = ready[n_ready - 1];
26717 rtx_insn *next = ready[n_ready - 2];
26719 sd_iterator_def sd_it;
/* INSN_TICK: the cycle on which INSN was scheduled (haifa internal).  */
26723 #define INSN_TICK(INSN) (HID (INSN)->tick)
26725 if (!TARGET_SILVERMONT && !TARGET_INTEL)
/* Both candidates must be ordinary non-debug, non-jump single-set insns.  */
26728 if (!NONDEBUG_INSN_P (top))
26730 if (!NONJUMP_INSN_P (top))
26732 if (!NONDEBUG_INSN_P (next))
26734 if (!NONJUMP_INSN_P (next))
26736 set = single_set (top)
26739 set = single_set (next);
26743 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26745 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26747 /* Determine winner more precise. */
/* Compute the latest tick among resolved back-dependencies of each insn;
   the insn whose producers finished earlier should go first.  */
26748 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26751 pro = DEP_PRO (dep);
26752 if (!NONDEBUG_INSN_P (pro))
26754 if (INSN_TICK (pro) > clock1)
26755 clock1 = INSN_TICK (pro);
26757 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26760 pro = DEP_PRO (dep);
26761 if (!NONDEBUG_INSN_P (pro))
26763 if (INSN_TICK (pro) > clock2)
26764 clock2 = INSN_TICK (pro);
26767 if (clock1 == clock2)
26769 /* Determine winner - load must win. */
26770 enum attr_memory memory1, memory2;
26771 memory1 = get_attr_memory (top);
26772 memory2 = get_attr_memory (next);
26773 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26776 return (bool) (clock2 < clock1);
26782 /* Perform possible reodering of ready list for Atom/Silvermont only.
26783 Return issue rate. */
/* NOTE(review): extraction gaps — early-return bodies are missing from
   this excerpt; confirm against upstream i386.c. */
26785 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26786 int *pn_ready, int clock_var)
26788 int issue_rate = -1;
26789 int n_ready = *pn_ready;
26794 /* Set up issue rate. */
26795 issue_rate = ix86_issue_rate ();
26797 /* Do reodering for BONNELL/SILVERMONT only. */
26798 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26801 /* Nothing to do if ready list contains only 1 instruction. */
26805 /* Do reodering for post-reload scheduler only. */
26806 if (!reload_completed)
/* Case 1: rotate an independent IMUL producer to the list top (Bonnell).  */
26809 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26811 if (sched_verbose > 1)
26812 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26813 INSN_UID (ready[index]));
26815 /* Put IMUL producer (ready[index]) at the top of ready list. */
26816 insn = ready[index];
26817 for (i = index; i < n_ready - 1; i++)
26818 ready[i] = ready[i + 1];
26819 ready[n_ready - 1] = insn;
/* Case 2: swap the two top insns (Silvermont heuristic).  */
26823 /* Skip selective scheduling since HID is not populated in it. */
26826 && swap_top_of_ready_list (ready, n_ready))
26828 if (sched_verbose > 1)
26829 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26830 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26831 /* Swap 2 top elements of ready list. */
26832 insn = ready[n_ready - 1];
26833 ready[n_ready - 1] = ready[n_ready - 2];
26834 ready[n_ready - 2] = insn;
/* Forward declaration (return type line missing from this extract);
   used below to detect likely-spilled hard register classes.  */
26840 ix86_class_likely_spilled_p (reg_class_t);
26842 /* Returns true if lhs of insn is HW function argument register and set up
26843 is_spilled to true if it is likely spilled HW register. */
/* NOTE(review): extraction gaps — returns/braces are missing from this
   excerpt; confirm against upstream i386.c. */
26845 insn_is_function_arg (rtx insn, bool* is_spilled)
26849 if (!NONDEBUG_INSN_P (insn))
26851 /* Call instructions are not movable, ignore it. */
/* Unwrap PARALLEL patterns to look at the first SET.  */
26854 insn = PATTERN (insn);
26855 if (GET_CODE (insn) == PARALLEL)
26856 insn = XVECEXP (insn, 0, 0);
26857 if (GET_CODE (insn) != SET)
26859 dst = SET_DEST (insn);
26860 if (REG_P (dst) && HARD_REGISTER_P (dst)
26861 && ix86_function_arg_regno_p (REGNO (dst)))
26863 /* Is it likely spilled HW register? */
26864 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26865 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26866 *is_spilled = true;
26872 /* Add output dependencies for chain of function adjacent arguments if only
26873 there is a move to likely spilled HW register. Return first argument
26874 if at least one dependence was added or NULL otherwise. */
/* NOTE(review): extraction gaps — loop braces and early exits are missing
   from this excerpt. */
26876 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26879 rtx_insn *last = call;
26880 rtx_insn *first_arg = NULL;
26881 bool is_spilled = false;
26883 head = PREV_INSN (head);
26885 /* Find nearest to call argument passing instruction. */
26888 last = PREV_INSN (last);
26891 if (!NONDEBUG_INSN_P (last))
26893 if (insn_is_function_arg (last, &is_spilled))
/* Walk backwards over the chain of adjacent argument-setting insns.  */
26901 insn = PREV_INSN (last);
26902 if (!INSN_P (insn))
26906 if (!NONDEBUG_INSN_P (insn))
26911 if (insn_is_function_arg (insn, &is_spilled))
26913 /* Add output depdendence between two function arguments if chain
26914 of output arguments contains likely spilled HW registers. */
26916 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26917 first_arg = last = insn;
26927 /* Add output or anti dependency from insn to first_arg to restrict its code
/* (continuation of header comment lost in extraction: restricts code motion
   of INSN relative to FIRST_ARG).  */
26930 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26935 /* Add anti dependencies for bounds stores. */
/* MPX bounds-store insns are recognized by the UNSPEC_BNDSTX marker.  */
26937 && GET_CODE (PATTERN (insn)) == PARALLEL
26938 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26939 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26941 add_dependence (first_arg, insn, REG_DEP_ANTI);
26945 set = single_set (insn);
26948 tmp = SET_DEST (set);
26951 /* Add output dependency to the first function argument. */
26952 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26955 /* Add anti dependency. */
26956 add_dependence (first_arg, insn, REG_DEP_ANTI);
26959 /* Avoid cross block motion of function argument through adding dependency
26960 from the first non-jump instruction in bb. */
26962 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
/* Scan BB backwards from its end toward its head.  */
26964 rtx_insn *insn = BB_END (bb);
26968 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26970 rtx set = single_set (insn);
26973 avoid_func_arg_motion (arg, insn);
26977 if (insn == BB_HEAD (bb))
26979 insn = PREV_INSN (insn);
26983 /* Hook for pre-reload schedule - avoid motion of function arguments
26984 passed in likely spilled HW registers. */
/* NOTE(review): extraction gaps — early-return bodies and braces are
   missing from this excerpt. */
26986 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26989 rtx_insn *first_arg = NULL;
/* Only applied by the pre-reload scheduler.  */
26990 if (reload_completed)
26992 while (head != tail && DEBUG_INSN_P (head))
26993 head = NEXT_INSN (head);
26994 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26995 if (INSN_P (insn) && CALL_P (insn))
26997 first_arg = add_parameter_dependencies (insn, head);
27000 /* Add dependee for first argument to predecessors if only
27001 region contains more than one block. */
27002 basic_block bb = BLOCK_FOR_INSN (insn);
27003 int rgn = CONTAINING_RGN (bb->index);
27004 int nr_blks = RGN_NR_BLOCKS (rgn);
27005 /* Skip trivial regions and region head blocks that can have
27006 predecessors outside of region. */
27007 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27012 /* Regions are SCCs with the exception of selective
27013 scheduling with pipelining of outer blocks enabled.
27014 So also check that immediate predecessors of a non-head
27015 block are in the same region. */
27016 FOR_EACH_EDGE (e, ei, bb->preds)
27018 /* Avoid creating of loop-carried dependencies through
27019 using topological ordering in the region. */
27020 if (rgn == CONTAINING_RGN (e->src->index)
27021 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27022 add_dependee_for_func_arg (first_arg, e->src);
27030 else if (first_arg)
27031 avoid_func_arg_motion (first_arg, insn);
27034 /* Hook for pre-reload schedule - set priority of moves from likely spilled
27035 HW registers to maximum, to schedule them at soon as possible. These are
27036 moves from function argument registers at the top of the function entry
27037 and moves from function return value registers after call. */
27039 ix86_adjust_priority (rtx_insn *insn, int priority)
/* Pre-reload scheduler only.  */
27043 if (reload_completed)
27046 if (!NONDEBUG_INSN_P (insn))
27049 set = single_set (insn);
27052 rtx tmp = SET_SRC (set);
/* Boost priority when the source is a likely-spilled hard register.  */
27054 && HARD_REGISTER_P (tmp)
27055 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27056 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27057 return current_sched_info->sched_max_insns_priority;
27063 /* Model decoder of Core 2/i7.
27064 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27065 track the instruction fetch block boundaries and make sure that long
27066 (9+ bytes) instructions are assigned to D0. */
27068 /* Maximum length of an insn that can be handled by
27069 a secondary decoder unit. '8' for Core 2/i7. */
27070 static int core2i7_secondary_decoder_max_insn_size;
27072 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27073 '16' for Core 2/i7. */
27074 static int core2i7_ifetch_block_size;
27076 /* Maximum number of instructions decoder can handle per cycle.
27077 '6' for Core 2/i7. */
27078 static int core2i7_ifetch_block_max_insns;
/* Convenience typedefs for the per-pass multipass scheduling state.  */
27080 typedef struct ix86_first_cycle_multipass_data_ *
27081 ix86_first_cycle_multipass_data_t;
27082 typedef const struct ix86_first_cycle_multipass_data_ *
27083 const_ix86_first_cycle_multipass_data_t;
27085 /* A variable to store target state across calls to max_issue within
/* (one scheduling cycle — continuation lost in extraction).  */
27087 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27088 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27090 /* Initialize DATA. */
27092 core2i7_first_cycle_multipass_init (void *_data)
27094 ix86_first_cycle_multipass_data_t data
27095 = (ix86_first_cycle_multipass_data_t) _data;
/* Start each pass with an empty ifetch block and no change bitmap.  */
27097 data->ifetch_block_len = 0;
27098 data->ifetch_block_n_insns = 0;
27099 data->ready_try_change = NULL;
27100 data->ready_try_change_size = 0;
27103 /* Advancing the cycle; reset ifetch block counts. */
27105 core2i7_dfa_post_advance_cycle (void)
27107 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
/* Sanity: previous cycle must not have exceeded the decoder capacity.  */
27109 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27111 data->ifetch_block_len = 0;
27112 data->ifetch_block_n_insns = 0;
/* Forward declaration; defined elsewhere in this file.  */
27115 static int min_insn_size (rtx_insn *);
27117 /* Filter out insns from ready_try that the core will not be able to issue
27118 on current cycle due to decoder. */
/* NOTE(review): extraction gaps — the loop header and braces around the
   per-insn body are missing from this excerpt. */
27120 core2i7_first_cycle_multipass_filter_ready_try
27121 (const_ix86_first_cycle_multipass_data_t data,
27122 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
/* Skip entries already masked out.  */
27129 if (ready_try[n_ready])
27132 insn = get_ready_element (n_ready);
27133 insn_size = min_insn_size (insn);
27135 if (/* If this is a too long an insn for a secondary decoder ... */
27136 (!first_cycle_insn_p
27137 && insn_size > core2i7_secondary_decoder_max_insn_size)
27138 /* ... or it would not fit into the ifetch block ... */
27139 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27140 /* ... or the decoder is full already ... */
27141 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27142 /* ... mask the insn out. */
27144 ready_try[n_ready] = 1;
/* Remember the change so it can be reverted on backtrack.  */
27146 if (data->ready_try_change)
27147 bitmap_set_bit (data->ready_try_change, n_ready);
27152 /* Prepare for a new round of multipass lookahead scheduling. */
27154 core2i7_first_cycle_multipass_begin (void *_data,
27155 signed char *ready_try, int n_ready,
27156 bool first_cycle_insn_p)
27158 ix86_first_cycle_multipass_data_t data
27159 = (ix86_first_cycle_multipass_data_t) _data;
27160 const_ix86_first_cycle_multipass_data_t prev_data
27161 = ix86_first_cycle_multipass_data;
27163 /* Restore the state from the end of the previous round. */
27164 data->ifetch_block_len = prev_data->ifetch_block_len;
27165 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27167 /* Filter instructions that cannot be issued on current cycle due to
27168 decoder restrictions. */
27169 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27170 first_cycle_insn_p);
27173 /* INSN is being issued in current solution. Account for its impact on
27174 the decoder model. */
27176 core2i7_first_cycle_multipass_issue (void *_data,
27177 signed char *ready_try, int n_ready,
27178 rtx_insn *insn, const void *_prev_data)
27180 ix86_first_cycle_multipass_data_t data
27181 = (ix86_first_cycle_multipass_data_t) _data;
27182 const_ix86_first_cycle_multipass_data_t prev_data
27183 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27185 int insn_size = min_insn_size (insn);
/* Charge INSN against the current ifetch block.  */
27187 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27188 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27189 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27190 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27192 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27193 if (!data->ready_try_change)
27195 data->ready_try_change = sbitmap_alloc (n_ready);
27196 data->ready_try_change_size = n_ready;
27198 else if (data->ready_try_change_size < n_ready)
27200 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27202 data->ready_try_change_size = n_ready;
27204 bitmap_clear (data->ready_try_change);
27206 /* Filter out insns from ready_try that the core will not be able to issue
27207 on current cycle due to decoder. */
27208 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27212 /* Revert the effect on ready_try. */
27214 core2i7_first_cycle_multipass_backtrack (const void *_data,
27215 signed char *ready_try,
27216 int n_ready ATTRIBUTE_UNUSED)
27218 const_ix86_first_cycle_multipass_data_t data
27219 = (const_ix86_first_cycle_multipass_data_t) _data;
27220 unsigned int i = 0;
27221 sbitmap_iterator sbi;
/* Every recorded change must be within the current ready list.  */
27223 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27224 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27230 /* Save the result of multipass lookahead scheduling for the next round. */
27232 core2i7_first_cycle_multipass_end (const void *_data)
27234 const_ix86_first_cycle_multipass_data_t data
27235 = (const_ix86_first_cycle_multipass_data_t) _data;
27236 ix86_first_cycle_multipass_data_t next_data
27237 = ix86_first_cycle_multipass_data;
/* Persist the ifetch block state into the global carry-over slot.  */
27241 next_data->ifetch_block_len = data->ifetch_block_len;
27242 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27246 /* Deallocate target data. */
27248 core2i7_first_cycle_multipass_fini (void *_data)
27250 ix86_first_cycle_multipass_data_t data
27251 = (ix86_first_cycle_multipass_data_t) _data;
/* Free the change-tracking bitmap and clear the pointer to avoid reuse.  */
27253 if (data->ready_try_change)
27255 sbitmap_free (data->ready_try_change);
27256 data->ready_try_change = NULL;
27257 data->ready_try_change_size = 0;
27261 /* Prepare for scheduling pass. */
/* NOTE(review): extraction gaps — the switch header and break statements
   are missing from this excerpt. */
27263 ix86_sched_init_global (FILE *, int, int)
27265 /* Install scheduling hooks for current CPU. Some of these hooks are used
27266 in time-critical parts of the scheduler, so we only set them up when
27267 they are actually used. */
27270 case PROCESSOR_CORE2:
27271 case PROCESSOR_NEHALEM:
27272 case PROCESSOR_SANDYBRIDGE:
27273 case PROCESSOR_HASWELL:
27274 /* Do not perform multipass scheduling for pre-reload schedule
27275 to save compile time. */
27276 if (reload_completed)
27278 targetm.sched.dfa_post_advance_cycle
27279 = core2i7_dfa_post_advance_cycle;
27280 targetm.sched.first_cycle_multipass_init
27281 = core2i7_first_cycle_multipass_init;
27282 targetm.sched.first_cycle_multipass_begin
27283 = core2i7_first_cycle_multipass_begin;
27284 targetm.sched.first_cycle_multipass_issue
27285 = core2i7_first_cycle_multipass_issue;
27286 targetm.sched.first_cycle_multipass_backtrack
27287 = core2i7_first_cycle_multipass_backtrack;
27288 targetm.sched.first_cycle_multipass_end
27289 = core2i7_first_cycle_multipass_end;
27290 targetm.sched.first_cycle_multipass_fini
27291 = core2i7_first_cycle_multipass_fini;
27293 /* Set decoder parameters. */
27294 core2i7_secondary_decoder_max_insn_size = 8;
27295 core2i7_ifetch_block_size = 16;
27296 core2i7_ifetch_block_max_insns = 6;
27299 /* ... Fall through ... */
/* Default: clear all multipass hooks so the generic scheduler is used.  */
27301 targetm.sched.dfa_post_advance_cycle = NULL;
27302 targetm.sched.first_cycle_multipass_init = NULL;
27303 targetm.sched.first_cycle_multipass_begin = NULL;
27304 targetm.sched.first_cycle_multipass_issue = NULL;
27305 targetm.sched.first_cycle_multipass_backtrack = NULL;
27306 targetm.sched.first_cycle_multipass_end = NULL;
27307 targetm.sched.first_cycle_multipass_fini = NULL;
27313 /* Compute the alignment given to a constant that is being placed in memory.
27314 EXP is the constant and ALIGN is the alignment that the object would
27316 The value of this function is used instead of that alignment to align
27320 ix86_constant_alignment (tree exp, int align)
/* Floating/vector/integer constants may be bumped to 64 or 128 bits.  */
27322 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27323 || TREE_CODE (exp) == INTEGER_CST)
27325 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27327 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment unless optimizing for size.  */
27330 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27331 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27332 return BITS_PER_WORD;
27337 /* Compute the alignment for a static variable.
27338 TYPE is the data type, and ALIGN is the alignment that
27339 the object would ordinarily have. The value of this function is used
27340 instead of that alignment to align the object. */
/* NOTE(review): extraction gaps — several returns/braces are missing. */
27343 ix86_data_alignment (tree type, int align, bool opt)
27345 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27346 for symbols from other compilation units or symbols that don't need
27347 to bind locally. In order to preserve some ABI compatibility with
27348 those compilers, ensure we don't decrease alignment from what we
27351 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27353 /* A data structure, equal or greater than the size of a cache line
27354 (64 bytes in the Pentium 4 and other recent Intel processors, including
27355 processors based on Intel Core microarchitecture) should be aligned
27356 so that its base address is a multiple of a cache line size. */
27359 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27361 if (max_align < BITS_PER_WORD)
27362 max_align = BITS_PER_WORD;
/* -malign-data= policy selection.  */
27364 switch (ix86_align_data_type)
27366 case ix86_align_data_type_abi: opt = false; break;
27367 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27368 case ix86_align_data_type_cacheline: break;
/* Bump large aggregates up to the compat / cache-line maximums.  */
27372 && AGGREGATE_TYPE_P (type)
27373 && TYPE_SIZE (type)
27374 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27376 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27377 && align < max_align_compat)
27378 align = max_align_compat;
27379 if (wi::geu_p (TYPE_SIZE (type), max_align)
27380 && align < max_align)
27384 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27385 to 16byte boundary. */
27388 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27389 && TYPE_SIZE (type)
27390 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27391 && wi::geu_p (TYPE_SIZE (type), 128)
/* Per-TREE_CODE minimum alignment adjustments (64 for DFmode, 128 for
   128-bit-aligned modes).  */
27399 if (TREE_CODE (type) == ARRAY_TYPE)
27401 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27403 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27406 else if (TREE_CODE (type) == COMPLEX_TYPE)
27409 if (TYPE_MODE (type) == DCmode && align < 64)
27411 if ((TYPE_MODE (type) == XCmode
27412 || TYPE_MODE (type) == TCmode) && align < 128)
27415 else if ((TREE_CODE (type) == RECORD_TYPE
27416 || TREE_CODE (type) == UNION_TYPE
27417 || TREE_CODE (type) == QUAL_UNION_TYPE)
27418 && TYPE_FIELDS (type))
27420 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27422 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27425 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27426 || TREE_CODE (type) == INTEGER_TYPE)
27428 if (TYPE_MODE (type) == DFmode && align < 64)
27430 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27437 /* Compute the alignment for a local variable or a stack slot. EXP is
27438 the data type or decl itself, MODE is the widest mode available and
27439 ALIGN is the alignment that the object would ordinarily have. The
27440 value of this macro is used instead of that alignment to align the
/* NOTE(review): extraction gaps — returns/braces missing from this excerpt. */
27444 ix86_local_alignment (tree exp, machine_mode mode,
27445 unsigned int align)
/* EXP may be a decl or a bare type; extract the type either way.  */
27449 if (exp && DECL_P (exp))
27451 type = TREE_TYPE (exp);
27460 /* Don't do dynamic stack realignment for long long objects with
27461 -mpreferred-stack-boundary=2. */
27464 && ix86_preferred_stack_boundary < 64
27465 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27466 && (!type || !TYPE_USER_ALIGN (type))
27467 && (!decl || !DECL_USER_ALIGN (decl)))
27470 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27471 register in MODE. We will return the largest alignment of XF
27475 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27476 align = GET_MODE_ALIGNMENT (DFmode);
27480 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27481 to 16byte boundary. Exact wording is:
27483 An array uses the same alignment as its elements, except that a local or
27484 global array variable of length at least 16 bytes or
27485 a C99 variable-length array variable always has alignment of at least 16 bytes.
27487 This was added to allow use of aligned SSE instructions at arrays. This
27488 rule is meant for static storage (where compiler can not do the analysis
27489 by itself). We follow it for automatic variables only when convenient.
27490 We fully control everything in the function compiled and functions from
27491 other unit can not rely on the alignment.
27493 Exclude va_list type. It is the common case of local array where
27494 we can not benefit from the alignment.
27496 TODO: Probably one should optimize for size only when var is not escaping. */
27497 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27500 if (AGGREGATE_TYPE_P (type)
27501 && (va_list_type_node == NULL_TREE
27502 || (TYPE_MAIN_VARIANT (type)
27503 != TYPE_MAIN_VARIANT (va_list_type_node)))
27504 && TYPE_SIZE (type)
27505 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27506 && wi::geu_p (TYPE_SIZE (type), 16)
/* Per-TREE_CODE minimum alignment adjustments, mirroring
   ix86_data_alignment above.  */
27510 if (TREE_CODE (type) == ARRAY_TYPE)
27512 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27514 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27517 else if (TREE_CODE (type) == COMPLEX_TYPE)
27519 if (TYPE_MODE (type) == DCmode && align < 64)
27521 if ((TYPE_MODE (type) == XCmode
27522 || TYPE_MODE (type) == TCmode) && align < 128)
27525 else if ((TREE_CODE (type) == RECORD_TYPE
27526 || TREE_CODE (type) == UNION_TYPE
27527 || TREE_CODE (type) == QUAL_UNION_TYPE)
27528 && TYPE_FIELDS (type))
27530 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27532 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27535 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27536 || TREE_CODE (type) == INTEGER_TYPE)
27539 if (TYPE_MODE (type) == DFmode && align < 64)
27541 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27547 /* Compute the minimum required alignment for dynamic stack realignment
27548 purposes for a local variable, parameter or a stack slot. EXP is
27549 the data type or decl itself, MODE is its mode and ALIGN is the
27550 alignment that the object would ordinarily have. */
27553 ix86_minimum_alignment (tree exp, machine_mode mode,
27554 unsigned int align)
27558 if (exp && DECL_P (exp))
27560 type = TREE_TYPE (exp);
/* Only the 32-bit, 64-bit-aligned, small-stack-boundary case matters.  */
27569 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27572 /* Don't do dynamic stack realignment for long long objects with
27573 -mpreferred-stack-boundary=2. */
27574 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27575 && (!type || !TYPE_USER_ALIGN (type))
27576 && (!decl || !DECL_USER_ALIGN (decl)))
27582 /* Find a location for the static chain incoming to a nested function.
27583 This is a register, unless all free registers are used by arguments. */
/* NOTE(review): extraction gaps — some returns/register assignments are
   missing from this excerpt. */
27586 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27590 /* While this function won't be called by the middle-end when a static
27591 chain isn't needed, it's also used throughout the backend so it's
27592 easiest to keep this check centralized. */
27593 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27598 /* We always use R10 in 64-bit mode. */
27603 const_tree fntype, fndecl;
27606 /* By default in 32-bit mode we use ECX to pass the static chain. */
27609 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27611 fntype = TREE_TYPE (fndecl_or_type);
27612 fndecl = fndecl_or_type;
27616 fntype = fndecl_or_type;
/* The calling convention decides which register remains free.  */
27620 ccvt = ix86_get_callcvt (fntype);
27621 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27623 /* Fastcall functions use ecx/edx for arguments, which leaves
27624 us with EAX for the static chain.
27625 Thiscall functions use ecx for arguments, which also
27626 leaves us with EAX for the static chain. */
27629 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27631 /* Thiscall functions use ecx for arguments, which leaves
27632 us with EAX and EDX for the static chain.
27633 We are using for abi-compatibility EAX. */
27636 else if (ix86_function_regparm (fntype, fndecl) == 3)
27638 /* For regparm 3, we have no free call-clobbered registers in
27639 which to store the static chain. In order to implement this,
27640 we have the trampoline push the static chain to the stack.
27641 However, we can't push a value below the return address when
27642 we call the nested function directly, so we have to use an
27643 alternate entry point. For this we use ESI, and have the
27644 alternate entry point push ESI, so that things appear the
27645 same once we're executing the nested function. */
27648 if (fndecl == current_function_decl)
27649 ix86_static_chain_on_stack = true;
27650 return gen_frame_mem (SImode,
27651 plus_constant (Pmode,
27652 arg_pointer_rtx, -8));
27658 return gen_rtx_REG (Pmode, regno);
27661 /* Emit RTL insns to initialize the variable parts of a trampoline.
27662 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27663 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27664 to be passed to the target function. */
/* NOTE(review): extraction gaps — offset bookkeeping lines and several
   braces are missing from this excerpt; verify against upstream i386.c. */
27667 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27673 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27679 /* Load the function address to r11. Try to load address using
27680 the shorter movl instead of movabs. We may want to support
27681 movq for kernel mode, but kernel does not use trampolines at
27682 the moment. FNADDR is a 32bit address and may not be in
27683 DImode when ptr_mode == SImode. Always use movl in this
27685 if (ptr_mode == SImode
27686 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27688 fnaddr = copy_addr_to_reg (fnaddr);
/* 0xbb41: REX.B prefix + movl imm32 into r11d (zero-extends).  */
27690 mem = adjust_address (m_tramp, HImode, offset);
27691 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27693 mem = adjust_address (m_tramp, SImode, offset + 2);
27694 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* 0xbb49: REX.W prefix + movabs imm64 into r11.  */
27699 mem = adjust_address (m_tramp, HImode, offset);
27700 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27702 mem = adjust_address (m_tramp, DImode, offset + 2);
27703 emit_move_insn (mem, fnaddr);
27707 /* Load static chain using movabs to r10. Use the shorter movl
27708 instead of movabs when ptr_mode == SImode. */
27709 if (ptr_mode == SImode)
27720 mem = adjust_address (m_tramp, HImode, offset);
27721 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27723 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27724 emit_move_insn (mem, chain_value);
27727 /* Jump to r11; the last (unused) byte is a nop, only there to
27728 pad the write out to a single 32-bit store. */
27729 mem = adjust_address (m_tramp, SImode, offset);
27730 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27737 /* Depending on the static chain location, either load a register
27738 with a constant, or push the constant to the stack. All of the
27739 instructions are the same size. */
27740 chain = ix86_static_chain (fndecl, true);
/* Pick the mov-imm opcode matching the chosen chain register.  */
27743 switch (REGNO (chain))
27746 opcode = 0xb8; break;
27748 opcode = 0xb9; break;
27750 gcc_unreachable ();
27756 mem = adjust_address (m_tramp, QImode, offset);
27757 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27759 mem = adjust_address (m_tramp, SImode, offset + 1);
27760 emit_move_insn (mem, chain_value);
/* 0xe9: jmp rel32 to the target function.  */
27763 mem = adjust_address (m_tramp, QImode, offset);
27764 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27766 mem = adjust_address (m_tramp, SImode, offset + 1);
27768 /* Compute offset from the end of the jmp to the target function.
27769 In the case in which the trampoline stores the static chain on
27770 the stack, we need to skip the first insn which pushes the
27771 (call-saved) register static chain; this push is 1 byte. */
27773 disp = expand_binop (SImode, sub_optab, fnaddr,
27774 plus_constant (Pmode, XEXP (m_tramp, 0),
27775 offset - (MEM_P (chain) ? 1 : 0)),
27776 NULL_RTX, 1, OPTAB_DIRECT);
27777 emit_move_insn (mem, disp);
27780 gcc_assert (offset <= TRAMPOLINE_SIZE);
27782 #ifdef HAVE_ENABLE_EXECUTE_STACK
27783 #ifdef CHECK_EXECUTE_STACK_ENABLED
27784 if (CHECK_EXECUTE_STACK_ENABLED)
27786 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27787 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27791 /* The following file contains several enumerations and data structures
27792 built from the definitions in i386-builtin-types.def. */
27794 #include "i386-builtin-types.inc"
27796 /* Table for the ix86 builtin non-function types. */
/* GTY(()) marks the table as a garbage-collection root for the GC-aware
   precompiled-header machinery.  */
27797 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27799 /* Retrieve an element from the above table, building some of
27800 the types lazily. */
/* NOTE(review): extraction gaps — the cache-hit early return and some
   braces are missing from this excerpt. */
27803 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27805 unsigned int index;
27808 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27810 type = ix86_builtin_type_tab[(int) tcode];
/* Primitive types are pre-populated; only vectors and pointers are
   built lazily here.  */
27814 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27815 if (tcode <= IX86_BT_LAST_VECT)
27819 index = tcode - IX86_BT_LAST_PRIM - 1;
27820 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27821 mode = ix86_builtin_type_vect_mode[index];
27823 type = build_vector_type_for_mode (itype, mode);
/* Pointer (and const-pointer) types.  */
27829 index = tcode - IX86_BT_LAST_VECT - 1;
27830 if (tcode <= IX86_BT_LAST_PTR)
27831 quals = TYPE_UNQUALIFIED;
27833 quals = TYPE_QUAL_CONST;
27835 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27836 if (quals != TYPE_UNQUALIFIED)
27837 itype = build_qualified_type (itype, quals);
27839 type = build_pointer_type (itype);
/* Memoize the built type for subsequent lookups.  */
27842 ix86_builtin_type_tab[(int) tcode] = type;
27846 /* Table for the ix86 builtin function types.  */
/* Memoization cache indexed by ix86_builtin_func_type code; GTY(())
   marks it as a GC root.  */
27847 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27849 /* Retrieve an element from the above table, building some of
27850    the types lazily.  */
/* NOTE(review): this fragment is missing several original lines — the
   "static tree" return-type line, braces, the "tree type;" and loop
   index declarations, the early return on a cache hit, the "else"
   separating the direct-build and alias branches, and the trailing
   "return type;".  Only the lines below are visible.  */
27853 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27857 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab))
27859 type = ix86_builtin_func_type_tab[(int) tcode];
/* Codes up to IX86_BT_LAST_FUNC describe a function type directly via
   the ix86_builtin_func_start / ix86_builtin_func_args tables: slot
   [start] is the return type, slots (start, after) are arguments.  */
27863 if (tcode <= IX86_BT_LAST_FUNC)
27865 unsigned start = ix86_builtin_func_start[(int) tcode];
27866 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27867 tree rtype, atype, args = void_list_node;
27870 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Walk the argument codes backwards, consing onto void_list_node so
   the resulting TREE_LIST ends up in declaration order.  */
27871 for (i = after - 1; i > start; --i)
27873 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27874 args = tree_cons (NULL, atype, args);
27877 type = build_function_type (rtype, args);
/* Codes beyond IX86_BT_LAST_FUNC are aliases: resolve recursively to
   the base function-type code they share a signature with.  */
27881 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27882 enum ix86_builtin_func_type icode;
27884 icode = ix86_builtin_func_alias_base[index];
27885 type = ix86_get_builtin_func_type (icode);
/* Cache the result for subsequent lookups.  */
27888 ix86_builtin_func_type_tab[(int) tcode] = type;
27893 /* Codes for all the SSE/MMX builtins. */
27896 IX86_BUILTIN_ADDPS,
27897 IX86_BUILTIN_ADDSS,
27898 IX86_BUILTIN_DIVPS,
27899 IX86_BUILTIN_DIVSS,
27900 IX86_BUILTIN_MULPS,
27901 IX86_BUILTIN_MULSS,
27902 IX86_BUILTIN_SUBPS,
27903 IX86_BUILTIN_SUBSS,
27905 IX86_BUILTIN_CMPEQPS,
27906 IX86_BUILTIN_CMPLTPS,
27907 IX86_BUILTIN_CMPLEPS,
27908 IX86_BUILTIN_CMPGTPS,
27909 IX86_BUILTIN_CMPGEPS,
27910 IX86_BUILTIN_CMPNEQPS,
27911 IX86_BUILTIN_CMPNLTPS,
27912 IX86_BUILTIN_CMPNLEPS,
27913 IX86_BUILTIN_CMPNGTPS,
27914 IX86_BUILTIN_CMPNGEPS,
27915 IX86_BUILTIN_CMPORDPS,
27916 IX86_BUILTIN_CMPUNORDPS,
27917 IX86_BUILTIN_CMPEQSS,
27918 IX86_BUILTIN_CMPLTSS,
27919 IX86_BUILTIN_CMPLESS,
27920 IX86_BUILTIN_CMPNEQSS,
27921 IX86_BUILTIN_CMPNLTSS,
27922 IX86_BUILTIN_CMPNLESS,
27923 IX86_BUILTIN_CMPORDSS,
27924 IX86_BUILTIN_CMPUNORDSS,
27926 IX86_BUILTIN_COMIEQSS,
27927 IX86_BUILTIN_COMILTSS,
27928 IX86_BUILTIN_COMILESS,
27929 IX86_BUILTIN_COMIGTSS,
27930 IX86_BUILTIN_COMIGESS,
27931 IX86_BUILTIN_COMINEQSS,
27932 IX86_BUILTIN_UCOMIEQSS,
27933 IX86_BUILTIN_UCOMILTSS,
27934 IX86_BUILTIN_UCOMILESS,
27935 IX86_BUILTIN_UCOMIGTSS,
27936 IX86_BUILTIN_UCOMIGESS,
27937 IX86_BUILTIN_UCOMINEQSS,
27939 IX86_BUILTIN_CVTPI2PS,
27940 IX86_BUILTIN_CVTPS2PI,
27941 IX86_BUILTIN_CVTSI2SS,
27942 IX86_BUILTIN_CVTSI642SS,
27943 IX86_BUILTIN_CVTSS2SI,
27944 IX86_BUILTIN_CVTSS2SI64,
27945 IX86_BUILTIN_CVTTPS2PI,
27946 IX86_BUILTIN_CVTTSS2SI,
27947 IX86_BUILTIN_CVTTSS2SI64,
27949 IX86_BUILTIN_MAXPS,
27950 IX86_BUILTIN_MAXSS,
27951 IX86_BUILTIN_MINPS,
27952 IX86_BUILTIN_MINSS,
27954 IX86_BUILTIN_LOADUPS,
27955 IX86_BUILTIN_STOREUPS,
27956 IX86_BUILTIN_MOVSS,
27958 IX86_BUILTIN_MOVHLPS,
27959 IX86_BUILTIN_MOVLHPS,
27960 IX86_BUILTIN_LOADHPS,
27961 IX86_BUILTIN_LOADLPS,
27962 IX86_BUILTIN_STOREHPS,
27963 IX86_BUILTIN_STORELPS,
27965 IX86_BUILTIN_MASKMOVQ,
27966 IX86_BUILTIN_MOVMSKPS,
27967 IX86_BUILTIN_PMOVMSKB,
27969 IX86_BUILTIN_MOVNTPS,
27970 IX86_BUILTIN_MOVNTQ,
27972 IX86_BUILTIN_LOADDQU,
27973 IX86_BUILTIN_STOREDQU,
27975 IX86_BUILTIN_PACKSSWB,
27976 IX86_BUILTIN_PACKSSDW,
27977 IX86_BUILTIN_PACKUSWB,
27979 IX86_BUILTIN_PADDB,
27980 IX86_BUILTIN_PADDW,
27981 IX86_BUILTIN_PADDD,
27982 IX86_BUILTIN_PADDQ,
27983 IX86_BUILTIN_PADDSB,
27984 IX86_BUILTIN_PADDSW,
27985 IX86_BUILTIN_PADDUSB,
27986 IX86_BUILTIN_PADDUSW,
27987 IX86_BUILTIN_PSUBB,
27988 IX86_BUILTIN_PSUBW,
27989 IX86_BUILTIN_PSUBD,
27990 IX86_BUILTIN_PSUBQ,
27991 IX86_BUILTIN_PSUBSB,
27992 IX86_BUILTIN_PSUBSW,
27993 IX86_BUILTIN_PSUBUSB,
27994 IX86_BUILTIN_PSUBUSW,
27997 IX86_BUILTIN_PANDN,
28001 IX86_BUILTIN_PAVGB,
28002 IX86_BUILTIN_PAVGW,
28004 IX86_BUILTIN_PCMPEQB,
28005 IX86_BUILTIN_PCMPEQW,
28006 IX86_BUILTIN_PCMPEQD,
28007 IX86_BUILTIN_PCMPGTB,
28008 IX86_BUILTIN_PCMPGTW,
28009 IX86_BUILTIN_PCMPGTD,
28011 IX86_BUILTIN_PMADDWD,
28013 IX86_BUILTIN_PMAXSW,
28014 IX86_BUILTIN_PMAXUB,
28015 IX86_BUILTIN_PMINSW,
28016 IX86_BUILTIN_PMINUB,
28018 IX86_BUILTIN_PMULHUW,
28019 IX86_BUILTIN_PMULHW,
28020 IX86_BUILTIN_PMULLW,
28022 IX86_BUILTIN_PSADBW,
28023 IX86_BUILTIN_PSHUFW,
28025 IX86_BUILTIN_PSLLW,
28026 IX86_BUILTIN_PSLLD,
28027 IX86_BUILTIN_PSLLQ,
28028 IX86_BUILTIN_PSRAW,
28029 IX86_BUILTIN_PSRAD,
28030 IX86_BUILTIN_PSRLW,
28031 IX86_BUILTIN_PSRLD,
28032 IX86_BUILTIN_PSRLQ,
28033 IX86_BUILTIN_PSLLWI,
28034 IX86_BUILTIN_PSLLDI,
28035 IX86_BUILTIN_PSLLQI,
28036 IX86_BUILTIN_PSRAWI,
28037 IX86_BUILTIN_PSRADI,
28038 IX86_BUILTIN_PSRLWI,
28039 IX86_BUILTIN_PSRLDI,
28040 IX86_BUILTIN_PSRLQI,
28042 IX86_BUILTIN_PUNPCKHBW,
28043 IX86_BUILTIN_PUNPCKHWD,
28044 IX86_BUILTIN_PUNPCKHDQ,
28045 IX86_BUILTIN_PUNPCKLBW,
28046 IX86_BUILTIN_PUNPCKLWD,
28047 IX86_BUILTIN_PUNPCKLDQ,
28049 IX86_BUILTIN_SHUFPS,
28051 IX86_BUILTIN_RCPPS,
28052 IX86_BUILTIN_RCPSS,
28053 IX86_BUILTIN_RSQRTPS,
28054 IX86_BUILTIN_RSQRTPS_NR,
28055 IX86_BUILTIN_RSQRTSS,
28056 IX86_BUILTIN_RSQRTF,
28057 IX86_BUILTIN_SQRTPS,
28058 IX86_BUILTIN_SQRTPS_NR,
28059 IX86_BUILTIN_SQRTSS,
28061 IX86_BUILTIN_UNPCKHPS,
28062 IX86_BUILTIN_UNPCKLPS,
28064 IX86_BUILTIN_ANDPS,
28065 IX86_BUILTIN_ANDNPS,
28067 IX86_BUILTIN_XORPS,
28070 IX86_BUILTIN_LDMXCSR,
28071 IX86_BUILTIN_STMXCSR,
28072 IX86_BUILTIN_SFENCE,
28074 IX86_BUILTIN_FXSAVE,
28075 IX86_BUILTIN_FXRSTOR,
28076 IX86_BUILTIN_FXSAVE64,
28077 IX86_BUILTIN_FXRSTOR64,
28079 IX86_BUILTIN_XSAVE,
28080 IX86_BUILTIN_XRSTOR,
28081 IX86_BUILTIN_XSAVE64,
28082 IX86_BUILTIN_XRSTOR64,
28084 IX86_BUILTIN_XSAVEOPT,
28085 IX86_BUILTIN_XSAVEOPT64,
28087 IX86_BUILTIN_XSAVEC,
28088 IX86_BUILTIN_XSAVEC64,
28090 IX86_BUILTIN_XSAVES,
28091 IX86_BUILTIN_XRSTORS,
28092 IX86_BUILTIN_XSAVES64,
28093 IX86_BUILTIN_XRSTORS64,
28095 /* 3DNow! Original */
28096 IX86_BUILTIN_FEMMS,
28097 IX86_BUILTIN_PAVGUSB,
28098 IX86_BUILTIN_PF2ID,
28099 IX86_BUILTIN_PFACC,
28100 IX86_BUILTIN_PFADD,
28101 IX86_BUILTIN_PFCMPEQ,
28102 IX86_BUILTIN_PFCMPGE,
28103 IX86_BUILTIN_PFCMPGT,
28104 IX86_BUILTIN_PFMAX,
28105 IX86_BUILTIN_PFMIN,
28106 IX86_BUILTIN_PFMUL,
28107 IX86_BUILTIN_PFRCP,
28108 IX86_BUILTIN_PFRCPIT1,
28109 IX86_BUILTIN_PFRCPIT2,
28110 IX86_BUILTIN_PFRSQIT1,
28111 IX86_BUILTIN_PFRSQRT,
28112 IX86_BUILTIN_PFSUB,
28113 IX86_BUILTIN_PFSUBR,
28114 IX86_BUILTIN_PI2FD,
28115 IX86_BUILTIN_PMULHRW,
28117 /* 3DNow! Athlon Extensions */
28118 IX86_BUILTIN_PF2IW,
28119 IX86_BUILTIN_PFNACC,
28120 IX86_BUILTIN_PFPNACC,
28121 IX86_BUILTIN_PI2FW,
28122 IX86_BUILTIN_PSWAPDSI,
28123 IX86_BUILTIN_PSWAPDSF,
28126 IX86_BUILTIN_ADDPD,
28127 IX86_BUILTIN_ADDSD,
28128 IX86_BUILTIN_DIVPD,
28129 IX86_BUILTIN_DIVSD,
28130 IX86_BUILTIN_MULPD,
28131 IX86_BUILTIN_MULSD,
28132 IX86_BUILTIN_SUBPD,
28133 IX86_BUILTIN_SUBSD,
28135 IX86_BUILTIN_CMPEQPD,
28136 IX86_BUILTIN_CMPLTPD,
28137 IX86_BUILTIN_CMPLEPD,
28138 IX86_BUILTIN_CMPGTPD,
28139 IX86_BUILTIN_CMPGEPD,
28140 IX86_BUILTIN_CMPNEQPD,
28141 IX86_BUILTIN_CMPNLTPD,
28142 IX86_BUILTIN_CMPNLEPD,
28143 IX86_BUILTIN_CMPNGTPD,
28144 IX86_BUILTIN_CMPNGEPD,
28145 IX86_BUILTIN_CMPORDPD,
28146 IX86_BUILTIN_CMPUNORDPD,
28147 IX86_BUILTIN_CMPEQSD,
28148 IX86_BUILTIN_CMPLTSD,
28149 IX86_BUILTIN_CMPLESD,
28150 IX86_BUILTIN_CMPNEQSD,
28151 IX86_BUILTIN_CMPNLTSD,
28152 IX86_BUILTIN_CMPNLESD,
28153 IX86_BUILTIN_CMPORDSD,
28154 IX86_BUILTIN_CMPUNORDSD,
28156 IX86_BUILTIN_COMIEQSD,
28157 IX86_BUILTIN_COMILTSD,
28158 IX86_BUILTIN_COMILESD,
28159 IX86_BUILTIN_COMIGTSD,
28160 IX86_BUILTIN_COMIGESD,
28161 IX86_BUILTIN_COMINEQSD,
28162 IX86_BUILTIN_UCOMIEQSD,
28163 IX86_BUILTIN_UCOMILTSD,
28164 IX86_BUILTIN_UCOMILESD,
28165 IX86_BUILTIN_UCOMIGTSD,
28166 IX86_BUILTIN_UCOMIGESD,
28167 IX86_BUILTIN_UCOMINEQSD,
28169 IX86_BUILTIN_MAXPD,
28170 IX86_BUILTIN_MAXSD,
28171 IX86_BUILTIN_MINPD,
28172 IX86_BUILTIN_MINSD,
28174 IX86_BUILTIN_ANDPD,
28175 IX86_BUILTIN_ANDNPD,
28177 IX86_BUILTIN_XORPD,
28179 IX86_BUILTIN_SQRTPD,
28180 IX86_BUILTIN_SQRTSD,
28182 IX86_BUILTIN_UNPCKHPD,
28183 IX86_BUILTIN_UNPCKLPD,
28185 IX86_BUILTIN_SHUFPD,
28187 IX86_BUILTIN_LOADUPD,
28188 IX86_BUILTIN_STOREUPD,
28189 IX86_BUILTIN_MOVSD,
28191 IX86_BUILTIN_LOADHPD,
28192 IX86_BUILTIN_LOADLPD,
28194 IX86_BUILTIN_CVTDQ2PD,
28195 IX86_BUILTIN_CVTDQ2PS,
28197 IX86_BUILTIN_CVTPD2DQ,
28198 IX86_BUILTIN_CVTPD2PI,
28199 IX86_BUILTIN_CVTPD2PS,
28200 IX86_BUILTIN_CVTTPD2DQ,
28201 IX86_BUILTIN_CVTTPD2PI,
28203 IX86_BUILTIN_CVTPI2PD,
28204 IX86_BUILTIN_CVTSI2SD,
28205 IX86_BUILTIN_CVTSI642SD,
28207 IX86_BUILTIN_CVTSD2SI,
28208 IX86_BUILTIN_CVTSD2SI64,
28209 IX86_BUILTIN_CVTSD2SS,
28210 IX86_BUILTIN_CVTSS2SD,
28211 IX86_BUILTIN_CVTTSD2SI,
28212 IX86_BUILTIN_CVTTSD2SI64,
28214 IX86_BUILTIN_CVTPS2DQ,
28215 IX86_BUILTIN_CVTPS2PD,
28216 IX86_BUILTIN_CVTTPS2DQ,
28218 IX86_BUILTIN_MOVNTI,
28219 IX86_BUILTIN_MOVNTI64,
28220 IX86_BUILTIN_MOVNTPD,
28221 IX86_BUILTIN_MOVNTDQ,
28223 IX86_BUILTIN_MOVQ128,
28226 IX86_BUILTIN_MASKMOVDQU,
28227 IX86_BUILTIN_MOVMSKPD,
28228 IX86_BUILTIN_PMOVMSKB128,
28230 IX86_BUILTIN_PACKSSWB128,
28231 IX86_BUILTIN_PACKSSDW128,
28232 IX86_BUILTIN_PACKUSWB128,
28234 IX86_BUILTIN_PADDB128,
28235 IX86_BUILTIN_PADDW128,
28236 IX86_BUILTIN_PADDD128,
28237 IX86_BUILTIN_PADDQ128,
28238 IX86_BUILTIN_PADDSB128,
28239 IX86_BUILTIN_PADDSW128,
28240 IX86_BUILTIN_PADDUSB128,
28241 IX86_BUILTIN_PADDUSW128,
28242 IX86_BUILTIN_PSUBB128,
28243 IX86_BUILTIN_PSUBW128,
28244 IX86_BUILTIN_PSUBD128,
28245 IX86_BUILTIN_PSUBQ128,
28246 IX86_BUILTIN_PSUBSB128,
28247 IX86_BUILTIN_PSUBSW128,
28248 IX86_BUILTIN_PSUBUSB128,
28249 IX86_BUILTIN_PSUBUSW128,
28251 IX86_BUILTIN_PAND128,
28252 IX86_BUILTIN_PANDN128,
28253 IX86_BUILTIN_POR128,
28254 IX86_BUILTIN_PXOR128,
28256 IX86_BUILTIN_PAVGB128,
28257 IX86_BUILTIN_PAVGW128,
28259 IX86_BUILTIN_PCMPEQB128,
28260 IX86_BUILTIN_PCMPEQW128,
28261 IX86_BUILTIN_PCMPEQD128,
28262 IX86_BUILTIN_PCMPGTB128,
28263 IX86_BUILTIN_PCMPGTW128,
28264 IX86_BUILTIN_PCMPGTD128,
28266 IX86_BUILTIN_PMADDWD128,
28268 IX86_BUILTIN_PMAXSW128,
28269 IX86_BUILTIN_PMAXUB128,
28270 IX86_BUILTIN_PMINSW128,
28271 IX86_BUILTIN_PMINUB128,
28273 IX86_BUILTIN_PMULUDQ,
28274 IX86_BUILTIN_PMULUDQ128,
28275 IX86_BUILTIN_PMULHUW128,
28276 IX86_BUILTIN_PMULHW128,
28277 IX86_BUILTIN_PMULLW128,
28279 IX86_BUILTIN_PSADBW128,
28280 IX86_BUILTIN_PSHUFHW,
28281 IX86_BUILTIN_PSHUFLW,
28282 IX86_BUILTIN_PSHUFD,
28284 IX86_BUILTIN_PSLLDQI128,
28285 IX86_BUILTIN_PSLLWI128,
28286 IX86_BUILTIN_PSLLDI128,
28287 IX86_BUILTIN_PSLLQI128,
28288 IX86_BUILTIN_PSRAWI128,
28289 IX86_BUILTIN_PSRADI128,
28290 IX86_BUILTIN_PSRLDQI128,
28291 IX86_BUILTIN_PSRLWI128,
28292 IX86_BUILTIN_PSRLDI128,
28293 IX86_BUILTIN_PSRLQI128,
28295 IX86_BUILTIN_PSLLDQ128,
28296 IX86_BUILTIN_PSLLW128,
28297 IX86_BUILTIN_PSLLD128,
28298 IX86_BUILTIN_PSLLQ128,
28299 IX86_BUILTIN_PSRAW128,
28300 IX86_BUILTIN_PSRAD128,
28301 IX86_BUILTIN_PSRLW128,
28302 IX86_BUILTIN_PSRLD128,
28303 IX86_BUILTIN_PSRLQ128,
28305 IX86_BUILTIN_PUNPCKHBW128,
28306 IX86_BUILTIN_PUNPCKHWD128,
28307 IX86_BUILTIN_PUNPCKHDQ128,
28308 IX86_BUILTIN_PUNPCKHQDQ128,
28309 IX86_BUILTIN_PUNPCKLBW128,
28310 IX86_BUILTIN_PUNPCKLWD128,
28311 IX86_BUILTIN_PUNPCKLDQ128,
28312 IX86_BUILTIN_PUNPCKLQDQ128,
28314 IX86_BUILTIN_CLFLUSH,
28315 IX86_BUILTIN_MFENCE,
28316 IX86_BUILTIN_LFENCE,
28317 IX86_BUILTIN_PAUSE,
28319 IX86_BUILTIN_FNSTENV,
28320 IX86_BUILTIN_FLDENV,
28321 IX86_BUILTIN_FNSTSW,
28322 IX86_BUILTIN_FNCLEX,
28324 IX86_BUILTIN_BSRSI,
28325 IX86_BUILTIN_BSRDI,
28326 IX86_BUILTIN_RDPMC,
28327 IX86_BUILTIN_RDTSC,
28328 IX86_BUILTIN_RDTSCP,
28329 IX86_BUILTIN_ROLQI,
28330 IX86_BUILTIN_ROLHI,
28331 IX86_BUILTIN_RORQI,
28332 IX86_BUILTIN_RORHI,
28335 IX86_BUILTIN_ADDSUBPS,
28336 IX86_BUILTIN_HADDPS,
28337 IX86_BUILTIN_HSUBPS,
28338 IX86_BUILTIN_MOVSHDUP,
28339 IX86_BUILTIN_MOVSLDUP,
28340 IX86_BUILTIN_ADDSUBPD,
28341 IX86_BUILTIN_HADDPD,
28342 IX86_BUILTIN_HSUBPD,
28343 IX86_BUILTIN_LDDQU,
28345 IX86_BUILTIN_MONITOR,
28346 IX86_BUILTIN_MWAIT,
28349 IX86_BUILTIN_PHADDW,
28350 IX86_BUILTIN_PHADDD,
28351 IX86_BUILTIN_PHADDSW,
28352 IX86_BUILTIN_PHSUBW,
28353 IX86_BUILTIN_PHSUBD,
28354 IX86_BUILTIN_PHSUBSW,
28355 IX86_BUILTIN_PMADDUBSW,
28356 IX86_BUILTIN_PMULHRSW,
28357 IX86_BUILTIN_PSHUFB,
28358 IX86_BUILTIN_PSIGNB,
28359 IX86_BUILTIN_PSIGNW,
28360 IX86_BUILTIN_PSIGND,
28361 IX86_BUILTIN_PALIGNR,
28362 IX86_BUILTIN_PABSB,
28363 IX86_BUILTIN_PABSW,
28364 IX86_BUILTIN_PABSD,
28366 IX86_BUILTIN_PHADDW128,
28367 IX86_BUILTIN_PHADDD128,
28368 IX86_BUILTIN_PHADDSW128,
28369 IX86_BUILTIN_PHSUBW128,
28370 IX86_BUILTIN_PHSUBD128,
28371 IX86_BUILTIN_PHSUBSW128,
28372 IX86_BUILTIN_PMADDUBSW128,
28373 IX86_BUILTIN_PMULHRSW128,
28374 IX86_BUILTIN_PSHUFB128,
28375 IX86_BUILTIN_PSIGNB128,
28376 IX86_BUILTIN_PSIGNW128,
28377 IX86_BUILTIN_PSIGND128,
28378 IX86_BUILTIN_PALIGNR128,
28379 IX86_BUILTIN_PABSB128,
28380 IX86_BUILTIN_PABSW128,
28381 IX86_BUILTIN_PABSD128,
28383 /* AMDFAM10 - SSE4A New Instructions. */
28384 IX86_BUILTIN_MOVNTSD,
28385 IX86_BUILTIN_MOVNTSS,
28386 IX86_BUILTIN_EXTRQI,
28387 IX86_BUILTIN_EXTRQ,
28388 IX86_BUILTIN_INSERTQI,
28389 IX86_BUILTIN_INSERTQ,
28392 IX86_BUILTIN_BLENDPD,
28393 IX86_BUILTIN_BLENDPS,
28394 IX86_BUILTIN_BLENDVPD,
28395 IX86_BUILTIN_BLENDVPS,
28396 IX86_BUILTIN_PBLENDVB128,
28397 IX86_BUILTIN_PBLENDW128,
28402 IX86_BUILTIN_INSERTPS128,
28404 IX86_BUILTIN_MOVNTDQA,
28405 IX86_BUILTIN_MPSADBW128,
28406 IX86_BUILTIN_PACKUSDW128,
28407 IX86_BUILTIN_PCMPEQQ,
28408 IX86_BUILTIN_PHMINPOSUW128,
28410 IX86_BUILTIN_PMAXSB128,
28411 IX86_BUILTIN_PMAXSD128,
28412 IX86_BUILTIN_PMAXUD128,
28413 IX86_BUILTIN_PMAXUW128,
28415 IX86_BUILTIN_PMINSB128,
28416 IX86_BUILTIN_PMINSD128,
28417 IX86_BUILTIN_PMINUD128,
28418 IX86_BUILTIN_PMINUW128,
28420 IX86_BUILTIN_PMOVSXBW128,
28421 IX86_BUILTIN_PMOVSXBD128,
28422 IX86_BUILTIN_PMOVSXBQ128,
28423 IX86_BUILTIN_PMOVSXWD128,
28424 IX86_BUILTIN_PMOVSXWQ128,
28425 IX86_BUILTIN_PMOVSXDQ128,
28427 IX86_BUILTIN_PMOVZXBW128,
28428 IX86_BUILTIN_PMOVZXBD128,
28429 IX86_BUILTIN_PMOVZXBQ128,
28430 IX86_BUILTIN_PMOVZXWD128,
28431 IX86_BUILTIN_PMOVZXWQ128,
28432 IX86_BUILTIN_PMOVZXDQ128,
28434 IX86_BUILTIN_PMULDQ128,
28435 IX86_BUILTIN_PMULLD128,
28437 IX86_BUILTIN_ROUNDSD,
28438 IX86_BUILTIN_ROUNDSS,
28440 IX86_BUILTIN_ROUNDPD,
28441 IX86_BUILTIN_ROUNDPS,
28443 IX86_BUILTIN_FLOORPD,
28444 IX86_BUILTIN_CEILPD,
28445 IX86_BUILTIN_TRUNCPD,
28446 IX86_BUILTIN_RINTPD,
28447 IX86_BUILTIN_ROUNDPD_AZ,
28449 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28450 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28451 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28453 IX86_BUILTIN_FLOORPS,
28454 IX86_BUILTIN_CEILPS,
28455 IX86_BUILTIN_TRUNCPS,
28456 IX86_BUILTIN_RINTPS,
28457 IX86_BUILTIN_ROUNDPS_AZ,
28459 IX86_BUILTIN_FLOORPS_SFIX,
28460 IX86_BUILTIN_CEILPS_SFIX,
28461 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28463 IX86_BUILTIN_PTESTZ,
28464 IX86_BUILTIN_PTESTC,
28465 IX86_BUILTIN_PTESTNZC,
28467 IX86_BUILTIN_VEC_INIT_V2SI,
28468 IX86_BUILTIN_VEC_INIT_V4HI,
28469 IX86_BUILTIN_VEC_INIT_V8QI,
28470 IX86_BUILTIN_VEC_EXT_V2DF,
28471 IX86_BUILTIN_VEC_EXT_V2DI,
28472 IX86_BUILTIN_VEC_EXT_V4SF,
28473 IX86_BUILTIN_VEC_EXT_V4SI,
28474 IX86_BUILTIN_VEC_EXT_V8HI,
28475 IX86_BUILTIN_VEC_EXT_V2SI,
28476 IX86_BUILTIN_VEC_EXT_V4HI,
28477 IX86_BUILTIN_VEC_EXT_V16QI,
28478 IX86_BUILTIN_VEC_SET_V2DI,
28479 IX86_BUILTIN_VEC_SET_V4SF,
28480 IX86_BUILTIN_VEC_SET_V4SI,
28481 IX86_BUILTIN_VEC_SET_V8HI,
28482 IX86_BUILTIN_VEC_SET_V4HI,
28483 IX86_BUILTIN_VEC_SET_V16QI,
28485 IX86_BUILTIN_VEC_PACK_SFIX,
28486 IX86_BUILTIN_VEC_PACK_SFIX256,
28489 IX86_BUILTIN_CRC32QI,
28490 IX86_BUILTIN_CRC32HI,
28491 IX86_BUILTIN_CRC32SI,
28492 IX86_BUILTIN_CRC32DI,
28494 IX86_BUILTIN_PCMPESTRI128,
28495 IX86_BUILTIN_PCMPESTRM128,
28496 IX86_BUILTIN_PCMPESTRA128,
28497 IX86_BUILTIN_PCMPESTRC128,
28498 IX86_BUILTIN_PCMPESTRO128,
28499 IX86_BUILTIN_PCMPESTRS128,
28500 IX86_BUILTIN_PCMPESTRZ128,
28501 IX86_BUILTIN_PCMPISTRI128,
28502 IX86_BUILTIN_PCMPISTRM128,
28503 IX86_BUILTIN_PCMPISTRA128,
28504 IX86_BUILTIN_PCMPISTRC128,
28505 IX86_BUILTIN_PCMPISTRO128,
28506 IX86_BUILTIN_PCMPISTRS128,
28507 IX86_BUILTIN_PCMPISTRZ128,
28509 IX86_BUILTIN_PCMPGTQ,
28511 /* AES instructions */
28512 IX86_BUILTIN_AESENC128,
28513 IX86_BUILTIN_AESENCLAST128,
28514 IX86_BUILTIN_AESDEC128,
28515 IX86_BUILTIN_AESDECLAST128,
28516 IX86_BUILTIN_AESIMC128,
28517 IX86_BUILTIN_AESKEYGENASSIST128,
28519 /* PCLMUL instruction */
28520 IX86_BUILTIN_PCLMULQDQ128,
28523 IX86_BUILTIN_ADDPD256,
28524 IX86_BUILTIN_ADDPS256,
28525 IX86_BUILTIN_ADDSUBPD256,
28526 IX86_BUILTIN_ADDSUBPS256,
28527 IX86_BUILTIN_ANDPD256,
28528 IX86_BUILTIN_ANDPS256,
28529 IX86_BUILTIN_ANDNPD256,
28530 IX86_BUILTIN_ANDNPS256,
28531 IX86_BUILTIN_BLENDPD256,
28532 IX86_BUILTIN_BLENDPS256,
28533 IX86_BUILTIN_BLENDVPD256,
28534 IX86_BUILTIN_BLENDVPS256,
28535 IX86_BUILTIN_DIVPD256,
28536 IX86_BUILTIN_DIVPS256,
28537 IX86_BUILTIN_DPPS256,
28538 IX86_BUILTIN_HADDPD256,
28539 IX86_BUILTIN_HADDPS256,
28540 IX86_BUILTIN_HSUBPD256,
28541 IX86_BUILTIN_HSUBPS256,
28542 IX86_BUILTIN_MAXPD256,
28543 IX86_BUILTIN_MAXPS256,
28544 IX86_BUILTIN_MINPD256,
28545 IX86_BUILTIN_MINPS256,
28546 IX86_BUILTIN_MULPD256,
28547 IX86_BUILTIN_MULPS256,
28548 IX86_BUILTIN_ORPD256,
28549 IX86_BUILTIN_ORPS256,
28550 IX86_BUILTIN_SHUFPD256,
28551 IX86_BUILTIN_SHUFPS256,
28552 IX86_BUILTIN_SUBPD256,
28553 IX86_BUILTIN_SUBPS256,
28554 IX86_BUILTIN_XORPD256,
28555 IX86_BUILTIN_XORPS256,
28556 IX86_BUILTIN_CMPSD,
28557 IX86_BUILTIN_CMPSS,
28558 IX86_BUILTIN_CMPPD,
28559 IX86_BUILTIN_CMPPS,
28560 IX86_BUILTIN_CMPPD256,
28561 IX86_BUILTIN_CMPPS256,
28562 IX86_BUILTIN_CVTDQ2PD256,
28563 IX86_BUILTIN_CVTDQ2PS256,
28564 IX86_BUILTIN_CVTPD2PS256,
28565 IX86_BUILTIN_CVTPS2DQ256,
28566 IX86_BUILTIN_CVTPS2PD256,
28567 IX86_BUILTIN_CVTTPD2DQ256,
28568 IX86_BUILTIN_CVTPD2DQ256,
28569 IX86_BUILTIN_CVTTPS2DQ256,
28570 IX86_BUILTIN_EXTRACTF128PD256,
28571 IX86_BUILTIN_EXTRACTF128PS256,
28572 IX86_BUILTIN_EXTRACTF128SI256,
28573 IX86_BUILTIN_VZEROALL,
28574 IX86_BUILTIN_VZEROUPPER,
28575 IX86_BUILTIN_VPERMILVARPD,
28576 IX86_BUILTIN_VPERMILVARPS,
28577 IX86_BUILTIN_VPERMILVARPD256,
28578 IX86_BUILTIN_VPERMILVARPS256,
28579 IX86_BUILTIN_VPERMILPD,
28580 IX86_BUILTIN_VPERMILPS,
28581 IX86_BUILTIN_VPERMILPD256,
28582 IX86_BUILTIN_VPERMILPS256,
28583 IX86_BUILTIN_VPERMIL2PD,
28584 IX86_BUILTIN_VPERMIL2PS,
28585 IX86_BUILTIN_VPERMIL2PD256,
28586 IX86_BUILTIN_VPERMIL2PS256,
28587 IX86_BUILTIN_VPERM2F128PD256,
28588 IX86_BUILTIN_VPERM2F128PS256,
28589 IX86_BUILTIN_VPERM2F128SI256,
28590 IX86_BUILTIN_VBROADCASTSS,
28591 IX86_BUILTIN_VBROADCASTSD256,
28592 IX86_BUILTIN_VBROADCASTSS256,
28593 IX86_BUILTIN_VBROADCASTPD256,
28594 IX86_BUILTIN_VBROADCASTPS256,
28595 IX86_BUILTIN_VINSERTF128PD256,
28596 IX86_BUILTIN_VINSERTF128PS256,
28597 IX86_BUILTIN_VINSERTF128SI256,
28598 IX86_BUILTIN_LOADUPD256,
28599 IX86_BUILTIN_LOADUPS256,
28600 IX86_BUILTIN_STOREUPD256,
28601 IX86_BUILTIN_STOREUPS256,
28602 IX86_BUILTIN_LDDQU256,
28603 IX86_BUILTIN_MOVNTDQ256,
28604 IX86_BUILTIN_MOVNTPD256,
28605 IX86_BUILTIN_MOVNTPS256,
28606 IX86_BUILTIN_LOADDQU256,
28607 IX86_BUILTIN_STOREDQU256,
28608 IX86_BUILTIN_MASKLOADPD,
28609 IX86_BUILTIN_MASKLOADPS,
28610 IX86_BUILTIN_MASKSTOREPD,
28611 IX86_BUILTIN_MASKSTOREPS,
28612 IX86_BUILTIN_MASKLOADPD256,
28613 IX86_BUILTIN_MASKLOADPS256,
28614 IX86_BUILTIN_MASKSTOREPD256,
28615 IX86_BUILTIN_MASKSTOREPS256,
28616 IX86_BUILTIN_MOVSHDUP256,
28617 IX86_BUILTIN_MOVSLDUP256,
28618 IX86_BUILTIN_MOVDDUP256,
28620 IX86_BUILTIN_SQRTPD256,
28621 IX86_BUILTIN_SQRTPS256,
28622 IX86_BUILTIN_SQRTPS_NR256,
28623 IX86_BUILTIN_RSQRTPS256,
28624 IX86_BUILTIN_RSQRTPS_NR256,
28626 IX86_BUILTIN_RCPPS256,
28628 IX86_BUILTIN_ROUNDPD256,
28629 IX86_BUILTIN_ROUNDPS256,
28631 IX86_BUILTIN_FLOORPD256,
28632 IX86_BUILTIN_CEILPD256,
28633 IX86_BUILTIN_TRUNCPD256,
28634 IX86_BUILTIN_RINTPD256,
28635 IX86_BUILTIN_ROUNDPD_AZ256,
28637 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28638 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28639 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28641 IX86_BUILTIN_FLOORPS256,
28642 IX86_BUILTIN_CEILPS256,
28643 IX86_BUILTIN_TRUNCPS256,
28644 IX86_BUILTIN_RINTPS256,
28645 IX86_BUILTIN_ROUNDPS_AZ256,
28647 IX86_BUILTIN_FLOORPS_SFIX256,
28648 IX86_BUILTIN_CEILPS_SFIX256,
28649 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28651 IX86_BUILTIN_UNPCKHPD256,
28652 IX86_BUILTIN_UNPCKLPD256,
28653 IX86_BUILTIN_UNPCKHPS256,
28654 IX86_BUILTIN_UNPCKLPS256,
28656 IX86_BUILTIN_SI256_SI,
28657 IX86_BUILTIN_PS256_PS,
28658 IX86_BUILTIN_PD256_PD,
28659 IX86_BUILTIN_SI_SI256,
28660 IX86_BUILTIN_PS_PS256,
28661 IX86_BUILTIN_PD_PD256,
28663 IX86_BUILTIN_VTESTZPD,
28664 IX86_BUILTIN_VTESTCPD,
28665 IX86_BUILTIN_VTESTNZCPD,
28666 IX86_BUILTIN_VTESTZPS,
28667 IX86_BUILTIN_VTESTCPS,
28668 IX86_BUILTIN_VTESTNZCPS,
28669 IX86_BUILTIN_VTESTZPD256,
28670 IX86_BUILTIN_VTESTCPD256,
28671 IX86_BUILTIN_VTESTNZCPD256,
28672 IX86_BUILTIN_VTESTZPS256,
28673 IX86_BUILTIN_VTESTCPS256,
28674 IX86_BUILTIN_VTESTNZCPS256,
28675 IX86_BUILTIN_PTESTZ256,
28676 IX86_BUILTIN_PTESTC256,
28677 IX86_BUILTIN_PTESTNZC256,
28679 IX86_BUILTIN_MOVMSKPD256,
28680 IX86_BUILTIN_MOVMSKPS256,
28683 IX86_BUILTIN_MPSADBW256,
28684 IX86_BUILTIN_PABSB256,
28685 IX86_BUILTIN_PABSW256,
28686 IX86_BUILTIN_PABSD256,
28687 IX86_BUILTIN_PACKSSDW256,
28688 IX86_BUILTIN_PACKSSWB256,
28689 IX86_BUILTIN_PACKUSDW256,
28690 IX86_BUILTIN_PACKUSWB256,
28691 IX86_BUILTIN_PADDB256,
28692 IX86_BUILTIN_PADDW256,
28693 IX86_BUILTIN_PADDD256,
28694 IX86_BUILTIN_PADDQ256,
28695 IX86_BUILTIN_PADDSB256,
28696 IX86_BUILTIN_PADDSW256,
28697 IX86_BUILTIN_PADDUSB256,
28698 IX86_BUILTIN_PADDUSW256,
28699 IX86_BUILTIN_PALIGNR256,
28700 IX86_BUILTIN_AND256I,
28701 IX86_BUILTIN_ANDNOT256I,
28702 IX86_BUILTIN_PAVGB256,
28703 IX86_BUILTIN_PAVGW256,
28704 IX86_BUILTIN_PBLENDVB256,
28705 IX86_BUILTIN_PBLENDVW256,
28706 IX86_BUILTIN_PCMPEQB256,
28707 IX86_BUILTIN_PCMPEQW256,
28708 IX86_BUILTIN_PCMPEQD256,
28709 IX86_BUILTIN_PCMPEQQ256,
28710 IX86_BUILTIN_PCMPGTB256,
28711 IX86_BUILTIN_PCMPGTW256,
28712 IX86_BUILTIN_PCMPGTD256,
28713 IX86_BUILTIN_PCMPGTQ256,
28714 IX86_BUILTIN_PHADDW256,
28715 IX86_BUILTIN_PHADDD256,
28716 IX86_BUILTIN_PHADDSW256,
28717 IX86_BUILTIN_PHSUBW256,
28718 IX86_BUILTIN_PHSUBD256,
28719 IX86_BUILTIN_PHSUBSW256,
28720 IX86_BUILTIN_PMADDUBSW256,
28721 IX86_BUILTIN_PMADDWD256,
28722 IX86_BUILTIN_PMAXSB256,
28723 IX86_BUILTIN_PMAXSW256,
28724 IX86_BUILTIN_PMAXSD256,
28725 IX86_BUILTIN_PMAXUB256,
28726 IX86_BUILTIN_PMAXUW256,
28727 IX86_BUILTIN_PMAXUD256,
28728 IX86_BUILTIN_PMINSB256,
28729 IX86_BUILTIN_PMINSW256,
28730 IX86_BUILTIN_PMINSD256,
28731 IX86_BUILTIN_PMINUB256,
28732 IX86_BUILTIN_PMINUW256,
28733 IX86_BUILTIN_PMINUD256,
28734 IX86_BUILTIN_PMOVMSKB256,
28735 IX86_BUILTIN_PMOVSXBW256,
28736 IX86_BUILTIN_PMOVSXBD256,
28737 IX86_BUILTIN_PMOVSXBQ256,
28738 IX86_BUILTIN_PMOVSXWD256,
28739 IX86_BUILTIN_PMOVSXWQ256,
28740 IX86_BUILTIN_PMOVSXDQ256,
28741 IX86_BUILTIN_PMOVZXBW256,
28742 IX86_BUILTIN_PMOVZXBD256,
28743 IX86_BUILTIN_PMOVZXBQ256,
28744 IX86_BUILTIN_PMOVZXWD256,
28745 IX86_BUILTIN_PMOVZXWQ256,
28746 IX86_BUILTIN_PMOVZXDQ256,
28747 IX86_BUILTIN_PMULDQ256,
28748 IX86_BUILTIN_PMULHRSW256,
28749 IX86_BUILTIN_PMULHUW256,
28750 IX86_BUILTIN_PMULHW256,
28751 IX86_BUILTIN_PMULLW256,
28752 IX86_BUILTIN_PMULLD256,
28753 IX86_BUILTIN_PMULUDQ256,
28754 IX86_BUILTIN_POR256,
28755 IX86_BUILTIN_PSADBW256,
28756 IX86_BUILTIN_PSHUFB256,
28757 IX86_BUILTIN_PSHUFD256,
28758 IX86_BUILTIN_PSHUFHW256,
28759 IX86_BUILTIN_PSHUFLW256,
28760 IX86_BUILTIN_PSIGNB256,
28761 IX86_BUILTIN_PSIGNW256,
28762 IX86_BUILTIN_PSIGND256,
28763 IX86_BUILTIN_PSLLDQI256,
28764 IX86_BUILTIN_PSLLWI256,
28765 IX86_BUILTIN_PSLLW256,
28766 IX86_BUILTIN_PSLLDI256,
28767 IX86_BUILTIN_PSLLD256,
28768 IX86_BUILTIN_PSLLQI256,
28769 IX86_BUILTIN_PSLLQ256,
28770 IX86_BUILTIN_PSRAWI256,
28771 IX86_BUILTIN_PSRAW256,
28772 IX86_BUILTIN_PSRADI256,
28773 IX86_BUILTIN_PSRAD256,
28774 IX86_BUILTIN_PSRLDQI256,
28775 IX86_BUILTIN_PSRLWI256,
28776 IX86_BUILTIN_PSRLW256,
28777 IX86_BUILTIN_PSRLDI256,
28778 IX86_BUILTIN_PSRLD256,
28779 IX86_BUILTIN_PSRLQI256,
28780 IX86_BUILTIN_PSRLQ256,
28781 IX86_BUILTIN_PSUBB256,
28782 IX86_BUILTIN_PSUBW256,
28783 IX86_BUILTIN_PSUBD256,
28784 IX86_BUILTIN_PSUBQ256,
28785 IX86_BUILTIN_PSUBSB256,
28786 IX86_BUILTIN_PSUBSW256,
28787 IX86_BUILTIN_PSUBUSB256,
28788 IX86_BUILTIN_PSUBUSW256,
28789 IX86_BUILTIN_PUNPCKHBW256,
28790 IX86_BUILTIN_PUNPCKHWD256,
28791 IX86_BUILTIN_PUNPCKHDQ256,
28792 IX86_BUILTIN_PUNPCKHQDQ256,
28793 IX86_BUILTIN_PUNPCKLBW256,
28794 IX86_BUILTIN_PUNPCKLWD256,
28795 IX86_BUILTIN_PUNPCKLDQ256,
28796 IX86_BUILTIN_PUNPCKLQDQ256,
28797 IX86_BUILTIN_PXOR256,
28798 IX86_BUILTIN_MOVNTDQA256,
28799 IX86_BUILTIN_VBROADCASTSS_PS,
28800 IX86_BUILTIN_VBROADCASTSS_PS256,
28801 IX86_BUILTIN_VBROADCASTSD_PD256,
28802 IX86_BUILTIN_VBROADCASTSI256,
28803 IX86_BUILTIN_PBLENDD256,
28804 IX86_BUILTIN_PBLENDD128,
28805 IX86_BUILTIN_PBROADCASTB256,
28806 IX86_BUILTIN_PBROADCASTW256,
28807 IX86_BUILTIN_PBROADCASTD256,
28808 IX86_BUILTIN_PBROADCASTQ256,
28809 IX86_BUILTIN_PBROADCASTB128,
28810 IX86_BUILTIN_PBROADCASTW128,
28811 IX86_BUILTIN_PBROADCASTD128,
28812 IX86_BUILTIN_PBROADCASTQ128,
28813 IX86_BUILTIN_VPERMVARSI256,
28814 IX86_BUILTIN_VPERMDF256,
28815 IX86_BUILTIN_VPERMVARSF256,
28816 IX86_BUILTIN_VPERMDI256,
28817 IX86_BUILTIN_VPERMTI256,
28818 IX86_BUILTIN_VEXTRACT128I256,
28819 IX86_BUILTIN_VINSERT128I256,
28820 IX86_BUILTIN_MASKLOADD,
28821 IX86_BUILTIN_MASKLOADQ,
28822 IX86_BUILTIN_MASKLOADD256,
28823 IX86_BUILTIN_MASKLOADQ256,
28824 IX86_BUILTIN_MASKSTORED,
28825 IX86_BUILTIN_MASKSTOREQ,
28826 IX86_BUILTIN_MASKSTORED256,
28827 IX86_BUILTIN_MASKSTOREQ256,
28828 IX86_BUILTIN_PSLLVV4DI,
28829 IX86_BUILTIN_PSLLVV2DI,
28830 IX86_BUILTIN_PSLLVV8SI,
28831 IX86_BUILTIN_PSLLVV4SI,
28832 IX86_BUILTIN_PSRAVV8SI,
28833 IX86_BUILTIN_PSRAVV4SI,
28834 IX86_BUILTIN_PSRLVV4DI,
28835 IX86_BUILTIN_PSRLVV2DI,
28836 IX86_BUILTIN_PSRLVV8SI,
28837 IX86_BUILTIN_PSRLVV4SI,
28839 IX86_BUILTIN_GATHERSIV2DF,
28840 IX86_BUILTIN_GATHERSIV4DF,
28841 IX86_BUILTIN_GATHERDIV2DF,
28842 IX86_BUILTIN_GATHERDIV4DF,
28843 IX86_BUILTIN_GATHERSIV4SF,
28844 IX86_BUILTIN_GATHERSIV8SF,
28845 IX86_BUILTIN_GATHERDIV4SF,
28846 IX86_BUILTIN_GATHERDIV8SF,
28847 IX86_BUILTIN_GATHERSIV2DI,
28848 IX86_BUILTIN_GATHERSIV4DI,
28849 IX86_BUILTIN_GATHERDIV2DI,
28850 IX86_BUILTIN_GATHERDIV4DI,
28851 IX86_BUILTIN_GATHERSIV4SI,
28852 IX86_BUILTIN_GATHERSIV8SI,
28853 IX86_BUILTIN_GATHERDIV4SI,
28854 IX86_BUILTIN_GATHERDIV8SI,
28857 IX86_BUILTIN_SI512_SI256,
28858 IX86_BUILTIN_PD512_PD256,
28859 IX86_BUILTIN_PS512_PS256,
28860 IX86_BUILTIN_SI512_SI,
28861 IX86_BUILTIN_PD512_PD,
28862 IX86_BUILTIN_PS512_PS,
28863 IX86_BUILTIN_ADDPD512,
28864 IX86_BUILTIN_ADDPS512,
28865 IX86_BUILTIN_ADDSD_ROUND,
28866 IX86_BUILTIN_ADDSS_ROUND,
28867 IX86_BUILTIN_ALIGND512,
28868 IX86_BUILTIN_ALIGNQ512,
28869 IX86_BUILTIN_BLENDMD512,
28870 IX86_BUILTIN_BLENDMPD512,
28871 IX86_BUILTIN_BLENDMPS512,
28872 IX86_BUILTIN_BLENDMQ512,
28873 IX86_BUILTIN_BROADCASTF32X4_512,
28874 IX86_BUILTIN_BROADCASTF64X4_512,
28875 IX86_BUILTIN_BROADCASTI32X4_512,
28876 IX86_BUILTIN_BROADCASTI64X4_512,
28877 IX86_BUILTIN_BROADCASTSD512,
28878 IX86_BUILTIN_BROADCASTSS512,
28879 IX86_BUILTIN_CMPD512,
28880 IX86_BUILTIN_CMPPD512,
28881 IX86_BUILTIN_CMPPS512,
28882 IX86_BUILTIN_CMPQ512,
28883 IX86_BUILTIN_CMPSD_MASK,
28884 IX86_BUILTIN_CMPSS_MASK,
28885 IX86_BUILTIN_COMIDF,
28886 IX86_BUILTIN_COMISF,
28887 IX86_BUILTIN_COMPRESSPD512,
28888 IX86_BUILTIN_COMPRESSPDSTORE512,
28889 IX86_BUILTIN_COMPRESSPS512,
28890 IX86_BUILTIN_COMPRESSPSSTORE512,
28891 IX86_BUILTIN_CVTDQ2PD512,
28892 IX86_BUILTIN_CVTDQ2PS512,
28893 IX86_BUILTIN_CVTPD2DQ512,
28894 IX86_BUILTIN_CVTPD2PS512,
28895 IX86_BUILTIN_CVTPD2UDQ512,
28896 IX86_BUILTIN_CVTPH2PS512,
28897 IX86_BUILTIN_CVTPS2DQ512,
28898 IX86_BUILTIN_CVTPS2PD512,
28899 IX86_BUILTIN_CVTPS2PH512,
28900 IX86_BUILTIN_CVTPS2UDQ512,
28901 IX86_BUILTIN_CVTSD2SS_ROUND,
28902 IX86_BUILTIN_CVTSI2SD64,
28903 IX86_BUILTIN_CVTSI2SS32,
28904 IX86_BUILTIN_CVTSI2SS64,
28905 IX86_BUILTIN_CVTSS2SD_ROUND,
28906 IX86_BUILTIN_CVTTPD2DQ512,
28907 IX86_BUILTIN_CVTTPD2UDQ512,
28908 IX86_BUILTIN_CVTTPS2DQ512,
28909 IX86_BUILTIN_CVTTPS2UDQ512,
28910 IX86_BUILTIN_CVTUDQ2PD512,
28911 IX86_BUILTIN_CVTUDQ2PS512,
28912 IX86_BUILTIN_CVTUSI2SD32,
28913 IX86_BUILTIN_CVTUSI2SD64,
28914 IX86_BUILTIN_CVTUSI2SS32,
28915 IX86_BUILTIN_CVTUSI2SS64,
28916 IX86_BUILTIN_DIVPD512,
28917 IX86_BUILTIN_DIVPS512,
28918 IX86_BUILTIN_DIVSD_ROUND,
28919 IX86_BUILTIN_DIVSS_ROUND,
28920 IX86_BUILTIN_EXPANDPD512,
28921 IX86_BUILTIN_EXPANDPD512Z,
28922 IX86_BUILTIN_EXPANDPDLOAD512,
28923 IX86_BUILTIN_EXPANDPDLOAD512Z,
28924 IX86_BUILTIN_EXPANDPS512,
28925 IX86_BUILTIN_EXPANDPS512Z,
28926 IX86_BUILTIN_EXPANDPSLOAD512,
28927 IX86_BUILTIN_EXPANDPSLOAD512Z,
28928 IX86_BUILTIN_EXTRACTF32X4,
28929 IX86_BUILTIN_EXTRACTF64X4,
28930 IX86_BUILTIN_EXTRACTI32X4,
28931 IX86_BUILTIN_EXTRACTI64X4,
28932 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28933 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28934 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28935 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28936 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28937 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28938 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28939 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28940 IX86_BUILTIN_GETEXPPD512,
28941 IX86_BUILTIN_GETEXPPS512,
28942 IX86_BUILTIN_GETEXPSD128,
28943 IX86_BUILTIN_GETEXPSS128,
28944 IX86_BUILTIN_GETMANTPD512,
28945 IX86_BUILTIN_GETMANTPS512,
28946 IX86_BUILTIN_GETMANTSD128,
28947 IX86_BUILTIN_GETMANTSS128,
28948 IX86_BUILTIN_INSERTF32X4,
28949 IX86_BUILTIN_INSERTF64X4,
28950 IX86_BUILTIN_INSERTI32X4,
28951 IX86_BUILTIN_INSERTI64X4,
28952 IX86_BUILTIN_LOADAPD512,
28953 IX86_BUILTIN_LOADAPS512,
28954 IX86_BUILTIN_LOADDQUDI512,
28955 IX86_BUILTIN_LOADDQUSI512,
28956 IX86_BUILTIN_LOADUPD512,
28957 IX86_BUILTIN_LOADUPS512,
28958 IX86_BUILTIN_MAXPD512,
28959 IX86_BUILTIN_MAXPS512,
28960 IX86_BUILTIN_MAXSD_ROUND,
28961 IX86_BUILTIN_MAXSS_ROUND,
28962 IX86_BUILTIN_MINPD512,
28963 IX86_BUILTIN_MINPS512,
28964 IX86_BUILTIN_MINSD_ROUND,
28965 IX86_BUILTIN_MINSS_ROUND,
28966 IX86_BUILTIN_MOVAPD512,
28967 IX86_BUILTIN_MOVAPS512,
28968 IX86_BUILTIN_MOVDDUP512,
28969 IX86_BUILTIN_MOVDQA32LOAD512,
28970 IX86_BUILTIN_MOVDQA32STORE512,
28971 IX86_BUILTIN_MOVDQA32_512,
28972 IX86_BUILTIN_MOVDQA64LOAD512,
28973 IX86_BUILTIN_MOVDQA64STORE512,
28974 IX86_BUILTIN_MOVDQA64_512,
28975 IX86_BUILTIN_MOVNTDQ512,
28976 IX86_BUILTIN_MOVNTDQA512,
28977 IX86_BUILTIN_MOVNTPD512,
28978 IX86_BUILTIN_MOVNTPS512,
28979 IX86_BUILTIN_MOVSHDUP512,
28980 IX86_BUILTIN_MOVSLDUP512,
28981 IX86_BUILTIN_MULPD512,
28982 IX86_BUILTIN_MULPS512,
28983 IX86_BUILTIN_MULSD_ROUND,
28984 IX86_BUILTIN_MULSS_ROUND,
28985 IX86_BUILTIN_PABSD512,
28986 IX86_BUILTIN_PABSQ512,
28987 IX86_BUILTIN_PADDD512,
28988 IX86_BUILTIN_PADDQ512,
28989 IX86_BUILTIN_PANDD512,
28990 IX86_BUILTIN_PANDND512,
28991 IX86_BUILTIN_PANDNQ512,
28992 IX86_BUILTIN_PANDQ512,
28993 IX86_BUILTIN_PBROADCASTD512,
28994 IX86_BUILTIN_PBROADCASTD512_GPR,
28995 IX86_BUILTIN_PBROADCASTMB512,
28996 IX86_BUILTIN_PBROADCASTMW512,
28997 IX86_BUILTIN_PBROADCASTQ512,
28998 IX86_BUILTIN_PBROADCASTQ512_GPR,
28999 IX86_BUILTIN_PCMPEQD512_MASK,
29000 IX86_BUILTIN_PCMPEQQ512_MASK,
29001 IX86_BUILTIN_PCMPGTD512_MASK,
29002 IX86_BUILTIN_PCMPGTQ512_MASK,
29003 IX86_BUILTIN_PCOMPRESSD512,
29004 IX86_BUILTIN_PCOMPRESSDSTORE512,
29005 IX86_BUILTIN_PCOMPRESSQ512,
29006 IX86_BUILTIN_PCOMPRESSQSTORE512,
29007 IX86_BUILTIN_PEXPANDD512,
29008 IX86_BUILTIN_PEXPANDD512Z,
29009 IX86_BUILTIN_PEXPANDDLOAD512,
29010 IX86_BUILTIN_PEXPANDDLOAD512Z,
29011 IX86_BUILTIN_PEXPANDQ512,
29012 IX86_BUILTIN_PEXPANDQ512Z,
29013 IX86_BUILTIN_PEXPANDQLOAD512,
29014 IX86_BUILTIN_PEXPANDQLOAD512Z,
29015 IX86_BUILTIN_PMAXSD512,
29016 IX86_BUILTIN_PMAXSQ512,
29017 IX86_BUILTIN_PMAXUD512,
29018 IX86_BUILTIN_PMAXUQ512,
29019 IX86_BUILTIN_PMINSD512,
29020 IX86_BUILTIN_PMINSQ512,
29021 IX86_BUILTIN_PMINUD512,
29022 IX86_BUILTIN_PMINUQ512,
29023 IX86_BUILTIN_PMOVDB512,
29024 IX86_BUILTIN_PMOVDB512_MEM,
29025 IX86_BUILTIN_PMOVDW512,
29026 IX86_BUILTIN_PMOVDW512_MEM,
29027 IX86_BUILTIN_PMOVQB512,
29028 IX86_BUILTIN_PMOVQB512_MEM,
29029 IX86_BUILTIN_PMOVQD512,
29030 IX86_BUILTIN_PMOVQD512_MEM,
29031 IX86_BUILTIN_PMOVQW512,
29032 IX86_BUILTIN_PMOVQW512_MEM,
29033 IX86_BUILTIN_PMOVSDB512,
29034 IX86_BUILTIN_PMOVSDB512_MEM,
29035 IX86_BUILTIN_PMOVSDW512,
29036 IX86_BUILTIN_PMOVSDW512_MEM,
29037 IX86_BUILTIN_PMOVSQB512,
29038 IX86_BUILTIN_PMOVSQB512_MEM,
29039 IX86_BUILTIN_PMOVSQD512,
29040 IX86_BUILTIN_PMOVSQD512_MEM,
29041 IX86_BUILTIN_PMOVSQW512,
29042 IX86_BUILTIN_PMOVSQW512_MEM,
29043 IX86_BUILTIN_PMOVSXBD512,
29044 IX86_BUILTIN_PMOVSXBQ512,
29045 IX86_BUILTIN_PMOVSXDQ512,
29046 IX86_BUILTIN_PMOVSXWD512,
29047 IX86_BUILTIN_PMOVSXWQ512,
29048 IX86_BUILTIN_PMOVUSDB512,
29049 IX86_BUILTIN_PMOVUSDB512_MEM,
29050 IX86_BUILTIN_PMOVUSDW512,
29051 IX86_BUILTIN_PMOVUSDW512_MEM,
29052 IX86_BUILTIN_PMOVUSQB512,
29053 IX86_BUILTIN_PMOVUSQB512_MEM,
29054 IX86_BUILTIN_PMOVUSQD512,
29055 IX86_BUILTIN_PMOVUSQD512_MEM,
29056 IX86_BUILTIN_PMOVUSQW512,
29057 IX86_BUILTIN_PMOVUSQW512_MEM,
29058 IX86_BUILTIN_PMOVZXBD512,
29059 IX86_BUILTIN_PMOVZXBQ512,
29060 IX86_BUILTIN_PMOVZXDQ512,
29061 IX86_BUILTIN_PMOVZXWD512,
29062 IX86_BUILTIN_PMOVZXWQ512,
29063 IX86_BUILTIN_PMULDQ512,
29064 IX86_BUILTIN_PMULLD512,
29065 IX86_BUILTIN_PMULUDQ512,
29066 IX86_BUILTIN_PORD512,
29067 IX86_BUILTIN_PORQ512,
29068 IX86_BUILTIN_PROLD512,
29069 IX86_BUILTIN_PROLQ512,
29070 IX86_BUILTIN_PROLVD512,
29071 IX86_BUILTIN_PROLVQ512,
29072 IX86_BUILTIN_PRORD512,
29073 IX86_BUILTIN_PRORQ512,
29074 IX86_BUILTIN_PRORVD512,
29075 IX86_BUILTIN_PRORVQ512,
29076 IX86_BUILTIN_PSHUFD512,
29077 IX86_BUILTIN_PSLLD512,
29078 IX86_BUILTIN_PSLLDI512,
29079 IX86_BUILTIN_PSLLQ512,
29080 IX86_BUILTIN_PSLLQI512,
29081 IX86_BUILTIN_PSLLVV16SI,
29082 IX86_BUILTIN_PSLLVV8DI,
29083 IX86_BUILTIN_PSRAD512,
29084 IX86_BUILTIN_PSRADI512,
29085 IX86_BUILTIN_PSRAQ512,
29086 IX86_BUILTIN_PSRAQI512,
29087 IX86_BUILTIN_PSRAVV16SI,
29088 IX86_BUILTIN_PSRAVV8DI,
29089 IX86_BUILTIN_PSRLD512,
29090 IX86_BUILTIN_PSRLDI512,
29091 IX86_BUILTIN_PSRLQ512,
29092 IX86_BUILTIN_PSRLQI512,
29093 IX86_BUILTIN_PSRLVV16SI,
29094 IX86_BUILTIN_PSRLVV8DI,
29095 IX86_BUILTIN_PSUBD512,
29096 IX86_BUILTIN_PSUBQ512,
29097 IX86_BUILTIN_PTESTMD512,
29098 IX86_BUILTIN_PTESTMQ512,
29099 IX86_BUILTIN_PTESTNMD512,
29100 IX86_BUILTIN_PTESTNMQ512,
29101 IX86_BUILTIN_PUNPCKHDQ512,
29102 IX86_BUILTIN_PUNPCKHQDQ512,
29103 IX86_BUILTIN_PUNPCKLDQ512,
29104 IX86_BUILTIN_PUNPCKLQDQ512,
29105 IX86_BUILTIN_PXORD512,
29106 IX86_BUILTIN_PXORQ512,
29107 IX86_BUILTIN_RCP14PD512,
29108 IX86_BUILTIN_RCP14PS512,
29109 IX86_BUILTIN_RCP14SD,
29110 IX86_BUILTIN_RCP14SS,
29111 IX86_BUILTIN_RNDSCALEPD,
29112 IX86_BUILTIN_RNDSCALEPS,
29113 IX86_BUILTIN_RNDSCALESD,
29114 IX86_BUILTIN_RNDSCALESS,
29115 IX86_BUILTIN_RSQRT14PD512,
29116 IX86_BUILTIN_RSQRT14PS512,
29117 IX86_BUILTIN_RSQRT14SD,
29118 IX86_BUILTIN_RSQRT14SS,
29119 IX86_BUILTIN_SCALEFPD512,
29120 IX86_BUILTIN_SCALEFPS512,
29121 IX86_BUILTIN_SCALEFSD,
29122 IX86_BUILTIN_SCALEFSS,
29123 IX86_BUILTIN_SHUFPD512,
29124 IX86_BUILTIN_SHUFPS512,
29125 IX86_BUILTIN_SHUF_F32x4,
29126 IX86_BUILTIN_SHUF_F64x2,
29127 IX86_BUILTIN_SHUF_I32x4,
29128 IX86_BUILTIN_SHUF_I64x2,
29129 IX86_BUILTIN_SQRTPD512,
29130 IX86_BUILTIN_SQRTPD512_MASK,
29131 IX86_BUILTIN_SQRTPS512_MASK,
29132 IX86_BUILTIN_SQRTPS_NR512,
29133 IX86_BUILTIN_SQRTSD_ROUND,
29134 IX86_BUILTIN_SQRTSS_ROUND,
29135 IX86_BUILTIN_STOREAPD512,
29136 IX86_BUILTIN_STOREAPS512,
29137 IX86_BUILTIN_STOREDQUDI512,
29138 IX86_BUILTIN_STOREDQUSI512,
29139 IX86_BUILTIN_STOREUPD512,
29140 IX86_BUILTIN_STOREUPS512,
29141 IX86_BUILTIN_SUBPD512,
29142 IX86_BUILTIN_SUBPS512,
29143 IX86_BUILTIN_SUBSD_ROUND,
29144 IX86_BUILTIN_SUBSS_ROUND,
29145 IX86_BUILTIN_UCMPD512,
29146 IX86_BUILTIN_UCMPQ512,
29147 IX86_BUILTIN_UNPCKHPD512,
29148 IX86_BUILTIN_UNPCKHPS512,
29149 IX86_BUILTIN_UNPCKLPD512,
29150 IX86_BUILTIN_UNPCKLPS512,
29151 IX86_BUILTIN_VCVTSD2SI32,
29152 IX86_BUILTIN_VCVTSD2SI64,
29153 IX86_BUILTIN_VCVTSD2USI32,
29154 IX86_BUILTIN_VCVTSD2USI64,
29155 IX86_BUILTIN_VCVTSS2SI32,
29156 IX86_BUILTIN_VCVTSS2SI64,
29157 IX86_BUILTIN_VCVTSS2USI32,
29158 IX86_BUILTIN_VCVTSS2USI64,
29159 IX86_BUILTIN_VCVTTSD2SI32,
29160 IX86_BUILTIN_VCVTTSD2SI64,
29161 IX86_BUILTIN_VCVTTSD2USI32,
29162 IX86_BUILTIN_VCVTTSD2USI64,
29163 IX86_BUILTIN_VCVTTSS2SI32,
29164 IX86_BUILTIN_VCVTTSS2SI64,
29165 IX86_BUILTIN_VCVTTSS2USI32,
29166 IX86_BUILTIN_VCVTTSS2USI64,
29167 IX86_BUILTIN_VFMADDPD512_MASK,
29168 IX86_BUILTIN_VFMADDPD512_MASK3,
29169 IX86_BUILTIN_VFMADDPD512_MASKZ,
29170 IX86_BUILTIN_VFMADDPS512_MASK,
29171 IX86_BUILTIN_VFMADDPS512_MASK3,
29172 IX86_BUILTIN_VFMADDPS512_MASKZ,
29173 IX86_BUILTIN_VFMADDSD3_ROUND,
29174 IX86_BUILTIN_VFMADDSS3_ROUND,
29175 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29176 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29177 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29178 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29179 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29180 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29181 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29182 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29183 IX86_BUILTIN_VFMSUBPD512_MASK3,
29184 IX86_BUILTIN_VFMSUBPS512_MASK3,
29185 IX86_BUILTIN_VFMSUBSD3_MASK3,
29186 IX86_BUILTIN_VFMSUBSS3_MASK3,
29187 IX86_BUILTIN_VFNMADDPD512_MASK,
29188 IX86_BUILTIN_VFNMADDPS512_MASK,
29189 IX86_BUILTIN_VFNMSUBPD512_MASK,
29190 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29191 IX86_BUILTIN_VFNMSUBPS512_MASK,
29192 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29193 IX86_BUILTIN_VPCLZCNTD512,
29194 IX86_BUILTIN_VPCLZCNTQ512,
29195 IX86_BUILTIN_VPCONFLICTD512,
29196 IX86_BUILTIN_VPCONFLICTQ512,
29197 IX86_BUILTIN_VPERMDF512,
29198 IX86_BUILTIN_VPERMDI512,
29199 IX86_BUILTIN_VPERMI2VARD512,
29200 IX86_BUILTIN_VPERMI2VARPD512,
29201 IX86_BUILTIN_VPERMI2VARPS512,
29202 IX86_BUILTIN_VPERMI2VARQ512,
29203 IX86_BUILTIN_VPERMILPD512,
29204 IX86_BUILTIN_VPERMILPS512,
29205 IX86_BUILTIN_VPERMILVARPD512,
29206 IX86_BUILTIN_VPERMILVARPS512,
29207 IX86_BUILTIN_VPERMT2VARD512,
29208 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29209 IX86_BUILTIN_VPERMT2VARPD512,
29210 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29211 IX86_BUILTIN_VPERMT2VARPS512,
29212 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29213 IX86_BUILTIN_VPERMT2VARQ512,
29214 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29215 IX86_BUILTIN_VPERMVARDF512,
29216 IX86_BUILTIN_VPERMVARDI512,
29217 IX86_BUILTIN_VPERMVARSF512,
29218 IX86_BUILTIN_VPERMVARSI512,
29219 IX86_BUILTIN_VTERNLOGD512_MASK,
29220 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29221 IX86_BUILTIN_VTERNLOGQ512_MASK,
29222 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29224 /* Mask arithmetic operations */
29225 IX86_BUILTIN_KAND16,
29226 IX86_BUILTIN_KANDN16,
29227 IX86_BUILTIN_KNOT16,
29228 IX86_BUILTIN_KOR16,
29229 IX86_BUILTIN_KORTESTC16,
29230 IX86_BUILTIN_KORTESTZ16,
29231 IX86_BUILTIN_KUNPCKBW,
29232 IX86_BUILTIN_KXNOR16,
29233 IX86_BUILTIN_KXOR16,
29234 IX86_BUILTIN_KMOV16,
29237 IX86_BUILTIN_PMOVUSQD256_MEM,
29238 IX86_BUILTIN_PMOVUSQD128_MEM,
29239 IX86_BUILTIN_PMOVSQD256_MEM,
29240 IX86_BUILTIN_PMOVSQD128_MEM,
29241 IX86_BUILTIN_PMOVQD256_MEM,
29242 IX86_BUILTIN_PMOVQD128_MEM,
29243 IX86_BUILTIN_PMOVUSQW256_MEM,
29244 IX86_BUILTIN_PMOVUSQW128_MEM,
29245 IX86_BUILTIN_PMOVSQW256_MEM,
29246 IX86_BUILTIN_PMOVSQW128_MEM,
29247 IX86_BUILTIN_PMOVQW256_MEM,
29248 IX86_BUILTIN_PMOVQW128_MEM,
29249 IX86_BUILTIN_PMOVUSQB256_MEM,
29250 IX86_BUILTIN_PMOVUSQB128_MEM,
29251 IX86_BUILTIN_PMOVSQB256_MEM,
29252 IX86_BUILTIN_PMOVSQB128_MEM,
29253 IX86_BUILTIN_PMOVQB256_MEM,
29254 IX86_BUILTIN_PMOVQB128_MEM,
29255 IX86_BUILTIN_PMOVUSDW256_MEM,
29256 IX86_BUILTIN_PMOVUSDW128_MEM,
29257 IX86_BUILTIN_PMOVSDW256_MEM,
29258 IX86_BUILTIN_PMOVSDW128_MEM,
29259 IX86_BUILTIN_PMOVDW256_MEM,
29260 IX86_BUILTIN_PMOVDW128_MEM,
29261 IX86_BUILTIN_PMOVUSDB256_MEM,
29262 IX86_BUILTIN_PMOVUSDB128_MEM,
29263 IX86_BUILTIN_PMOVSDB256_MEM,
29264 IX86_BUILTIN_PMOVSDB128_MEM,
29265 IX86_BUILTIN_PMOVDB256_MEM,
29266 IX86_BUILTIN_PMOVDB128_MEM,
29267 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29268 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29269 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29270 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29271 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29272 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29273 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29274 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29275 IX86_BUILTIN_LOADAPD256_MASK,
29276 IX86_BUILTIN_LOADAPD128_MASK,
29277 IX86_BUILTIN_LOADAPS256_MASK,
29278 IX86_BUILTIN_LOADAPS128_MASK,
29279 IX86_BUILTIN_STOREAPD256_MASK,
29280 IX86_BUILTIN_STOREAPD128_MASK,
29281 IX86_BUILTIN_STOREAPS256_MASK,
29282 IX86_BUILTIN_STOREAPS128_MASK,
29283 IX86_BUILTIN_LOADUPD256_MASK,
29284 IX86_BUILTIN_LOADUPD128_MASK,
29285 IX86_BUILTIN_LOADUPS256_MASK,
29286 IX86_BUILTIN_LOADUPS128_MASK,
29287 IX86_BUILTIN_STOREUPD256_MASK,
29288 IX86_BUILTIN_STOREUPD128_MASK,
29289 IX86_BUILTIN_STOREUPS256_MASK,
29290 IX86_BUILTIN_STOREUPS128_MASK,
29291 IX86_BUILTIN_LOADDQUDI256_MASK,
29292 IX86_BUILTIN_LOADDQUDI128_MASK,
29293 IX86_BUILTIN_LOADDQUSI256_MASK,
29294 IX86_BUILTIN_LOADDQUSI128_MASK,
29295 IX86_BUILTIN_LOADDQUHI256_MASK,
29296 IX86_BUILTIN_LOADDQUHI128_MASK,
29297 IX86_BUILTIN_LOADDQUQI256_MASK,
29298 IX86_BUILTIN_LOADDQUQI128_MASK,
29299 IX86_BUILTIN_STOREDQUDI256_MASK,
29300 IX86_BUILTIN_STOREDQUDI128_MASK,
29301 IX86_BUILTIN_STOREDQUSI256_MASK,
29302 IX86_BUILTIN_STOREDQUSI128_MASK,
29303 IX86_BUILTIN_STOREDQUHI256_MASK,
29304 IX86_BUILTIN_STOREDQUHI128_MASK,
29305 IX86_BUILTIN_STOREDQUQI256_MASK,
29306 IX86_BUILTIN_STOREDQUQI128_MASK,
29307 IX86_BUILTIN_COMPRESSPDSTORE256,
29308 IX86_BUILTIN_COMPRESSPDSTORE128,
29309 IX86_BUILTIN_COMPRESSPSSTORE256,
29310 IX86_BUILTIN_COMPRESSPSSTORE128,
29311 IX86_BUILTIN_PCOMPRESSQSTORE256,
29312 IX86_BUILTIN_PCOMPRESSQSTORE128,
29313 IX86_BUILTIN_PCOMPRESSDSTORE256,
29314 IX86_BUILTIN_PCOMPRESSDSTORE128,
29315 IX86_BUILTIN_EXPANDPDLOAD256,
29316 IX86_BUILTIN_EXPANDPDLOAD128,
29317 IX86_BUILTIN_EXPANDPSLOAD256,
29318 IX86_BUILTIN_EXPANDPSLOAD128,
29319 IX86_BUILTIN_PEXPANDQLOAD256,
29320 IX86_BUILTIN_PEXPANDQLOAD128,
29321 IX86_BUILTIN_PEXPANDDLOAD256,
29322 IX86_BUILTIN_PEXPANDDLOAD128,
29323 IX86_BUILTIN_EXPANDPDLOAD256Z,
29324 IX86_BUILTIN_EXPANDPDLOAD128Z,
29325 IX86_BUILTIN_EXPANDPSLOAD256Z,
29326 IX86_BUILTIN_EXPANDPSLOAD128Z,
29327 IX86_BUILTIN_PEXPANDQLOAD256Z,
29328 IX86_BUILTIN_PEXPANDQLOAD128Z,
29329 IX86_BUILTIN_PEXPANDDLOAD256Z,
29330 IX86_BUILTIN_PEXPANDDLOAD128Z,
29331 IX86_BUILTIN_PALIGNR256_MASK,
29332 IX86_BUILTIN_PALIGNR128_MASK,
29333 IX86_BUILTIN_MOVDQA64_256_MASK,
29334 IX86_BUILTIN_MOVDQA64_128_MASK,
29335 IX86_BUILTIN_MOVDQA32_256_MASK,
29336 IX86_BUILTIN_MOVDQA32_128_MASK,
29337 IX86_BUILTIN_MOVAPD256_MASK,
29338 IX86_BUILTIN_MOVAPD128_MASK,
29339 IX86_BUILTIN_MOVAPS256_MASK,
29340 IX86_BUILTIN_MOVAPS128_MASK,
29341 IX86_BUILTIN_MOVDQUHI256_MASK,
29342 IX86_BUILTIN_MOVDQUHI128_MASK,
29343 IX86_BUILTIN_MOVDQUQI256_MASK,
29344 IX86_BUILTIN_MOVDQUQI128_MASK,
29345 IX86_BUILTIN_MINPS128_MASK,
29346 IX86_BUILTIN_MAXPS128_MASK,
29347 IX86_BUILTIN_MINPD128_MASK,
29348 IX86_BUILTIN_MAXPD128_MASK,
29349 IX86_BUILTIN_MAXPD256_MASK,
29350 IX86_BUILTIN_MAXPS256_MASK,
29351 IX86_BUILTIN_MINPD256_MASK,
29352 IX86_BUILTIN_MINPS256_MASK,
29353 IX86_BUILTIN_MULPS128_MASK,
29354 IX86_BUILTIN_DIVPS128_MASK,
29355 IX86_BUILTIN_MULPD128_MASK,
29356 IX86_BUILTIN_DIVPD128_MASK,
29357 IX86_BUILTIN_DIVPD256_MASK,
29358 IX86_BUILTIN_DIVPS256_MASK,
29359 IX86_BUILTIN_MULPD256_MASK,
29360 IX86_BUILTIN_MULPS256_MASK,
29361 IX86_BUILTIN_ADDPD128_MASK,
29362 IX86_BUILTIN_ADDPD256_MASK,
29363 IX86_BUILTIN_ADDPS128_MASK,
29364 IX86_BUILTIN_ADDPS256_MASK,
29365 IX86_BUILTIN_SUBPD128_MASK,
29366 IX86_BUILTIN_SUBPD256_MASK,
29367 IX86_BUILTIN_SUBPS128_MASK,
29368 IX86_BUILTIN_SUBPS256_MASK,
29369 IX86_BUILTIN_XORPD256_MASK,
29370 IX86_BUILTIN_XORPD128_MASK,
29371 IX86_BUILTIN_XORPS256_MASK,
29372 IX86_BUILTIN_XORPS128_MASK,
29373 IX86_BUILTIN_ORPD256_MASK,
29374 IX86_BUILTIN_ORPD128_MASK,
29375 IX86_BUILTIN_ORPS256_MASK,
29376 IX86_BUILTIN_ORPS128_MASK,
29377 IX86_BUILTIN_BROADCASTF32x2_256,
29378 IX86_BUILTIN_BROADCASTI32x2_256,
29379 IX86_BUILTIN_BROADCASTI32x2_128,
29380 IX86_BUILTIN_BROADCASTF64X2_256,
29381 IX86_BUILTIN_BROADCASTI64X2_256,
29382 IX86_BUILTIN_BROADCASTF32X4_256,
29383 IX86_BUILTIN_BROADCASTI32X4_256,
29384 IX86_BUILTIN_EXTRACTF32X4_256,
29385 IX86_BUILTIN_EXTRACTI32X4_256,
29386 IX86_BUILTIN_DBPSADBW256,
29387 IX86_BUILTIN_DBPSADBW128,
29388 IX86_BUILTIN_CVTTPD2QQ256,
29389 IX86_BUILTIN_CVTTPD2QQ128,
29390 IX86_BUILTIN_CVTTPD2UQQ256,
29391 IX86_BUILTIN_CVTTPD2UQQ128,
29392 IX86_BUILTIN_CVTPD2QQ256,
29393 IX86_BUILTIN_CVTPD2QQ128,
29394 IX86_BUILTIN_CVTPD2UQQ256,
29395 IX86_BUILTIN_CVTPD2UQQ128,
29396 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29397 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29398 IX86_BUILTIN_CVTTPS2QQ256,
29399 IX86_BUILTIN_CVTTPS2QQ128,
29400 IX86_BUILTIN_CVTTPS2UQQ256,
29401 IX86_BUILTIN_CVTTPS2UQQ128,
29402 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29403 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29404 IX86_BUILTIN_CVTTPS2UDQ256,
29405 IX86_BUILTIN_CVTTPS2UDQ128,
29406 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29407 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29408 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29409 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29410 IX86_BUILTIN_CVTPD2DQ256_MASK,
29411 IX86_BUILTIN_CVTPD2DQ128_MASK,
29412 IX86_BUILTIN_CVTDQ2PD256_MASK,
29413 IX86_BUILTIN_CVTDQ2PD128_MASK,
29414 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29415 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29416 IX86_BUILTIN_CVTDQ2PS256_MASK,
29417 IX86_BUILTIN_CVTDQ2PS128_MASK,
29418 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29419 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29420 IX86_BUILTIN_CVTPS2PD256_MASK,
29421 IX86_BUILTIN_CVTPS2PD128_MASK,
29422 IX86_BUILTIN_PBROADCASTB256_MASK,
29423 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29424 IX86_BUILTIN_PBROADCASTB128_MASK,
29425 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29426 IX86_BUILTIN_PBROADCASTW256_MASK,
29427 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29428 IX86_BUILTIN_PBROADCASTW128_MASK,
29429 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29430 IX86_BUILTIN_PBROADCASTD256_MASK,
29431 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29432 IX86_BUILTIN_PBROADCASTD128_MASK,
29433 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29434 IX86_BUILTIN_PBROADCASTQ256_MASK,
29435 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29436 IX86_BUILTIN_PBROADCASTQ128_MASK,
29437 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29438 IX86_BUILTIN_BROADCASTSS256,
29439 IX86_BUILTIN_BROADCASTSS128,
29440 IX86_BUILTIN_BROADCASTSD256,
29441 IX86_BUILTIN_EXTRACTF64X2_256,
29442 IX86_BUILTIN_EXTRACTI64X2_256,
29443 IX86_BUILTIN_INSERTF32X4_256,
29444 IX86_BUILTIN_INSERTI32X4_256,
29445 IX86_BUILTIN_PMOVSXBW256_MASK,
29446 IX86_BUILTIN_PMOVSXBW128_MASK,
29447 IX86_BUILTIN_PMOVSXBD256_MASK,
29448 IX86_BUILTIN_PMOVSXBD128_MASK,
29449 IX86_BUILTIN_PMOVSXBQ256_MASK,
29450 IX86_BUILTIN_PMOVSXBQ128_MASK,
29451 IX86_BUILTIN_PMOVSXWD256_MASK,
29452 IX86_BUILTIN_PMOVSXWD128_MASK,
29453 IX86_BUILTIN_PMOVSXWQ256_MASK,
29454 IX86_BUILTIN_PMOVSXWQ128_MASK,
29455 IX86_BUILTIN_PMOVSXDQ256_MASK,
29456 IX86_BUILTIN_PMOVSXDQ128_MASK,
29457 IX86_BUILTIN_PMOVZXBW256_MASK,
29458 IX86_BUILTIN_PMOVZXBW128_MASK,
29459 IX86_BUILTIN_PMOVZXBD256_MASK,
29460 IX86_BUILTIN_PMOVZXBD128_MASK,
29461 IX86_BUILTIN_PMOVZXBQ256_MASK,
29462 IX86_BUILTIN_PMOVZXBQ128_MASK,
29463 IX86_BUILTIN_PMOVZXWD256_MASK,
29464 IX86_BUILTIN_PMOVZXWD128_MASK,
29465 IX86_BUILTIN_PMOVZXWQ256_MASK,
29466 IX86_BUILTIN_PMOVZXWQ128_MASK,
29467 IX86_BUILTIN_PMOVZXDQ256_MASK,
29468 IX86_BUILTIN_PMOVZXDQ128_MASK,
29469 IX86_BUILTIN_REDUCEPD256_MASK,
29470 IX86_BUILTIN_REDUCEPD128_MASK,
29471 IX86_BUILTIN_REDUCEPS256_MASK,
29472 IX86_BUILTIN_REDUCEPS128_MASK,
29473 IX86_BUILTIN_REDUCESD_MASK,
29474 IX86_BUILTIN_REDUCESS_MASK,
29475 IX86_BUILTIN_VPERMVARHI256_MASK,
29476 IX86_BUILTIN_VPERMVARHI128_MASK,
29477 IX86_BUILTIN_VPERMT2VARHI256,
29478 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29479 IX86_BUILTIN_VPERMT2VARHI128,
29480 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29481 IX86_BUILTIN_VPERMI2VARHI256,
29482 IX86_BUILTIN_VPERMI2VARHI128,
29483 IX86_BUILTIN_RCP14PD256,
29484 IX86_BUILTIN_RCP14PD128,
29485 IX86_BUILTIN_RCP14PS256,
29486 IX86_BUILTIN_RCP14PS128,
29487 IX86_BUILTIN_RSQRT14PD256_MASK,
29488 IX86_BUILTIN_RSQRT14PD128_MASK,
29489 IX86_BUILTIN_RSQRT14PS256_MASK,
29490 IX86_BUILTIN_RSQRT14PS128_MASK,
29491 IX86_BUILTIN_SQRTPD256_MASK,
29492 IX86_BUILTIN_SQRTPD128_MASK,
29493 IX86_BUILTIN_SQRTPS256_MASK,
29494 IX86_BUILTIN_SQRTPS128_MASK,
29495 IX86_BUILTIN_PADDB128_MASK,
29496 IX86_BUILTIN_PADDW128_MASK,
29497 IX86_BUILTIN_PADDD128_MASK,
29498 IX86_BUILTIN_PADDQ128_MASK,
29499 IX86_BUILTIN_PSUBB128_MASK,
29500 IX86_BUILTIN_PSUBW128_MASK,
29501 IX86_BUILTIN_PSUBD128_MASK,
29502 IX86_BUILTIN_PSUBQ128_MASK,
29503 IX86_BUILTIN_PADDSB128_MASK,
29504 IX86_BUILTIN_PADDSW128_MASK,
29505 IX86_BUILTIN_PSUBSB128_MASK,
29506 IX86_BUILTIN_PSUBSW128_MASK,
29507 IX86_BUILTIN_PADDUSB128_MASK,
29508 IX86_BUILTIN_PADDUSW128_MASK,
29509 IX86_BUILTIN_PSUBUSB128_MASK,
29510 IX86_BUILTIN_PSUBUSW128_MASK,
29511 IX86_BUILTIN_PADDB256_MASK,
29512 IX86_BUILTIN_PADDW256_MASK,
29513 IX86_BUILTIN_PADDD256_MASK,
29514 IX86_BUILTIN_PADDQ256_MASK,
29515 IX86_BUILTIN_PADDSB256_MASK,
29516 IX86_BUILTIN_PADDSW256_MASK,
29517 IX86_BUILTIN_PADDUSB256_MASK,
29518 IX86_BUILTIN_PADDUSW256_MASK,
29519 IX86_BUILTIN_PSUBB256_MASK,
29520 IX86_BUILTIN_PSUBW256_MASK,
29521 IX86_BUILTIN_PSUBD256_MASK,
29522 IX86_BUILTIN_PSUBQ256_MASK,
29523 IX86_BUILTIN_PSUBSB256_MASK,
29524 IX86_BUILTIN_PSUBSW256_MASK,
29525 IX86_BUILTIN_PSUBUSB256_MASK,
29526 IX86_BUILTIN_PSUBUSW256_MASK,
29527 IX86_BUILTIN_SHUF_F64x2_256,
29528 IX86_BUILTIN_SHUF_I64x2_256,
29529 IX86_BUILTIN_SHUF_I32x4_256,
29530 IX86_BUILTIN_SHUF_F32x4_256,
29531 IX86_BUILTIN_PMOVWB128,
29532 IX86_BUILTIN_PMOVWB256,
29533 IX86_BUILTIN_PMOVSWB128,
29534 IX86_BUILTIN_PMOVSWB256,
29535 IX86_BUILTIN_PMOVUSWB128,
29536 IX86_BUILTIN_PMOVUSWB256,
29537 IX86_BUILTIN_PMOVDB128,
29538 IX86_BUILTIN_PMOVDB256,
29539 IX86_BUILTIN_PMOVSDB128,
29540 IX86_BUILTIN_PMOVSDB256,
29541 IX86_BUILTIN_PMOVUSDB128,
29542 IX86_BUILTIN_PMOVUSDB256,
29543 IX86_BUILTIN_PMOVDW128,
29544 IX86_BUILTIN_PMOVDW256,
29545 IX86_BUILTIN_PMOVSDW128,
29546 IX86_BUILTIN_PMOVSDW256,
29547 IX86_BUILTIN_PMOVUSDW128,
29548 IX86_BUILTIN_PMOVUSDW256,
29549 IX86_BUILTIN_PMOVQB128,
29550 IX86_BUILTIN_PMOVQB256,
29551 IX86_BUILTIN_PMOVSQB128,
29552 IX86_BUILTIN_PMOVSQB256,
29553 IX86_BUILTIN_PMOVUSQB128,
29554 IX86_BUILTIN_PMOVUSQB256,
29555 IX86_BUILTIN_PMOVQW128,
29556 IX86_BUILTIN_PMOVQW256,
29557 IX86_BUILTIN_PMOVSQW128,
29558 IX86_BUILTIN_PMOVSQW256,
29559 IX86_BUILTIN_PMOVUSQW128,
29560 IX86_BUILTIN_PMOVUSQW256,
29561 IX86_BUILTIN_PMOVQD128,
29562 IX86_BUILTIN_PMOVQD256,
29563 IX86_BUILTIN_PMOVSQD128,
29564 IX86_BUILTIN_PMOVSQD256,
29565 IX86_BUILTIN_PMOVUSQD128,
29566 IX86_BUILTIN_PMOVUSQD256,
29567 IX86_BUILTIN_RANGEPD256,
29568 IX86_BUILTIN_RANGEPD128,
29569 IX86_BUILTIN_RANGEPS256,
29570 IX86_BUILTIN_RANGEPS128,
29571 IX86_BUILTIN_GETEXPPS256,
29572 IX86_BUILTIN_GETEXPPD256,
29573 IX86_BUILTIN_GETEXPPS128,
29574 IX86_BUILTIN_GETEXPPD128,
29575 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29576 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29577 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29578 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29579 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29580 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29581 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29582 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29583 IX86_BUILTIN_PABSQ256,
29584 IX86_BUILTIN_PABSQ128,
29585 IX86_BUILTIN_PABSD256_MASK,
29586 IX86_BUILTIN_PABSD128_MASK,
29587 IX86_BUILTIN_PMULHRSW256_MASK,
29588 IX86_BUILTIN_PMULHRSW128_MASK,
29589 IX86_BUILTIN_PMULHUW128_MASK,
29590 IX86_BUILTIN_PMULHUW256_MASK,
29591 IX86_BUILTIN_PMULHW256_MASK,
29592 IX86_BUILTIN_PMULHW128_MASK,
29593 IX86_BUILTIN_PMULLW256_MASK,
29594 IX86_BUILTIN_PMULLW128_MASK,
29595 IX86_BUILTIN_PMULLQ256,
29596 IX86_BUILTIN_PMULLQ128,
29597 IX86_BUILTIN_ANDPD256_MASK,
29598 IX86_BUILTIN_ANDPD128_MASK,
29599 IX86_BUILTIN_ANDPS256_MASK,
29600 IX86_BUILTIN_ANDPS128_MASK,
29601 IX86_BUILTIN_ANDNPD256_MASK,
29602 IX86_BUILTIN_ANDNPD128_MASK,
29603 IX86_BUILTIN_ANDNPS256_MASK,
29604 IX86_BUILTIN_ANDNPS128_MASK,
29605 IX86_BUILTIN_PSLLWI128_MASK,
29606 IX86_BUILTIN_PSLLDI128_MASK,
29607 IX86_BUILTIN_PSLLQI128_MASK,
29608 IX86_BUILTIN_PSLLW128_MASK,
29609 IX86_BUILTIN_PSLLD128_MASK,
29610 IX86_BUILTIN_PSLLQ128_MASK,
29611 IX86_BUILTIN_PSLLWI256_MASK ,
29612 IX86_BUILTIN_PSLLW256_MASK,
29613 IX86_BUILTIN_PSLLDI256_MASK,
29614 IX86_BUILTIN_PSLLD256_MASK,
29615 IX86_BUILTIN_PSLLQI256_MASK,
29616 IX86_BUILTIN_PSLLQ256_MASK,
29617 IX86_BUILTIN_PSRADI128_MASK,
29618 IX86_BUILTIN_PSRAD128_MASK,
29619 IX86_BUILTIN_PSRADI256_MASK,
29620 IX86_BUILTIN_PSRAD256_MASK,
29621 IX86_BUILTIN_PSRAQI128_MASK,
29622 IX86_BUILTIN_PSRAQ128_MASK,
29623 IX86_BUILTIN_PSRAQI256_MASK,
29624 IX86_BUILTIN_PSRAQ256_MASK,
29625 IX86_BUILTIN_PANDD256,
29626 IX86_BUILTIN_PANDD128,
29627 IX86_BUILTIN_PSRLDI128_MASK,
29628 IX86_BUILTIN_PSRLD128_MASK,
29629 IX86_BUILTIN_PSRLDI256_MASK,
29630 IX86_BUILTIN_PSRLD256_MASK,
29631 IX86_BUILTIN_PSRLQI128_MASK,
29632 IX86_BUILTIN_PSRLQ128_MASK,
29633 IX86_BUILTIN_PSRLQI256_MASK,
29634 IX86_BUILTIN_PSRLQ256_MASK,
29635 IX86_BUILTIN_PANDQ256,
29636 IX86_BUILTIN_PANDQ128,
29637 IX86_BUILTIN_PANDND256,
29638 IX86_BUILTIN_PANDND128,
29639 IX86_BUILTIN_PANDNQ256,
29640 IX86_BUILTIN_PANDNQ128,
29641 IX86_BUILTIN_PORD256,
29642 IX86_BUILTIN_PORD128,
29643 IX86_BUILTIN_PORQ256,
29644 IX86_BUILTIN_PORQ128,
29645 IX86_BUILTIN_PXORD256,
29646 IX86_BUILTIN_PXORD128,
29647 IX86_BUILTIN_PXORQ256,
29648 IX86_BUILTIN_PXORQ128,
29649 IX86_BUILTIN_PACKSSWB256_MASK,
29650 IX86_BUILTIN_PACKSSWB128_MASK,
29651 IX86_BUILTIN_PACKUSWB256_MASK,
29652 IX86_BUILTIN_PACKUSWB128_MASK,
29653 IX86_BUILTIN_RNDSCALEPS256,
29654 IX86_BUILTIN_RNDSCALEPD256,
29655 IX86_BUILTIN_RNDSCALEPS128,
29656 IX86_BUILTIN_RNDSCALEPD128,
29657 IX86_BUILTIN_VTERNLOGQ256_MASK,
29658 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29659 IX86_BUILTIN_VTERNLOGD256_MASK,
29660 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29661 IX86_BUILTIN_VTERNLOGQ128_MASK,
29662 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29663 IX86_BUILTIN_VTERNLOGD128_MASK,
29664 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29665 IX86_BUILTIN_SCALEFPD256,
29666 IX86_BUILTIN_SCALEFPS256,
29667 IX86_BUILTIN_SCALEFPD128,
29668 IX86_BUILTIN_SCALEFPS128,
29669 IX86_BUILTIN_VFMADDPD256_MASK,
29670 IX86_BUILTIN_VFMADDPD256_MASK3,
29671 IX86_BUILTIN_VFMADDPD256_MASKZ,
29672 IX86_BUILTIN_VFMADDPD128_MASK,
29673 IX86_BUILTIN_VFMADDPD128_MASK3,
29674 IX86_BUILTIN_VFMADDPD128_MASKZ,
29675 IX86_BUILTIN_VFMADDPS256_MASK,
29676 IX86_BUILTIN_VFMADDPS256_MASK3,
29677 IX86_BUILTIN_VFMADDPS256_MASKZ,
29678 IX86_BUILTIN_VFMADDPS128_MASK,
29679 IX86_BUILTIN_VFMADDPS128_MASK3,
29680 IX86_BUILTIN_VFMADDPS128_MASKZ,
29681 IX86_BUILTIN_VFMSUBPD256_MASK3,
29682 IX86_BUILTIN_VFMSUBPD128_MASK3,
29683 IX86_BUILTIN_VFMSUBPS256_MASK3,
29684 IX86_BUILTIN_VFMSUBPS128_MASK3,
29685 IX86_BUILTIN_VFNMADDPD256_MASK,
29686 IX86_BUILTIN_VFNMADDPD128_MASK,
29687 IX86_BUILTIN_VFNMADDPS256_MASK,
29688 IX86_BUILTIN_VFNMADDPS128_MASK,
29689 IX86_BUILTIN_VFNMSUBPD256_MASK,
29690 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29691 IX86_BUILTIN_VFNMSUBPD128_MASK,
29692 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29693 IX86_BUILTIN_VFNMSUBPS256_MASK,
29694 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29695 IX86_BUILTIN_VFNMSUBPS128_MASK,
29696 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29697 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29698 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29699 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29700 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29701 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29702 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29703 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29704 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29705 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29706 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29707 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29708 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29709 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29710 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29711 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29712 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29713 IX86_BUILTIN_INSERTF64X2_256,
29714 IX86_BUILTIN_INSERTI64X2_256,
29715 IX86_BUILTIN_PSRAVV16HI,
29716 IX86_BUILTIN_PSRAVV8HI,
29717 IX86_BUILTIN_PMADDUBSW256_MASK,
29718 IX86_BUILTIN_PMADDUBSW128_MASK,
29719 IX86_BUILTIN_PMADDWD256_MASK,
29720 IX86_BUILTIN_PMADDWD128_MASK,
29721 IX86_BUILTIN_PSRLVV16HI,
29722 IX86_BUILTIN_PSRLVV8HI,
29723 IX86_BUILTIN_CVTPS2DQ256_MASK,
29724 IX86_BUILTIN_CVTPS2DQ128_MASK,
29725 IX86_BUILTIN_CVTPS2UDQ256,
29726 IX86_BUILTIN_CVTPS2UDQ128,
29727 IX86_BUILTIN_CVTPS2QQ256,
29728 IX86_BUILTIN_CVTPS2QQ128,
29729 IX86_BUILTIN_CVTPS2UQQ256,
29730 IX86_BUILTIN_CVTPS2UQQ128,
29731 IX86_BUILTIN_GETMANTPS256,
29732 IX86_BUILTIN_GETMANTPS128,
29733 IX86_BUILTIN_GETMANTPD256,
29734 IX86_BUILTIN_GETMANTPD128,
29735 IX86_BUILTIN_MOVDDUP256_MASK,
29736 IX86_BUILTIN_MOVDDUP128_MASK,
29737 IX86_BUILTIN_MOVSHDUP256_MASK,
29738 IX86_BUILTIN_MOVSHDUP128_MASK,
29739 IX86_BUILTIN_MOVSLDUP256_MASK,
29740 IX86_BUILTIN_MOVSLDUP128_MASK,
29741 IX86_BUILTIN_CVTQQ2PS256,
29742 IX86_BUILTIN_CVTQQ2PS128,
29743 IX86_BUILTIN_CVTUQQ2PS256,
29744 IX86_BUILTIN_CVTUQQ2PS128,
29745 IX86_BUILTIN_CVTQQ2PD256,
29746 IX86_BUILTIN_CVTQQ2PD128,
29747 IX86_BUILTIN_CVTUQQ2PD256,
29748 IX86_BUILTIN_CVTUQQ2PD128,
29749 IX86_BUILTIN_VPERMT2VARQ256,
29750 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29751 IX86_BUILTIN_VPERMT2VARD256,
29752 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29753 IX86_BUILTIN_VPERMI2VARQ256,
29754 IX86_BUILTIN_VPERMI2VARD256,
29755 IX86_BUILTIN_VPERMT2VARPD256,
29756 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29757 IX86_BUILTIN_VPERMT2VARPS256,
29758 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29759 IX86_BUILTIN_VPERMI2VARPD256,
29760 IX86_BUILTIN_VPERMI2VARPS256,
29761 IX86_BUILTIN_VPERMT2VARQ128,
29762 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29763 IX86_BUILTIN_VPERMT2VARD128,
29764 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29765 IX86_BUILTIN_VPERMI2VARQ128,
29766 IX86_BUILTIN_VPERMI2VARD128,
29767 IX86_BUILTIN_VPERMT2VARPD128,
29768 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29769 IX86_BUILTIN_VPERMT2VARPS128,
29770 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29771 IX86_BUILTIN_VPERMI2VARPD128,
29772 IX86_BUILTIN_VPERMI2VARPS128,
29773 IX86_BUILTIN_PSHUFB256_MASK,
29774 IX86_BUILTIN_PSHUFB128_MASK,
29775 IX86_BUILTIN_PSHUFHW256_MASK,
29776 IX86_BUILTIN_PSHUFHW128_MASK,
29777 IX86_BUILTIN_PSHUFLW256_MASK,
29778 IX86_BUILTIN_PSHUFLW128_MASK,
29779 IX86_BUILTIN_PSHUFD256_MASK,
29780 IX86_BUILTIN_PSHUFD128_MASK,
29781 IX86_BUILTIN_SHUFPD256_MASK,
29782 IX86_BUILTIN_SHUFPD128_MASK,
29783 IX86_BUILTIN_SHUFPS256_MASK,
29784 IX86_BUILTIN_SHUFPS128_MASK,
29785 IX86_BUILTIN_PROLVQ256,
29786 IX86_BUILTIN_PROLVQ128,
29787 IX86_BUILTIN_PROLQ256,
29788 IX86_BUILTIN_PROLQ128,
29789 IX86_BUILTIN_PRORVQ256,
29790 IX86_BUILTIN_PRORVQ128,
29791 IX86_BUILTIN_PRORQ256,
29792 IX86_BUILTIN_PRORQ128,
29793 IX86_BUILTIN_PSRAVQ128,
29794 IX86_BUILTIN_PSRAVQ256,
29795 IX86_BUILTIN_PSLLVV4DI_MASK,
29796 IX86_BUILTIN_PSLLVV2DI_MASK,
29797 IX86_BUILTIN_PSLLVV8SI_MASK,
29798 IX86_BUILTIN_PSLLVV4SI_MASK,
29799 IX86_BUILTIN_PSRAVV8SI_MASK,
29800 IX86_BUILTIN_PSRAVV4SI_MASK,
29801 IX86_BUILTIN_PSRLVV4DI_MASK,
29802 IX86_BUILTIN_PSRLVV2DI_MASK,
29803 IX86_BUILTIN_PSRLVV8SI_MASK,
29804 IX86_BUILTIN_PSRLVV4SI_MASK,
29805 IX86_BUILTIN_PSRAWI256_MASK,
29806 IX86_BUILTIN_PSRAW256_MASK,
29807 IX86_BUILTIN_PSRAWI128_MASK,
29808 IX86_BUILTIN_PSRAW128_MASK,
29809 IX86_BUILTIN_PSRLWI256_MASK,
29810 IX86_BUILTIN_PSRLW256_MASK,
29811 IX86_BUILTIN_PSRLWI128_MASK,
29812 IX86_BUILTIN_PSRLW128_MASK,
29813 IX86_BUILTIN_PRORVD256,
29814 IX86_BUILTIN_PROLVD256,
29815 IX86_BUILTIN_PRORD256,
29816 IX86_BUILTIN_PROLD256,
29817 IX86_BUILTIN_PRORVD128,
29818 IX86_BUILTIN_PROLVD128,
29819 IX86_BUILTIN_PRORD128,
29820 IX86_BUILTIN_PROLD128,
29821 IX86_BUILTIN_FPCLASSPD256,
29822 IX86_BUILTIN_FPCLASSPD128,
29823 IX86_BUILTIN_FPCLASSSD,
29824 IX86_BUILTIN_FPCLASSPS256,
29825 IX86_BUILTIN_FPCLASSPS128,
29826 IX86_BUILTIN_FPCLASSSS,
29827 IX86_BUILTIN_CVTB2MASK128,
29828 IX86_BUILTIN_CVTB2MASK256,
29829 IX86_BUILTIN_CVTW2MASK128,
29830 IX86_BUILTIN_CVTW2MASK256,
29831 IX86_BUILTIN_CVTD2MASK128,
29832 IX86_BUILTIN_CVTD2MASK256,
29833 IX86_BUILTIN_CVTQ2MASK128,
29834 IX86_BUILTIN_CVTQ2MASK256,
29835 IX86_BUILTIN_CVTMASK2B128,
29836 IX86_BUILTIN_CVTMASK2B256,
29837 IX86_BUILTIN_CVTMASK2W128,
29838 IX86_BUILTIN_CVTMASK2W256,
29839 IX86_BUILTIN_CVTMASK2D128,
29840 IX86_BUILTIN_CVTMASK2D256,
29841 IX86_BUILTIN_CVTMASK2Q128,
29842 IX86_BUILTIN_CVTMASK2Q256,
29843 IX86_BUILTIN_PCMPEQB128_MASK,
29844 IX86_BUILTIN_PCMPEQB256_MASK,
29845 IX86_BUILTIN_PCMPEQW128_MASK,
29846 IX86_BUILTIN_PCMPEQW256_MASK,
29847 IX86_BUILTIN_PCMPEQD128_MASK,
29848 IX86_BUILTIN_PCMPEQD256_MASK,
29849 IX86_BUILTIN_PCMPEQQ128_MASK,
29850 IX86_BUILTIN_PCMPEQQ256_MASK,
29851 IX86_BUILTIN_PCMPGTB128_MASK,
29852 IX86_BUILTIN_PCMPGTB256_MASK,
29853 IX86_BUILTIN_PCMPGTW128_MASK,
29854 IX86_BUILTIN_PCMPGTW256_MASK,
29855 IX86_BUILTIN_PCMPGTD128_MASK,
29856 IX86_BUILTIN_PCMPGTD256_MASK,
29857 IX86_BUILTIN_PCMPGTQ128_MASK,
29858 IX86_BUILTIN_PCMPGTQ256_MASK,
29859 IX86_BUILTIN_PTESTMB128,
29860 IX86_BUILTIN_PTESTMB256,
29861 IX86_BUILTIN_PTESTMW128,
29862 IX86_BUILTIN_PTESTMW256,
29863 IX86_BUILTIN_PTESTMD128,
29864 IX86_BUILTIN_PTESTMD256,
29865 IX86_BUILTIN_PTESTMQ128,
29866 IX86_BUILTIN_PTESTMQ256,
29867 IX86_BUILTIN_PTESTNMB128,
29868 IX86_BUILTIN_PTESTNMB256,
29869 IX86_BUILTIN_PTESTNMW128,
29870 IX86_BUILTIN_PTESTNMW256,
29871 IX86_BUILTIN_PTESTNMD128,
29872 IX86_BUILTIN_PTESTNMD256,
29873 IX86_BUILTIN_PTESTNMQ128,
29874 IX86_BUILTIN_PTESTNMQ256,
29875 IX86_BUILTIN_PBROADCASTMB128,
29876 IX86_BUILTIN_PBROADCASTMB256,
29877 IX86_BUILTIN_PBROADCASTMW128,
29878 IX86_BUILTIN_PBROADCASTMW256,
29879 IX86_BUILTIN_COMPRESSPD256,
29880 IX86_BUILTIN_COMPRESSPD128,
29881 IX86_BUILTIN_COMPRESSPS256,
29882 IX86_BUILTIN_COMPRESSPS128,
29883 IX86_BUILTIN_PCOMPRESSQ256,
29884 IX86_BUILTIN_PCOMPRESSQ128,
29885 IX86_BUILTIN_PCOMPRESSD256,
29886 IX86_BUILTIN_PCOMPRESSD128,
29887 IX86_BUILTIN_EXPANDPD256,
29888 IX86_BUILTIN_EXPANDPD128,
29889 IX86_BUILTIN_EXPANDPS256,
29890 IX86_BUILTIN_EXPANDPS128,
29891 IX86_BUILTIN_PEXPANDQ256,
29892 IX86_BUILTIN_PEXPANDQ128,
29893 IX86_BUILTIN_PEXPANDD256,
29894 IX86_BUILTIN_PEXPANDD128,
29895 IX86_BUILTIN_EXPANDPD256Z,
29896 IX86_BUILTIN_EXPANDPD128Z,
29897 IX86_BUILTIN_EXPANDPS256Z,
29898 IX86_BUILTIN_EXPANDPS128Z,
29899 IX86_BUILTIN_PEXPANDQ256Z,
29900 IX86_BUILTIN_PEXPANDQ128Z,
29901 IX86_BUILTIN_PEXPANDD256Z,
29902 IX86_BUILTIN_PEXPANDD128Z,
29903 IX86_BUILTIN_PMAXSD256_MASK,
29904 IX86_BUILTIN_PMINSD256_MASK,
29905 IX86_BUILTIN_PMAXUD256_MASK,
29906 IX86_BUILTIN_PMINUD256_MASK,
29907 IX86_BUILTIN_PMAXSD128_MASK,
29908 IX86_BUILTIN_PMINSD128_MASK,
29909 IX86_BUILTIN_PMAXUD128_MASK,
29910 IX86_BUILTIN_PMINUD128_MASK,
29911 IX86_BUILTIN_PMAXSQ256_MASK,
29912 IX86_BUILTIN_PMINSQ256_MASK,
29913 IX86_BUILTIN_PMAXUQ256_MASK,
29914 IX86_BUILTIN_PMINUQ256_MASK,
29915 IX86_BUILTIN_PMAXSQ128_MASK,
29916 IX86_BUILTIN_PMINSQ128_MASK,
29917 IX86_BUILTIN_PMAXUQ128_MASK,
29918 IX86_BUILTIN_PMINUQ128_MASK,
29919 IX86_BUILTIN_PMINSB256_MASK,
29920 IX86_BUILTIN_PMINUB256_MASK,
29921 IX86_BUILTIN_PMAXSB256_MASK,
29922 IX86_BUILTIN_PMAXUB256_MASK,
29923 IX86_BUILTIN_PMINSB128_MASK,
29924 IX86_BUILTIN_PMINUB128_MASK,
29925 IX86_BUILTIN_PMAXSB128_MASK,
29926 IX86_BUILTIN_PMAXUB128_MASK,
29927 IX86_BUILTIN_PMINSW256_MASK,
29928 IX86_BUILTIN_PMINUW256_MASK,
29929 IX86_BUILTIN_PMAXSW256_MASK,
29930 IX86_BUILTIN_PMAXUW256_MASK,
29931 IX86_BUILTIN_PMINSW128_MASK,
29932 IX86_BUILTIN_PMINUW128_MASK,
29933 IX86_BUILTIN_PMAXSW128_MASK,
29934 IX86_BUILTIN_PMAXUW128_MASK,
29935 IX86_BUILTIN_VPCONFLICTQ256,
29936 IX86_BUILTIN_VPCONFLICTD256,
29937 IX86_BUILTIN_VPCLZCNTQ256,
29938 IX86_BUILTIN_VPCLZCNTD256,
29939 IX86_BUILTIN_UNPCKHPD256_MASK,
29940 IX86_BUILTIN_UNPCKHPD128_MASK,
29941 IX86_BUILTIN_UNPCKHPS256_MASK,
29942 IX86_BUILTIN_UNPCKHPS128_MASK,
29943 IX86_BUILTIN_UNPCKLPD256_MASK,
29944 IX86_BUILTIN_UNPCKLPD128_MASK,
29945 IX86_BUILTIN_UNPCKLPS256_MASK,
29946 IX86_BUILTIN_VPCONFLICTQ128,
29947 IX86_BUILTIN_VPCONFLICTD128,
29948 IX86_BUILTIN_VPCLZCNTQ128,
29949 IX86_BUILTIN_VPCLZCNTD128,
29950 IX86_BUILTIN_UNPCKLPS128_MASK,
29951 IX86_BUILTIN_ALIGND256,
29952 IX86_BUILTIN_ALIGNQ256,
29953 IX86_BUILTIN_ALIGND128,
29954 IX86_BUILTIN_ALIGNQ128,
29955 IX86_BUILTIN_CVTPS2PH256_MASK,
29956 IX86_BUILTIN_CVTPS2PH_MASK,
29957 IX86_BUILTIN_CVTPH2PS_MASK,
29958 IX86_BUILTIN_CVTPH2PS256_MASK,
29959 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29960 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29961 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29962 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29963 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29964 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29965 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29966 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29967 IX86_BUILTIN_PUNPCKHBW128_MASK,
29968 IX86_BUILTIN_PUNPCKHBW256_MASK,
29969 IX86_BUILTIN_PUNPCKHWD128_MASK,
29970 IX86_BUILTIN_PUNPCKHWD256_MASK,
29971 IX86_BUILTIN_PUNPCKLBW128_MASK,
29972 IX86_BUILTIN_PUNPCKLBW256_MASK,
29973 IX86_BUILTIN_PUNPCKLWD128_MASK,
29974 IX86_BUILTIN_PUNPCKLWD256_MASK,
29975 IX86_BUILTIN_PSLLVV16HI,
29976 IX86_BUILTIN_PSLLVV8HI,
29977 IX86_BUILTIN_PACKSSDW256_MASK,
29978 IX86_BUILTIN_PACKSSDW128_MASK,
29979 IX86_BUILTIN_PACKUSDW256_MASK,
29980 IX86_BUILTIN_PACKUSDW128_MASK,
29981 IX86_BUILTIN_PAVGB256_MASK,
29982 IX86_BUILTIN_PAVGW256_MASK,
29983 IX86_BUILTIN_PAVGB128_MASK,
29984 IX86_BUILTIN_PAVGW128_MASK,
29985 IX86_BUILTIN_VPERMVARSF256_MASK,
29986 IX86_BUILTIN_VPERMVARDF256_MASK,
29987 IX86_BUILTIN_VPERMDF256_MASK,
29988 IX86_BUILTIN_PABSB256_MASK,
29989 IX86_BUILTIN_PABSB128_MASK,
29990 IX86_BUILTIN_PABSW256_MASK,
29991 IX86_BUILTIN_PABSW128_MASK,
29992 IX86_BUILTIN_VPERMILVARPD_MASK,
29993 IX86_BUILTIN_VPERMILVARPS_MASK,
29994 IX86_BUILTIN_VPERMILVARPD256_MASK,
29995 IX86_BUILTIN_VPERMILVARPS256_MASK,
29996 IX86_BUILTIN_VPERMILPD_MASK,
29997 IX86_BUILTIN_VPERMILPS_MASK,
29998 IX86_BUILTIN_VPERMILPD256_MASK,
29999 IX86_BUILTIN_VPERMILPS256_MASK,
30000 IX86_BUILTIN_BLENDMQ256,
30001 IX86_BUILTIN_BLENDMD256,
30002 IX86_BUILTIN_BLENDMPD256,
30003 IX86_BUILTIN_BLENDMPS256,
30004 IX86_BUILTIN_BLENDMQ128,
30005 IX86_BUILTIN_BLENDMD128,
30006 IX86_BUILTIN_BLENDMPD128,
30007 IX86_BUILTIN_BLENDMPS128,
30008 IX86_BUILTIN_BLENDMW256,
30009 IX86_BUILTIN_BLENDMB256,
30010 IX86_BUILTIN_BLENDMW128,
30011 IX86_BUILTIN_BLENDMB128,
30012 IX86_BUILTIN_PMULLD256_MASK,
30013 IX86_BUILTIN_PMULLD128_MASK,
30014 IX86_BUILTIN_PMULUDQ256_MASK,
30015 IX86_BUILTIN_PMULDQ256_MASK,
30016 IX86_BUILTIN_PMULDQ128_MASK,
30017 IX86_BUILTIN_PMULUDQ128_MASK,
30018 IX86_BUILTIN_CVTPD2PS256_MASK,
30019 IX86_BUILTIN_CVTPD2PS_MASK,
30020 IX86_BUILTIN_VPERMVARSI256_MASK,
30021 IX86_BUILTIN_VPERMVARDI256_MASK,
30022 IX86_BUILTIN_VPERMDI256_MASK,
30023 IX86_BUILTIN_CMPQ256,
30024 IX86_BUILTIN_CMPD256,
30025 IX86_BUILTIN_UCMPQ256,
30026 IX86_BUILTIN_UCMPD256,
30027 IX86_BUILTIN_CMPB256,
30028 IX86_BUILTIN_CMPW256,
30029 IX86_BUILTIN_UCMPB256,
30030 IX86_BUILTIN_UCMPW256,
30031 IX86_BUILTIN_CMPPD256_MASK,
30032 IX86_BUILTIN_CMPPS256_MASK,
30033 IX86_BUILTIN_CMPQ128,
30034 IX86_BUILTIN_CMPD128,
30035 IX86_BUILTIN_UCMPQ128,
30036 IX86_BUILTIN_UCMPD128,
30037 IX86_BUILTIN_CMPB128,
30038 IX86_BUILTIN_CMPW128,
30039 IX86_BUILTIN_UCMPB128,
30040 IX86_BUILTIN_UCMPW128,
30041 IX86_BUILTIN_CMPPD128_MASK,
30042 IX86_BUILTIN_CMPPS128_MASK,
30044 IX86_BUILTIN_GATHER3SIV8SF,
30045 IX86_BUILTIN_GATHER3SIV4SF,
30046 IX86_BUILTIN_GATHER3SIV4DF,
30047 IX86_BUILTIN_GATHER3SIV2DF,
30048 IX86_BUILTIN_GATHER3DIV8SF,
30049 IX86_BUILTIN_GATHER3DIV4SF,
30050 IX86_BUILTIN_GATHER3DIV4DF,
30051 IX86_BUILTIN_GATHER3DIV2DF,
30052 IX86_BUILTIN_GATHER3SIV8SI,
30053 IX86_BUILTIN_GATHER3SIV4SI,
30054 IX86_BUILTIN_GATHER3SIV4DI,
30055 IX86_BUILTIN_GATHER3SIV2DI,
30056 IX86_BUILTIN_GATHER3DIV8SI,
30057 IX86_BUILTIN_GATHER3DIV4SI,
30058 IX86_BUILTIN_GATHER3DIV4DI,
30059 IX86_BUILTIN_GATHER3DIV2DI,
30060 IX86_BUILTIN_SCATTERSIV8SF,
30061 IX86_BUILTIN_SCATTERSIV4SF,
30062 IX86_BUILTIN_SCATTERSIV4DF,
30063 IX86_BUILTIN_SCATTERSIV2DF,
30064 IX86_BUILTIN_SCATTERDIV8SF,
30065 IX86_BUILTIN_SCATTERDIV4SF,
30066 IX86_BUILTIN_SCATTERDIV4DF,
30067 IX86_BUILTIN_SCATTERDIV2DF,
30068 IX86_BUILTIN_SCATTERSIV8SI,
30069 IX86_BUILTIN_SCATTERSIV4SI,
30070 IX86_BUILTIN_SCATTERSIV4DI,
30071 IX86_BUILTIN_SCATTERSIV2DI,
30072 IX86_BUILTIN_SCATTERDIV8SI,
30073 IX86_BUILTIN_SCATTERDIV4SI,
30074 IX86_BUILTIN_SCATTERDIV4DI,
30075 IX86_BUILTIN_SCATTERDIV2DI,
30078 IX86_BUILTIN_RANGESD128,
30079 IX86_BUILTIN_RANGESS128,
30080 IX86_BUILTIN_KUNPCKWD,
30081 IX86_BUILTIN_KUNPCKDQ,
30082 IX86_BUILTIN_BROADCASTF32x2_512,
30083 IX86_BUILTIN_BROADCASTI32x2_512,
30084 IX86_BUILTIN_BROADCASTF64X2_512,
30085 IX86_BUILTIN_BROADCASTI64X2_512,
30086 IX86_BUILTIN_BROADCASTF32X8_512,
30087 IX86_BUILTIN_BROADCASTI32X8_512,
30088 IX86_BUILTIN_EXTRACTF64X2_512,
30089 IX86_BUILTIN_EXTRACTF32X8,
30090 IX86_BUILTIN_EXTRACTI64X2_512,
30091 IX86_BUILTIN_EXTRACTI32X8,
30092 IX86_BUILTIN_REDUCEPD512_MASK,
30093 IX86_BUILTIN_REDUCEPS512_MASK,
30094 IX86_BUILTIN_PMULLQ512,
30095 IX86_BUILTIN_XORPD512,
30096 IX86_BUILTIN_XORPS512,
30097 IX86_BUILTIN_ORPD512,
30098 IX86_BUILTIN_ORPS512,
30099 IX86_BUILTIN_ANDPD512,
30100 IX86_BUILTIN_ANDPS512,
30101 IX86_BUILTIN_ANDNPD512,
30102 IX86_BUILTIN_ANDNPS512,
30103 IX86_BUILTIN_INSERTF32X8,
30104 IX86_BUILTIN_INSERTI32X8,
30105 IX86_BUILTIN_INSERTF64X2_512,
30106 IX86_BUILTIN_INSERTI64X2_512,
30107 IX86_BUILTIN_FPCLASSPD512,
30108 IX86_BUILTIN_FPCLASSPS512,
30109 IX86_BUILTIN_CVTD2MASK512,
30110 IX86_BUILTIN_CVTQ2MASK512,
30111 IX86_BUILTIN_CVTMASK2D512,
30112 IX86_BUILTIN_CVTMASK2Q512,
30113 IX86_BUILTIN_CVTPD2QQ512,
30114 IX86_BUILTIN_CVTPS2QQ512,
30115 IX86_BUILTIN_CVTPD2UQQ512,
30116 IX86_BUILTIN_CVTPS2UQQ512,
30117 IX86_BUILTIN_CVTQQ2PS512,
30118 IX86_BUILTIN_CVTUQQ2PS512,
30119 IX86_BUILTIN_CVTQQ2PD512,
30120 IX86_BUILTIN_CVTUQQ2PD512,
30121 IX86_BUILTIN_CVTTPS2QQ512,
30122 IX86_BUILTIN_CVTTPS2UQQ512,
30123 IX86_BUILTIN_CVTTPD2QQ512,
30124 IX86_BUILTIN_CVTTPD2UQQ512,
30125 IX86_BUILTIN_RANGEPS512,
30126 IX86_BUILTIN_RANGEPD512,
30129 IX86_BUILTIN_PACKUSDW512,
30130 IX86_BUILTIN_PACKSSDW512,
30131 IX86_BUILTIN_LOADDQUHI512_MASK,
30132 IX86_BUILTIN_LOADDQUQI512_MASK,
30133 IX86_BUILTIN_PSLLDQ512,
30134 IX86_BUILTIN_PSRLDQ512,
30135 IX86_BUILTIN_STOREDQUHI512_MASK,
30136 IX86_BUILTIN_STOREDQUQI512_MASK,
30137 IX86_BUILTIN_PALIGNR512,
30138 IX86_BUILTIN_PALIGNR512_MASK,
30139 IX86_BUILTIN_MOVDQUHI512_MASK,
30140 IX86_BUILTIN_MOVDQUQI512_MASK,
30141 IX86_BUILTIN_PSADBW512,
30142 IX86_BUILTIN_DBPSADBW512,
30143 IX86_BUILTIN_PBROADCASTB512,
30144 IX86_BUILTIN_PBROADCASTB512_GPR,
30145 IX86_BUILTIN_PBROADCASTW512,
30146 IX86_BUILTIN_PBROADCASTW512_GPR,
30147 IX86_BUILTIN_PMOVSXBW512_MASK,
30148 IX86_BUILTIN_PMOVZXBW512_MASK,
30149 IX86_BUILTIN_VPERMVARHI512_MASK,
30150 IX86_BUILTIN_VPERMT2VARHI512,
30151 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30152 IX86_BUILTIN_VPERMI2VARHI512,
30153 IX86_BUILTIN_PAVGB512,
30154 IX86_BUILTIN_PAVGW512,
30155 IX86_BUILTIN_PADDB512,
30156 IX86_BUILTIN_PSUBB512,
30157 IX86_BUILTIN_PSUBSB512,
30158 IX86_BUILTIN_PADDSB512,
30159 IX86_BUILTIN_PSUBUSB512,
30160 IX86_BUILTIN_PADDUSB512,
30161 IX86_BUILTIN_PSUBW512,
30162 IX86_BUILTIN_PADDW512,
30163 IX86_BUILTIN_PSUBSW512,
30164 IX86_BUILTIN_PADDSW512,
30165 IX86_BUILTIN_PSUBUSW512,
30166 IX86_BUILTIN_PADDUSW512,
30167 IX86_BUILTIN_PMAXUW512,
30168 IX86_BUILTIN_PMAXSW512,
30169 IX86_BUILTIN_PMINUW512,
30170 IX86_BUILTIN_PMINSW512,
30171 IX86_BUILTIN_PMAXUB512,
30172 IX86_BUILTIN_PMAXSB512,
30173 IX86_BUILTIN_PMINUB512,
30174 IX86_BUILTIN_PMINSB512,
30175 IX86_BUILTIN_PMOVWB512,
30176 IX86_BUILTIN_PMOVSWB512,
30177 IX86_BUILTIN_PMOVUSWB512,
30178 IX86_BUILTIN_PMULHRSW512_MASK,
30179 IX86_BUILTIN_PMULHUW512_MASK,
30180 IX86_BUILTIN_PMULHW512_MASK,
30181 IX86_BUILTIN_PMULLW512_MASK,
30182 IX86_BUILTIN_PSLLWI512_MASK,
30183 IX86_BUILTIN_PSLLW512_MASK,
30184 IX86_BUILTIN_PACKSSWB512,
30185 IX86_BUILTIN_PACKUSWB512,
30186 IX86_BUILTIN_PSRAVV32HI,
30187 IX86_BUILTIN_PMADDUBSW512_MASK,
30188 IX86_BUILTIN_PMADDWD512_MASK,
30189 IX86_BUILTIN_PSRLVV32HI,
30190 IX86_BUILTIN_PUNPCKHBW512,
30191 IX86_BUILTIN_PUNPCKHWD512,
30192 IX86_BUILTIN_PUNPCKLBW512,
30193 IX86_BUILTIN_PUNPCKLWD512,
30194 IX86_BUILTIN_PSHUFB512,
30195 IX86_BUILTIN_PSHUFHW512,
30196 IX86_BUILTIN_PSHUFLW512,
30197 IX86_BUILTIN_PSRAWI512,
30198 IX86_BUILTIN_PSRAW512,
30199 IX86_BUILTIN_PSRLWI512,
30200 IX86_BUILTIN_PSRLW512,
30201 IX86_BUILTIN_CVTB2MASK512,
30202 IX86_BUILTIN_CVTW2MASK512,
30203 IX86_BUILTIN_CVTMASK2B512,
30204 IX86_BUILTIN_CVTMASK2W512,
30205 IX86_BUILTIN_PCMPEQB512_MASK,
30206 IX86_BUILTIN_PCMPEQW512_MASK,
30207 IX86_BUILTIN_PCMPGTB512_MASK,
30208 IX86_BUILTIN_PCMPGTW512_MASK,
30209 IX86_BUILTIN_PTESTMB512,
30210 IX86_BUILTIN_PTESTMW512,
30211 IX86_BUILTIN_PTESTNMB512,
30212 IX86_BUILTIN_PTESTNMW512,
30213 IX86_BUILTIN_PSLLVV32HI,
30214 IX86_BUILTIN_PABSB512,
30215 IX86_BUILTIN_PABSW512,
30216 IX86_BUILTIN_BLENDMW512,
30217 IX86_BUILTIN_BLENDMB512,
30218 IX86_BUILTIN_CMPB512,
30219 IX86_BUILTIN_CMPW512,
30220 IX86_BUILTIN_UCMPB512,
30221 IX86_BUILTIN_UCMPW512,
30223 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30224 where all operands are 32-byte or 64-byte wide respectively. */
30225 IX86_BUILTIN_GATHERALTSIV4DF,
30226 IX86_BUILTIN_GATHERALTDIV8SF,
30227 IX86_BUILTIN_GATHERALTSIV4DI,
30228 IX86_BUILTIN_GATHERALTDIV8SI,
30229 IX86_BUILTIN_GATHER3ALTDIV16SF,
30230 IX86_BUILTIN_GATHER3ALTDIV16SI,
30231 IX86_BUILTIN_GATHER3ALTSIV4DF,
30232 IX86_BUILTIN_GATHER3ALTDIV8SF,
30233 IX86_BUILTIN_GATHER3ALTSIV4DI,
30234 IX86_BUILTIN_GATHER3ALTDIV8SI,
30235 IX86_BUILTIN_GATHER3ALTSIV8DF,
30236 IX86_BUILTIN_GATHER3ALTSIV8DI,
30237 IX86_BUILTIN_GATHER3DIV16SF,
30238 IX86_BUILTIN_GATHER3DIV16SI,
30239 IX86_BUILTIN_GATHER3DIV8DF,
30240 IX86_BUILTIN_GATHER3DIV8DI,
30241 IX86_BUILTIN_GATHER3SIV16SF,
30242 IX86_BUILTIN_GATHER3SIV16SI,
30243 IX86_BUILTIN_GATHER3SIV8DF,
30244 IX86_BUILTIN_GATHER3SIV8DI,
30245 IX86_BUILTIN_SCATTERDIV16SF,
30246 IX86_BUILTIN_SCATTERDIV16SI,
30247 IX86_BUILTIN_SCATTERDIV8DF,
30248 IX86_BUILTIN_SCATTERDIV8DI,
30249 IX86_BUILTIN_SCATTERSIV16SF,
30250 IX86_BUILTIN_SCATTERSIV16SI,
30251 IX86_BUILTIN_SCATTERSIV8DF,
30252 IX86_BUILTIN_SCATTERSIV8DI,
30255 IX86_BUILTIN_GATHERPFQPD,
30256 IX86_BUILTIN_GATHERPFDPS,
30257 IX86_BUILTIN_GATHERPFDPD,
30258 IX86_BUILTIN_GATHERPFQPS,
30259 IX86_BUILTIN_SCATTERPFDPD,
30260 IX86_BUILTIN_SCATTERPFDPS,
30261 IX86_BUILTIN_SCATTERPFQPD,
30262 IX86_BUILTIN_SCATTERPFQPS,
30265 IX86_BUILTIN_EXP2PD_MASK,
30266 IX86_BUILTIN_EXP2PS_MASK,
30267 IX86_BUILTIN_EXP2PS,
30268 IX86_BUILTIN_RCP28PD,
30269 IX86_BUILTIN_RCP28PS,
30270 IX86_BUILTIN_RCP28SD,
30271 IX86_BUILTIN_RCP28SS,
30272 IX86_BUILTIN_RSQRT28PD,
30273 IX86_BUILTIN_RSQRT28PS,
30274 IX86_BUILTIN_RSQRT28SD,
30275 IX86_BUILTIN_RSQRT28SS,
30278 IX86_BUILTIN_VPMADD52LUQ512,
30279 IX86_BUILTIN_VPMADD52HUQ512,
30280 IX86_BUILTIN_VPMADD52LUQ256,
30281 IX86_BUILTIN_VPMADD52HUQ256,
30282 IX86_BUILTIN_VPMADD52LUQ128,
30283 IX86_BUILTIN_VPMADD52HUQ128,
30284 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30285 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30286 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30287 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30288 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30289 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30292 IX86_BUILTIN_VPMULTISHIFTQB512,
30293 IX86_BUILTIN_VPMULTISHIFTQB256,
30294 IX86_BUILTIN_VPMULTISHIFTQB128,
30295 IX86_BUILTIN_VPERMVARQI512_MASK,
30296 IX86_BUILTIN_VPERMT2VARQI512,
30297 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30298 IX86_BUILTIN_VPERMI2VARQI512,
30299 IX86_BUILTIN_VPERMVARQI256_MASK,
30300 IX86_BUILTIN_VPERMVARQI128_MASK,
30301 IX86_BUILTIN_VPERMT2VARQI256,
30302 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30303 IX86_BUILTIN_VPERMT2VARQI128,
30304 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30305 IX86_BUILTIN_VPERMI2VARQI256,
30306 IX86_BUILTIN_VPERMI2VARQI128,
30308 /* SHA builtins. */
30309 IX86_BUILTIN_SHA1MSG1,
30310 IX86_BUILTIN_SHA1MSG2,
30311 IX86_BUILTIN_SHA1NEXTE,
30312 IX86_BUILTIN_SHA1RNDS4,
30313 IX86_BUILTIN_SHA256MSG1,
30314 IX86_BUILTIN_SHA256MSG2,
30315 IX86_BUILTIN_SHA256RNDS2,
30317 /* CLWB instructions. */
30320 /* PCOMMIT instructions. */
30321 IX86_BUILTIN_PCOMMIT,
30323 /* CLFLUSHOPT instructions. */
30324 IX86_BUILTIN_CLFLUSHOPT,
30326 /* TFmode support builtins. */
30328 IX86_BUILTIN_HUGE_VALQ,
30329 IX86_BUILTIN_FABSQ,
30330 IX86_BUILTIN_COPYSIGNQ,
30332 /* Vectorizer support builtins. */
30333 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30334 IX86_BUILTIN_CPYSGNPS,
30335 IX86_BUILTIN_CPYSGNPD,
30336 IX86_BUILTIN_CPYSGNPS256,
30337 IX86_BUILTIN_CPYSGNPS512,
30338 IX86_BUILTIN_CPYSGNPD256,
30339 IX86_BUILTIN_CPYSGNPD512,
30340 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30341 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30344 /* FMA4 instructions. */
30345 IX86_BUILTIN_VFMADDSS,
30346 IX86_BUILTIN_VFMADDSD,
30347 IX86_BUILTIN_VFMADDPS,
30348 IX86_BUILTIN_VFMADDPD,
30349 IX86_BUILTIN_VFMADDPS256,
30350 IX86_BUILTIN_VFMADDPD256,
30351 IX86_BUILTIN_VFMADDSUBPS,
30352 IX86_BUILTIN_VFMADDSUBPD,
30353 IX86_BUILTIN_VFMADDSUBPS256,
30354 IX86_BUILTIN_VFMADDSUBPD256,
30356 /* FMA3 instructions. */
30357 IX86_BUILTIN_VFMADDSS3,
30358 IX86_BUILTIN_VFMADDSD3,
30360 /* XOP instructions. */
30361 IX86_BUILTIN_VPCMOV,
30362 IX86_BUILTIN_VPCMOV_V2DI,
30363 IX86_BUILTIN_VPCMOV_V4SI,
30364 IX86_BUILTIN_VPCMOV_V8HI,
30365 IX86_BUILTIN_VPCMOV_V16QI,
30366 IX86_BUILTIN_VPCMOV_V4SF,
30367 IX86_BUILTIN_VPCMOV_V2DF,
30368 IX86_BUILTIN_VPCMOV256,
30369 IX86_BUILTIN_VPCMOV_V4DI256,
30370 IX86_BUILTIN_VPCMOV_V8SI256,
30371 IX86_BUILTIN_VPCMOV_V16HI256,
30372 IX86_BUILTIN_VPCMOV_V32QI256,
30373 IX86_BUILTIN_VPCMOV_V8SF256,
30374 IX86_BUILTIN_VPCMOV_V4DF256,
30376 IX86_BUILTIN_VPPERM,
30378 IX86_BUILTIN_VPMACSSWW,
30379 IX86_BUILTIN_VPMACSWW,
30380 IX86_BUILTIN_VPMACSSWD,
30381 IX86_BUILTIN_VPMACSWD,
30382 IX86_BUILTIN_VPMACSSDD,
30383 IX86_BUILTIN_VPMACSDD,
30384 IX86_BUILTIN_VPMACSSDQL,
30385 IX86_BUILTIN_VPMACSSDQH,
30386 IX86_BUILTIN_VPMACSDQL,
30387 IX86_BUILTIN_VPMACSDQH,
30388 IX86_BUILTIN_VPMADCSSWD,
30389 IX86_BUILTIN_VPMADCSWD,
30391 IX86_BUILTIN_VPHADDBW,
30392 IX86_BUILTIN_VPHADDBD,
30393 IX86_BUILTIN_VPHADDBQ,
30394 IX86_BUILTIN_VPHADDWD,
30395 IX86_BUILTIN_VPHADDWQ,
30396 IX86_BUILTIN_VPHADDDQ,
30397 IX86_BUILTIN_VPHADDUBW,
30398 IX86_BUILTIN_VPHADDUBD,
30399 IX86_BUILTIN_VPHADDUBQ,
30400 IX86_BUILTIN_VPHADDUWD,
30401 IX86_BUILTIN_VPHADDUWQ,
30402 IX86_BUILTIN_VPHADDUDQ,
30403 IX86_BUILTIN_VPHSUBBW,
30404 IX86_BUILTIN_VPHSUBWD,
30405 IX86_BUILTIN_VPHSUBDQ,
30407 IX86_BUILTIN_VPROTB,
30408 IX86_BUILTIN_VPROTW,
30409 IX86_BUILTIN_VPROTD,
30410 IX86_BUILTIN_VPROTQ,
30411 IX86_BUILTIN_VPROTB_IMM,
30412 IX86_BUILTIN_VPROTW_IMM,
30413 IX86_BUILTIN_VPROTD_IMM,
30414 IX86_BUILTIN_VPROTQ_IMM,
30416 IX86_BUILTIN_VPSHLB,
30417 IX86_BUILTIN_VPSHLW,
30418 IX86_BUILTIN_VPSHLD,
30419 IX86_BUILTIN_VPSHLQ,
30420 IX86_BUILTIN_VPSHAB,
30421 IX86_BUILTIN_VPSHAW,
30422 IX86_BUILTIN_VPSHAD,
30423 IX86_BUILTIN_VPSHAQ,
30425 IX86_BUILTIN_VFRCZSS,
30426 IX86_BUILTIN_VFRCZSD,
30427 IX86_BUILTIN_VFRCZPS,
30428 IX86_BUILTIN_VFRCZPD,
30429 IX86_BUILTIN_VFRCZPS256,
30430 IX86_BUILTIN_VFRCZPD256,
30432 IX86_BUILTIN_VPCOMEQUB,
30433 IX86_BUILTIN_VPCOMNEUB,
30434 IX86_BUILTIN_VPCOMLTUB,
30435 IX86_BUILTIN_VPCOMLEUB,
30436 IX86_BUILTIN_VPCOMGTUB,
30437 IX86_BUILTIN_VPCOMGEUB,
30438 IX86_BUILTIN_VPCOMFALSEUB,
30439 IX86_BUILTIN_VPCOMTRUEUB,
30441 IX86_BUILTIN_VPCOMEQUW,
30442 IX86_BUILTIN_VPCOMNEUW,
30443 IX86_BUILTIN_VPCOMLTUW,
30444 IX86_BUILTIN_VPCOMLEUW,
30445 IX86_BUILTIN_VPCOMGTUW,
30446 IX86_BUILTIN_VPCOMGEUW,
30447 IX86_BUILTIN_VPCOMFALSEUW,
30448 IX86_BUILTIN_VPCOMTRUEUW,
30450 IX86_BUILTIN_VPCOMEQUD,
30451 IX86_BUILTIN_VPCOMNEUD,
30452 IX86_BUILTIN_VPCOMLTUD,
30453 IX86_BUILTIN_VPCOMLEUD,
30454 IX86_BUILTIN_VPCOMGTUD,
30455 IX86_BUILTIN_VPCOMGEUD,
30456 IX86_BUILTIN_VPCOMFALSEUD,
30457 IX86_BUILTIN_VPCOMTRUEUD,
30459 IX86_BUILTIN_VPCOMEQUQ,
30460 IX86_BUILTIN_VPCOMNEUQ,
30461 IX86_BUILTIN_VPCOMLTUQ,
30462 IX86_BUILTIN_VPCOMLEUQ,
30463 IX86_BUILTIN_VPCOMGTUQ,
30464 IX86_BUILTIN_VPCOMGEUQ,
30465 IX86_BUILTIN_VPCOMFALSEUQ,
30466 IX86_BUILTIN_VPCOMTRUEUQ,
30468 IX86_BUILTIN_VPCOMEQB,
30469 IX86_BUILTIN_VPCOMNEB,
30470 IX86_BUILTIN_VPCOMLTB,
30471 IX86_BUILTIN_VPCOMLEB,
30472 IX86_BUILTIN_VPCOMGTB,
30473 IX86_BUILTIN_VPCOMGEB,
30474 IX86_BUILTIN_VPCOMFALSEB,
30475 IX86_BUILTIN_VPCOMTRUEB,
30477 IX86_BUILTIN_VPCOMEQW,
30478 IX86_BUILTIN_VPCOMNEW,
30479 IX86_BUILTIN_VPCOMLTW,
30480 IX86_BUILTIN_VPCOMLEW,
30481 IX86_BUILTIN_VPCOMGTW,
30482 IX86_BUILTIN_VPCOMGEW,
30483 IX86_BUILTIN_VPCOMFALSEW,
30484 IX86_BUILTIN_VPCOMTRUEW,
30486 IX86_BUILTIN_VPCOMEQD,
30487 IX86_BUILTIN_VPCOMNED,
30488 IX86_BUILTIN_VPCOMLTD,
30489 IX86_BUILTIN_VPCOMLED,
30490 IX86_BUILTIN_VPCOMGTD,
30491 IX86_BUILTIN_VPCOMGED,
30492 IX86_BUILTIN_VPCOMFALSED,
30493 IX86_BUILTIN_VPCOMTRUED,
30495 IX86_BUILTIN_VPCOMEQQ,
30496 IX86_BUILTIN_VPCOMNEQ,
30497 IX86_BUILTIN_VPCOMLTQ,
30498 IX86_BUILTIN_VPCOMLEQ,
30499 IX86_BUILTIN_VPCOMGTQ,
30500 IX86_BUILTIN_VPCOMGEQ,
30501 IX86_BUILTIN_VPCOMFALSEQ,
30502 IX86_BUILTIN_VPCOMTRUEQ,
30504 /* LWP instructions. */
30505 IX86_BUILTIN_LLWPCB,
30506 IX86_BUILTIN_SLWPCB,
30507 IX86_BUILTIN_LWPVAL32,
30508 IX86_BUILTIN_LWPVAL64,
30509 IX86_BUILTIN_LWPINS32,
30510 IX86_BUILTIN_LWPINS64,
30515 IX86_BUILTIN_XBEGIN,
30517 IX86_BUILTIN_XABORT,
30518 IX86_BUILTIN_XTEST,
30521 IX86_BUILTIN_BNDMK,
30522 IX86_BUILTIN_BNDSTX,
30523 IX86_BUILTIN_BNDLDX,
30524 IX86_BUILTIN_BNDCL,
30525 IX86_BUILTIN_BNDCU,
30526 IX86_BUILTIN_BNDRET,
30527 IX86_BUILTIN_BNDNARROW,
30528 IX86_BUILTIN_BNDINT,
30529 IX86_BUILTIN_SIZEOF,
30530 IX86_BUILTIN_BNDLOWER,
30531 IX86_BUILTIN_BNDUPPER,
30533 /* BMI instructions. */
30534 IX86_BUILTIN_BEXTR32,
30535 IX86_BUILTIN_BEXTR64,
30538 /* TBM instructions. */
30539 IX86_BUILTIN_BEXTRI32,
30540 IX86_BUILTIN_BEXTRI64,
30542 /* BMI2 instructions. */
30543 IX86_BUILTIN_BZHI32,
30544 IX86_BUILTIN_BZHI64,
30545 IX86_BUILTIN_PDEP32,
30546 IX86_BUILTIN_PDEP64,
30547 IX86_BUILTIN_PEXT32,
30548 IX86_BUILTIN_PEXT64,
30550 /* ADX instructions. */
30551 IX86_BUILTIN_ADDCARRYX32,
30552 IX86_BUILTIN_ADDCARRYX64,
30554 /* SBB instructions. */
30555 IX86_BUILTIN_SBB32,
30556 IX86_BUILTIN_SBB64,
30558 /* FSGSBASE instructions. */
30559 IX86_BUILTIN_RDFSBASE32,
30560 IX86_BUILTIN_RDFSBASE64,
30561 IX86_BUILTIN_RDGSBASE32,
30562 IX86_BUILTIN_RDGSBASE64,
30563 IX86_BUILTIN_WRFSBASE32,
30564 IX86_BUILTIN_WRFSBASE64,
30565 IX86_BUILTIN_WRGSBASE32,
30566 IX86_BUILTIN_WRGSBASE64,
30568 /* RDRND instructions. */
30569 IX86_BUILTIN_RDRAND16_STEP,
30570 IX86_BUILTIN_RDRAND32_STEP,
30571 IX86_BUILTIN_RDRAND64_STEP,
30573 /* RDSEED instructions. */
30574 IX86_BUILTIN_RDSEED16_STEP,
30575 IX86_BUILTIN_RDSEED32_STEP,
30576 IX86_BUILTIN_RDSEED64_STEP,
30578 /* F16C instructions. */
30579 IX86_BUILTIN_CVTPH2PS,
30580 IX86_BUILTIN_CVTPH2PS256,
30581 IX86_BUILTIN_CVTPS2PH,
30582 IX86_BUILTIN_CVTPS2PH256,
30584 /* CFString built-in for darwin */
30585 IX86_BUILTIN_CFSTRING,
30587 /* Builtins to get CPU type and supported features. */
30588 IX86_BUILTIN_CPU_INIT,
30589 IX86_BUILTIN_CPU_IS,
30590 IX86_BUILTIN_CPU_SUPPORTS,
30592 /* Read/write FLAGS register built-ins. */
30593 IX86_BUILTIN_READ_FLAGS,
30594 IX86_BUILTIN_WRITE_FLAGS,
30599 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; NULL_TREE for builtins whose decl
   creation has been deferred (see def_builtin / ix86_add_new_builtins).  */
30600 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30602 /* Table of all of the builtin functions that are possible with different ISA's
30603 but are waiting to be built until a function is declared to use that
/* NOTE(review): the line completing this comment and the struct's closing
   brace are elided from this excerpt of the original file.  */
30605 struct builtin_isa {
30606 const char *name; /* function name */
30607 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30608 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30609 bool const_p; /* true if the declaration is constant */
30610 bool leaf_p; /* true if the declaration has leaf attribute */
30611 bool nothrow_p; /* true if the declaration has nothrow attribute */
/* True while the builtin is only recorded here (deferred) and its decl
   has not been built; set in def_builtin's deferral path and cleared
   when the decl is finally created.  */
30612 bool set_and_not_built_p;
/* One deferred-builtin record per IX86_BUILTIN_* code, parallel to
   ix86_builtins above.  */
30615 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30617 /* Bits that can still enable any inclusion of a builtin. */
/* Union of the ISA masks of all builtins currently deferred; lets
   ix86_add_new_builtins bail out cheaply when an ISA change cannot
   possibly enable anything new.  */
30618 static HOST_WIDE_INT deferred_isa_values = 0;
30620 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30621 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30622 function decl in the ix86_builtins array. Returns the function decl or
30623 NULL_TREE, if the builtin was not added.
30625 If the front end has a special hook for builtin functions, delay adding
30626 builtin functions that aren't in the current ISA until the ISA is changed
30627 with function specific optimization. Doing so, can save about 300K for the
30628 default compiler. When the builtin is expanded, check at that time whether
30631 If the front end doesn't have a special hook, record all builtins, even if
30632 it isn't an instruction set in the current ISA in case the user uses
30633 function specific options for a different ISA, so that we don't get scope
30634 errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): the return-type line, function braces, and part of the
   build-now-vs-defer condition (original line 30648) are elided from this
   excerpt; the control structure below is incomplete as displayed.  */
30637 def_builtin (HOST_WIDE_INT mask, const char *name,
30638 enum ix86_builtin_func_type tcode,
30639 enum ix86_builtins code)
30641 tree decl = NULL_TREE;
/* 64-bit-only builtins are dropped entirely on 32-bit targets.  */
30643 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30645 ix86_builtins_isa[(int) code].isa = mask;
/* OPTION_MASK_ISA_64BIT is a gating bit, not a deferrable ISA flag;
   strip it before comparing against ix86_isa_flags.  */
30647 mask &= ~OPTION_MASK_ISA_64BIT;
30649 || (mask & ix86_isa_flags) != 0
30650 || (lang_hooks.builtin_function
30651 == lang_hooks.builtin_function_ext_scope))
/* Build the decl now: the builtin is enabled (or deferral is
   pointless for this front end).  */
30654 tree type = ix86_get_builtin_func_type (tcode);
30655 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30657 ix86_builtins[(int) code] = decl;
30658 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
/* Defer: record everything needed so ix86_add_new_builtins can
   create the decl later when the ISA is enabled.  */
30662 /* Just a MASK where set_and_not_built_p == true can potentially
30663 include a builtin. */
30664 deferred_isa_values |= mask;
30665 ix86_builtins[(int) code] = NULL_TREE;
30666 ix86_builtins_isa[(int) code].tcode = tcode;
30667 ix86_builtins_isa[(int) code].name = name;
30668 ix86_builtins_isa[(int) code].leaf_p = false;
30669 ix86_builtins_isa[(int) code].nothrow_p = false;
30670 ix86_builtins_isa[(int) code].const_p = false;
30671 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30678 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): the return-type line, braces, the branch between the
   two statements below, and the return are elided from this excerpt.
   The const-ness is applied to the decl when it was built, and recorded
   in ix86_builtins_isa when the builtin was deferred.  */
30681 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30682 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30684 tree decl = def_builtin (mask, name, tcode, code);
30686 TREE_READONLY (decl) = 1;
30688 ix86_builtins_isa[(int) code].const_p = true;
30693 /* Add any new builtin functions for a given ISA that may not have been
30694 declared. This saves a bit of space compared to adding all of the
30695 declarations to the tree, even if we didn't use them. */
/* NOTE(review): the return type, braces, local declarations (i, type,
   decl), and several statement fragments are elided from this excerpt.  */
30698 ix86_add_new_builtins (HOST_WIDE_INT isa)
/* Fast path: nothing deferred depends on any bit of ISA.  */
30700 if ((isa & deferred_isa_values) == 0)
30703 /* Bits in ISA value can be removed from potential isa values. */
30704 deferred_isa_values &= ~isa;
/* Suppress any active target pragma so the deferred decls are built
   with default target options, then restore it afterwards.  */
30707 tree saved_current_target_pragma = current_target_pragma;
30708 current_target_pragma = NULL_TREE;
30710 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* Build only builtins that were deferred and are now enabled.  */
30712 if ((ix86_builtins_isa[i].isa & isa) != 0
30713 && ix86_builtins_isa[i].set_and_not_built_p)
30717 /* Don't define the builtin again. */
30718 ix86_builtins_isa[i].set_and_not_built_p = false;
30720 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
/* Ext-scope creation so the decl is visible even when the ISA is
   switched inside a function scope.  */
30721 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30722 type, i, BUILT_IN_MD, NULL,
30725 ix86_builtins[i] = decl;
/* Replay the attributes recorded at def_builtin time.  */
30726 if (ix86_builtins_isa[i].const_p)
30727 TREE_READONLY (decl) = 1;
30728 if (ix86_builtins_isa[i].leaf_p)
30729 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30731 if (ix86_builtins_isa[i].nothrow_p)
30732 TREE_NOTHROW (decl) = 1;
30736 current_target_pragma = saved_current_target_pragma;
30739 /* Bits for builtin_description.flag. */
30741 /* Set when we don't support the comparison natively, and should
30742 swap_comparison in order to support it. */
30743 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the bdesc_* tables below: maps an ISA mask and insn
   pattern to a __builtin_ia32_* name and its IX86_BUILTIN_* code.
   NOTE(review): the final field (flag) and the closing brace of this
   struct are elided from this excerpt.  */
30745 struct builtin_description
30747 const HOST_WIDE_INT mask;
30748 const enum insn_code icode;
30749 const char *const name;
30750 const enum ix86_builtins code;
30751 const enum rtx_code comparison;
/* SSE/SSE2 scalar compare-and-set-flags builtins (comiss/ucomiss,
   comisd/ucomisd); the rtx_code column is the comparison expanded
   against the flags result.  NOTE(review): the array's opening and
   closing braces are elided from this excerpt.  */
30755 static const struct builtin_description bdesc_comi[] =
30757 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30758 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30759 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30760 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30761 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30770 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30771 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30772 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30773 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 packed-compare-explicit-length string builtins; the last
   column selects which flags-register condition (CC*mode) the
   pcmpestr result is read through.  NOTE(review): the array braces
   are elided from this excerpt.  */
30783 static const struct builtin_description bdesc_pcmpestr[] =
30786 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30787 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30788 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30789 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30790 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30791 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 packed-compare-implicit-length string builtins; parallel to
   bdesc_pcmpestr above but for the pcmpistr pattern.  NOTE(review):
   the array braces are elided from this excerpt.  */
30795 static const struct builtin_description bdesc_pcmpistr[] =
30798 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30799 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30800 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30801 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30802 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30803 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30804 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30807 /* Special builtins with variable number of arguments. */
30808 static const struct builtin_description bdesc_special_args[] =
30810 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30811 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30812 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30814 /* 80387 (for use internally for atomic compound assignment). */
30815 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30816 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30817 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30818 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30821 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30824 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30826 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30827 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30828 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30829 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30830 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30831 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30832 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30833 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30834 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30836 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30837 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30838 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30839 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30840 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30841 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30842 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30843 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30846 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30847 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30848 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30850 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30855 /* SSE or 3DNow!A */
30856 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30857 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30867 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30875 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30878 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30881 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30882 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30888 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30889 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30890 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30975 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30976 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30977 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30978 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30979 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30980 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30983 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30984 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30985 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30986 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30987 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30988 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30989 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30990 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30993 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30994 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30995 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30998 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30999 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31000 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31001 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31004 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31040 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31041 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31042 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31100 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31103 /* Builtins with variable number of arguments. */
31104 static const struct builtin_description bdesc_args[] =
31106 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31107 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31108 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31109 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31110 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31111 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31112 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
31151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
31153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31179 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31180 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31181 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31182 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31184 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31201 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31202 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31203 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31204 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31205 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31206 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31211 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31212 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31213 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31217 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31220 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31224 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31225 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31226 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31231 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
31244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31254 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31255 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31259 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31261 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31262 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31264 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31269 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31270 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31274   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31276 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31282 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31283 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31285 /* SSE MMX or 3Dnow!A */
31286 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31287 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31288 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31295 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31298 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31319 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31320 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31361 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31366 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31369 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31370 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31372 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31374 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31375 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31376 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31377 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31378 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31379 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31380 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31381 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31392 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31393 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
31395 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31397 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31398 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31410 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31411 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31412 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31415 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31416 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31420 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31428 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31432 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31437 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31442 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31443 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31444 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31445 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31446 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31447 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31450 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31451 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31452 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31453 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31454 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31455 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31457 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31458 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31472 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31475 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
31476 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31478 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31479 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31480 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31482 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31483 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31486 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31488 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31523 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31524 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31525 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31527 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31562 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31563 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31564 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31565 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31567 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31572 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31575 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31576 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31578 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31579 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31581 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31586 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31587 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31589 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31591 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31594 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31595 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31596 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31597 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31598 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31601 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31602 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31603 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31604 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31607 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31608 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31610 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31611 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31612 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31613 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31616 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31619 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31667 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31668 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31672 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31674 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31690 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31692 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31694 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31706 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31707 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31720 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31721 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31731 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31732 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31733 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31754 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31755 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31757 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31799 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31800 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31801 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31802 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31803 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31804 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31805 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31806 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31807 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31808 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31809 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31810 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31824 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31826 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31827 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31828 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31829 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31830 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31831 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31841 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31842 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31843 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31844 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31845 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31846 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31847 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31848 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31849 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31850 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31852 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31853 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31854 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31855 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31856 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31857 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31858 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31859 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31860 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31861 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31874 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31907 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31910 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31911 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31912 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31915 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31916 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31919 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31920 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31921 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31922 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31925 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31926 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31927 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31928 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31929 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31930 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31988 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31989 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32099 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32100 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32101 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32102 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32134 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32139 /* Mask arithmetic operations.  */
32140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
32152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32171 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32172 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32173 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32174 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32199 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32200 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32201 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32202 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32203 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32204 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32205 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32206 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32207 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32208 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32209 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32210 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32211 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32216 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32217 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32218 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32219 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32220 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32221 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32222 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32223 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32224 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32225 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32228 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32229 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32230 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32231 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32252 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32253 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32259 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32271 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32272 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32275 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32276 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32287 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32288 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32299 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32300 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32301 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32302 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32303 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32304 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32305 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32306 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32307 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32308 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32309 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32325 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32326 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32329 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32330 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32333 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32334 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32335 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32336 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32337 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32338 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32349 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32350 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32353 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32356 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32397 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32398 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32399 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32400 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32425 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32426 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32427 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32428 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32429 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32430 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32431 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32432 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32433 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32434 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32435 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32438 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32543 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32544 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32557 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32558 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32559 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32560 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32571 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32572 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32573 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32574 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32575 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32576 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32577 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32578 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32604 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32605 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32636 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32637 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32638 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32639 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32651 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32652 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32653 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32654 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32655 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32656 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32657 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32658 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32659 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32660 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32661 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32662 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32663 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32664 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32666 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32667 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32668 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32669 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32670 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32671 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32672 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32675 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32705 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32706 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32707 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32708 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32749 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32752 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32753 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32754 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32755 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32756 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32757 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32758 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32759 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32763 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32764 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32765 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32766 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32767 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32768 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32776 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32777 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32778 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32779 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32797 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32798 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32799 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32800 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32801 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32802 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32804 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32809 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32810 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32811 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32812 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32813 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32814 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32818 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32819 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32820 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32821 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32838 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32839 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32840 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32841 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32857 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32858 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32859 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32860 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32867 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32868 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32869 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32870 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
32875 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32876 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32877 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32878 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32879 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32880 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32881 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32884 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32885 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32886 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32892 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32893 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32894 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
32895 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32896 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32897 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32898 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32899 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32900 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32901 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32902 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32903 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32904 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32905 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32908 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32909 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32910 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32911 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32912 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32913 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32914 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32915 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32916 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32917 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32918 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32919 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32921 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32922 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32923 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32924 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32925 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32926 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32927 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32928 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32929 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32930 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32931 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32932 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32933 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32934 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32935 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32936 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32937 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32938 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32939 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32940 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32941 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32942 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32943 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32944 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32945 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32946 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32947 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32948 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32949 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32950 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32951 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32952 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32953 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32954 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32955 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32956 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32957 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32958 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32959 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32960 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32961 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32962 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32963 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32964 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32965 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32966 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32967 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32968 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32969 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32970 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32971 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32972 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32973 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32974 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32975 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32976 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32977 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32978 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32979 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32980 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32981 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32982 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32983 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32984 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32985 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32986 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32987 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32988 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32989 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32990 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32991 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32992 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32993 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32994 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32995 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32996 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32997 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32998 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33001 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33002 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33003 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33004 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33005 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33006 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33007 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33008 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33009 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33010 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33011 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33012 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33015 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33016 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33017 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33018 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33019 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33020 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33021 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33022 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33023 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33024 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33025 /* Fix: VBMI ISA mask was OR'd with itself; one occurrence suffices and matches the _mask sibling entry.  */ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33026 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33027 /* Fix: VBMI ISA mask was OR'd with itself; one occurrence suffices and matches the _mask sibling entry.  */ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33028 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33029 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33032 /* Builtins with rounding support. */
33033 static const struct builtin_description bdesc_round_args[] =
33036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33055 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33057 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33064 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33066 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33116 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33118 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33120 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33122 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33124 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33126 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33128 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33130 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33157 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33158 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33159 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33160 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33161 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33162 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33163 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33164 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33165 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33166 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33169 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33170 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33171 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33172 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33173 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33174 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33175 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33176 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33177 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33178 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33179 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33180 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33181 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33182 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33183 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33184 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
/* Builtins for MPX.  */
33188 static const struct builtin_description bdesc_mpx[] =
33190 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33191 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33192 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33195 /* Const builtins for MPX. */
33196 static const struct builtin_description bdesc_mpx_const[] =
33198 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33199 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33200 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33201 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33202 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33203 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33204 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33205 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33208 /* FMA4 and XOP. */
33209 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33210 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33211 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33212 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33213 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33214 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33215 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33216 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33217 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33218 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33219 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33220 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33221 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33222 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33223 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33224 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33225 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33226 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33227 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33228 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33229 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33230 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33231 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33232 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33233 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33234 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33235 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33236 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33237 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33238 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33239 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33240 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33241 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33242 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33243 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33244 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33245 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33246 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33247 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33248 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33249 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33250 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33251 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33252 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33253 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33254 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33255 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33256 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33257 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33258 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33259 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33260 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33262 static const struct builtin_description bdesc_multi_arg[] =
33264 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33265 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33266 UNKNOWN, (int)MULTI_ARG_3_SF },
33267 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33268 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33269 UNKNOWN, (int)MULTI_ARG_3_DF },
33271 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33272 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33273 UNKNOWN, (int)MULTI_ARG_3_SF },
33274 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33275 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33276 UNKNOWN, (int)MULTI_ARG_3_DF },
33278 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33279 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33280 UNKNOWN, (int)MULTI_ARG_3_SF },
33281 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33282 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33283 UNKNOWN, (int)MULTI_ARG_3_DF },
33284 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33285 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33286 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33287 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33288 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33289 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33291 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33292 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33293 UNKNOWN, (int)MULTI_ARG_3_SF },
33294 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33295 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33296 UNKNOWN, (int)MULTI_ARG_3_DF },
33297 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33298 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33299 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33300 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33301 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33302 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33464 /* TM vector builtins. */
33466 /* Reuse the existing x86-specific `struct builtin_description' cause
33467 we're lazy. Add casts to make them fit. */
33468 static const struct builtin_description bdesc_tm[] =
33470 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33471 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33472 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33473 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33474 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33475 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33476 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33478 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33479 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33480 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33481 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33482 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33483 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33484 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33486 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33487 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33488 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33489 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33490 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33491 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33492 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33494 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33495 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33496 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33499 /* TM callbacks. */
33501 /* Return the builtin decl needed to load a vector of TYPE. */
33504 ix86_builtin_tm_load (tree type)
33506 if (TREE_CODE (type) == VECTOR_TYPE)
33508 switch (tree_to_uhwi (TYPE_SIZE (type)))
33511 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33513 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33515 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33521 /* Return the builtin decl needed to store a vector of TYPE. */
33524 ix86_builtin_tm_store (tree type)
33526 if (TREE_CODE (type) == VECTOR_TYPE)
33528 switch (tree_to_uhwi (TYPE_SIZE (type)))
33531 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33533 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33535 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33541 /* Initialize the transactional memory vector load/store builtins. */
33544 ix86_init_tm_builtins (void)
33546 enum ix86_builtin_func_type ftype;
33547 const struct builtin_description *d;
33550 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33551 tree attrs_log, attrs_type_log;
33556 /* If there are no builtins defined, we must be compiling in a
33557 language without trans-mem support. */
33558 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33561 /* Use whatever attributes a normal TM load has. */
33562 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33563 attrs_load = DECL_ATTRIBUTES (decl);
33564 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33565 /* Use whatever attributes a normal TM store has. */
33566 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33567 attrs_store = DECL_ATTRIBUTES (decl);
33568 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33569 /* Use whatever attributes a normal TM log has. */
33570 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33571 attrs_log = DECL_ATTRIBUTES (decl);
33572 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33574 for (i = 0, d = bdesc_tm;
33575 i < ARRAY_SIZE (bdesc_tm);
33578 if ((d->mask & ix86_isa_flags) != 0
33579 || (lang_hooks.builtin_function
33580 == lang_hooks.builtin_function_ext_scope))
33582 tree type, attrs, attrs_type;
33583 enum built_in_function code = (enum built_in_function) d->code;
33585 ftype = (enum ix86_builtin_func_type) d->flag;
33586 type = ix86_get_builtin_func_type (ftype);
33588 if (BUILTIN_TM_LOAD_P (code))
33590 attrs = attrs_load;
33591 attrs_type = attrs_type_load;
33593 else if (BUILTIN_TM_STORE_P (code))
33595 attrs = attrs_store;
33596 attrs_type = attrs_type_store;
33601 attrs_type = attrs_type_log;
33603 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33604 /* The builtin without the prefix for
33605 calling it directly. */
33606 d->name + strlen ("__builtin_"),
33608 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33609 set the TYPE_ATTRIBUTES. */
33610 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33612 set_builtin_decl (code, decl, false);
33617 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33618 in the current target ISA to allow the user to compile particular modules
33619 with different target specific options that differ from the command line
33622 ix86_init_mmx_sse_builtins (void)
33624 const struct builtin_description * d;
33625 enum ix86_builtin_func_type ftype;
33628 /* Add all special builtins with variable number of operands. */
33629 for (i = 0, d = bdesc_special_args;
33630 i < ARRAY_SIZE (bdesc_special_args);
33636 ftype = (enum ix86_builtin_func_type) d->flag;
33637 def_builtin (d->mask, d->name, ftype, d->code);
33640 /* Add all builtins with variable number of operands. */
33641 for (i = 0, d = bdesc_args;
33642 i < ARRAY_SIZE (bdesc_args);
33648 ftype = (enum ix86_builtin_func_type) d->flag;
33649 def_builtin_const (d->mask, d->name, ftype, d->code);
33652 /* Add all builtins with rounding. */
33653 for (i = 0, d = bdesc_round_args;
33654 i < ARRAY_SIZE (bdesc_round_args);
33660 ftype = (enum ix86_builtin_func_type) d->flag;
33661 def_builtin_const (d->mask, d->name, ftype, d->code);
33664 /* pcmpestr[im] insns. */
33665 for (i = 0, d = bdesc_pcmpestr;
33666 i < ARRAY_SIZE (bdesc_pcmpestr);
33669 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33670 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33672 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33673 def_builtin_const (d->mask, d->name, ftype, d->code);
33676 /* pcmpistr[im] insns. */
33677 for (i = 0, d = bdesc_pcmpistr;
33678 i < ARRAY_SIZE (bdesc_pcmpistr);
33681 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33682 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33684 ftype = INT_FTYPE_V16QI_V16QI_INT;
33685 def_builtin_const (d->mask, d->name, ftype, d->code);
33688 /* comi/ucomi insns. */
33689 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33691 if (d->mask == OPTION_MASK_ISA_SSE2)
33692 ftype = INT_FTYPE_V2DF_V2DF;
33694 ftype = INT_FTYPE_V4SF_V4SF;
33695 def_builtin_const (d->mask, d->name, ftype, d->code);
33699 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33700 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33701 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33702 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33704 /* SSE or 3DNow!A */
33705 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33706 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33707 IX86_BUILTIN_MASKMOVQ);
33710 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33711 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33713 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33714 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33715 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33716 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33719 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33720 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33721 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33722 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33725 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33726 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33727 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33728 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33729 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33730 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33731 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33732 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33733 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33734 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33735 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33736 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33739 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33740 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33743 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33744 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33745 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33746 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33747 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33748 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33749 IX86_BUILTIN_RDRAND64_STEP);
33752 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33753 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33754 IX86_BUILTIN_GATHERSIV2DF);
33756 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33757 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33758 IX86_BUILTIN_GATHERSIV4DF);
33760 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33761 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33762 IX86_BUILTIN_GATHERDIV2DF);
33764 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33765 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33766 IX86_BUILTIN_GATHERDIV4DF);
33768 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33769 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33770 IX86_BUILTIN_GATHERSIV4SF);
33772 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33773 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33774 IX86_BUILTIN_GATHERSIV8SF);
33776 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33777 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33778 IX86_BUILTIN_GATHERDIV4SF);
33780 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33781 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33782 IX86_BUILTIN_GATHERDIV8SF);
33784 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33785 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33786 IX86_BUILTIN_GATHERSIV2DI);
33788 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33789 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33790 IX86_BUILTIN_GATHERSIV4DI);
33792 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33793 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33794 IX86_BUILTIN_GATHERDIV2DI);
33796 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33797 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33798 IX86_BUILTIN_GATHERDIV4DI);
33800 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33801 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33802 IX86_BUILTIN_GATHERSIV4SI);
33804 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33805 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33806 IX86_BUILTIN_GATHERSIV8SI);
33808 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33809 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33810 IX86_BUILTIN_GATHERDIV4SI);
33812 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33813 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33814 IX86_BUILTIN_GATHERDIV8SI);
33816 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33817 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33818 IX86_BUILTIN_GATHERALTSIV4DF);
33820 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33821 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33822 IX86_BUILTIN_GATHERALTDIV8SF);
33824 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33825 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33826 IX86_BUILTIN_GATHERALTSIV4DI);
33828 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33829 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33830 IX86_BUILTIN_GATHERALTDIV8SI);
33833 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33834 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33835 IX86_BUILTIN_GATHER3SIV16SF);
33837 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33838 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33839 IX86_BUILTIN_GATHER3SIV8DF);
33841 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33842 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33843 IX86_BUILTIN_GATHER3DIV16SF);
33845 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33846 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33847 IX86_BUILTIN_GATHER3DIV8DF);
33849 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33850 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33851 IX86_BUILTIN_GATHER3SIV16SI);
33853 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33854 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33855 IX86_BUILTIN_GATHER3SIV8DI);
33857 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33858 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33859 IX86_BUILTIN_GATHER3DIV16SI);
33861 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33862 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33863 IX86_BUILTIN_GATHER3DIV8DI);
33865 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33866 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33867 IX86_BUILTIN_GATHER3ALTSIV8DF);
33869 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33870 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33871 IX86_BUILTIN_GATHER3ALTDIV16SF);
33873 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33874 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33875 IX86_BUILTIN_GATHER3ALTSIV8DI);
33877 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33878 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33879 IX86_BUILTIN_GATHER3ALTDIV16SI);
33881 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33882 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33883 IX86_BUILTIN_SCATTERSIV16SF);
33885 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33886 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33887 IX86_BUILTIN_SCATTERSIV8DF);
33889 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33890 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33891 IX86_BUILTIN_SCATTERDIV16SF);
33893 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33894 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33895 IX86_BUILTIN_SCATTERDIV8DF);
33897 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33898 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33899 IX86_BUILTIN_SCATTERSIV16SI);
33901 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33902 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33903 IX86_BUILTIN_SCATTERSIV8DI);
33905 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33906 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33907 IX86_BUILTIN_SCATTERDIV16SI);
33909 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33910 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33911 IX86_BUILTIN_SCATTERDIV8DI);
33914 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33915 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33916 IX86_BUILTIN_GATHER3SIV2DF);
33918 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33919 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33920 IX86_BUILTIN_GATHER3SIV4DF);
33922 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33923 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33924 IX86_BUILTIN_GATHER3DIV2DF);
33926 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33927 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33928 IX86_BUILTIN_GATHER3DIV4DF);
33930 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33931 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33932 IX86_BUILTIN_GATHER3SIV4SF);
33934 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33935 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33936 IX86_BUILTIN_GATHER3SIV8SF);
33938 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33939 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33940 IX86_BUILTIN_GATHER3DIV4SF);
33942 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33943 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33944 IX86_BUILTIN_GATHER3DIV8SF);
33946 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33947 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33948 IX86_BUILTIN_GATHER3SIV2DI);
33950 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33951 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33952 IX86_BUILTIN_GATHER3SIV4DI);
33954 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33955 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33956 IX86_BUILTIN_GATHER3DIV2DI);
33958 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33959 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33960 IX86_BUILTIN_GATHER3DIV4DI);
33962 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33963 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33964 IX86_BUILTIN_GATHER3SIV4SI);
33966 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33967 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33968 IX86_BUILTIN_GATHER3SIV8SI);
33970 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33971 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33972 IX86_BUILTIN_GATHER3DIV4SI);
33974 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33975 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33976 IX86_BUILTIN_GATHER3DIV8SI);
33978 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33979 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33980 IX86_BUILTIN_GATHER3ALTSIV4DF);
33982 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33983 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33984 IX86_BUILTIN_GATHER3ALTDIV8SF);
33986 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33987 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33988 IX86_BUILTIN_GATHER3ALTSIV4DI);
33990 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33991 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33992 IX86_BUILTIN_GATHER3ALTDIV8SI);
33994 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33995 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33996 IX86_BUILTIN_SCATTERSIV8SF);
33998 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33999 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34000 IX86_BUILTIN_SCATTERSIV4SF);
34002 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34003 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34004 IX86_BUILTIN_SCATTERSIV4DF);
34006 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34007 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34008 IX86_BUILTIN_SCATTERSIV2DF);
34010 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34011 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34012 IX86_BUILTIN_SCATTERDIV8SF);
34014 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34015 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34016 IX86_BUILTIN_SCATTERDIV4SF);
34018 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34019 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34020 IX86_BUILTIN_SCATTERDIV4DF);
34022 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34023 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34024 IX86_BUILTIN_SCATTERDIV2DF);
34026 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34027 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34028 IX86_BUILTIN_SCATTERSIV8SI);
34030 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34031 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34032 IX86_BUILTIN_SCATTERSIV4SI);
34034 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34035 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34036 IX86_BUILTIN_SCATTERSIV4DI);
34038 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34039 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34040 IX86_BUILTIN_SCATTERSIV2DI);
34042 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34043 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34044 IX86_BUILTIN_SCATTERDIV8SI);
34046 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34047 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34048 IX86_BUILTIN_SCATTERDIV4SI);
34050 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34051 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34052 IX86_BUILTIN_SCATTERDIV4DI);
34054 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34055 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34056 IX86_BUILTIN_SCATTERDIV2DI);
34059 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34060 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34061 IX86_BUILTIN_GATHERPFDPD);
34062 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34063 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34064 IX86_BUILTIN_GATHERPFDPS);
34065 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34066 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34067 IX86_BUILTIN_GATHERPFQPD);
34068 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34069 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34070 IX86_BUILTIN_GATHERPFQPS);
34071 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34072 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34073 IX86_BUILTIN_SCATTERPFDPD);
34074 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34075 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34076 IX86_BUILTIN_SCATTERPFDPS);
34077 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34078 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34079 IX86_BUILTIN_SCATTERPFQPD);
34080 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34081 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34082 IX86_BUILTIN_SCATTERPFQPS);
34085 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34086 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34087 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34088 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34089 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34090 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34091 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34092 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34093 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34094 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34095 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34096 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34097 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34098 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34101 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34102 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34104 /* MMX access to the vec_init patterns. */
34105 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34106 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34108 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34109 V4HI_FTYPE_HI_HI_HI_HI,
34110 IX86_BUILTIN_VEC_INIT_V4HI);
34112 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34113 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34114 IX86_BUILTIN_VEC_INIT_V8QI);
34116 /* Access to the vec_extract patterns. */
34117 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34118 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34119 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34120 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34121 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34122 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34123 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34124 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34125 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34126 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34128 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34129 "__builtin_ia32_vec_ext_v4hi",
34130 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34132 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34133 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34135 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34136 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34138 /* Access to the vec_set patterns. */
34139 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34140 "__builtin_ia32_vec_set_v2di",
34141 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34143 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34144 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34146 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34147 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34149 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34150 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34152 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34153 "__builtin_ia32_vec_set_v4hi",
34154 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34156 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34157 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34160 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34161 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34162 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34163 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34164 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34165 "__builtin_ia32_rdseed_di_step",
34166 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34169 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34170 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34171 def_builtin (OPTION_MASK_ISA_64BIT,
34172 "__builtin_ia32_addcarryx_u64",
34173 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34174 IX86_BUILTIN_ADDCARRYX64);
34177 def_builtin (0, "__builtin_ia32_sbb_u32",
34178 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34179 def_builtin (OPTION_MASK_ISA_64BIT,
34180 "__builtin_ia32_sbb_u64",
34181 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34182 IX86_BUILTIN_SBB64);
34184 /* Read/write FLAGS. */
34185 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34186 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34187 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34188 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34189 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34190 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34191 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34192 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34195 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34196 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34199 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34200 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34202 /* Add FMA4 multi-arg argument instructions */
34203 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34208 ftype = (enum ix86_builtin_func_type) d->flag;
34209 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register the MPX (Memory Protection Extensions) builtins from the
   bdesc_mpx table (side-effecting builtins) and the bdesc_mpx_const
   table (const builtins), marking each decl as leaf and nothrow.  */
34214 ix86_init_mpx_builtins ()
34216 const struct builtin_description * d;
34217 enum ix86_builtin_func_type ftype;
34221 for (i = 0, d = bdesc_mpx;
34222 i < ARRAY_SIZE (bdesc_mpx);
34228 ftype = (enum ix86_builtin_func_type) d->flag;
34229 decl = def_builtin (d->mask, d->name, ftype, d->code);
34231 /* Without leaf and nothrow flags for MPX builtins,
34232 abnormal edges may follow their calls when setjmp
34233 is present in the function. Since we may have a lot
34234 of MPX builtin calls, that causes lots of useless
34235 edges and enormous PHI nodes. To avoid this we mark
34236 MPX builtins as leaf and nothrow. */
34239 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34241 TREE_NOTHROW (decl) = 1;
/* Also record leaf/nothrow in the per-code ISA table — presumably so the
   flags can be re-applied if the decl is (re)created later for a
   different ISA setting; TODO confirm against ix86_add_new_builtins.  */
34245 ix86_builtins_isa[(int)d->code].leaf_p = true;
34246 ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* Same registration and leaf/nothrow treatment for the const MPX
   builtins.  */
34250 for (i = 0, d = bdesc_mpx_const;
34251 i < ARRAY_SIZE (bdesc_mpx_const);
34257 ftype = (enum ix86_builtin_func_type) d->flag;
34258 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34262 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34264 TREE_NOTHROW (decl) = 1;
34268 ix86_builtins_isa[(int)d->code].leaf_p = true;
34269 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34274 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34275 to return a pointer to VERSION_DECL if the outcome of the expression
34276 formed by PREDICATE_CHAIN is true. This function will be called during
34277 version dispatch to decide which function version to execute. It returns
34278 the basic block at the end, to which more conditions can be added. */
34281 add_condition_to_bb (tree function_decl, tree version_decl,
34282 tree predicate_chain, basic_block new_bb)
34284 gimple return_stmt;
34285 tree convert_expr, result_var;
34286 gimple convert_stmt;
34287 gimple call_cond_stmt;
34288 gimple if_else_stmt;
34290 basic_block bb1, bb2, bb3;
34293 tree cond_var, and_expr_var = NULL_TREE;
34296 tree predicate_decl, predicate_arg;
34298 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34300 gcc_assert (new_bb != NULL);
34301 gseq = bb_seq (new_bb);
/* Build the statements for "return (void *) &version_decl;".  */
34304 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34305 build_fold_addr_expr (version_decl));
34306 result_var = create_tmp_var (ptr_type_node);
34307 convert_stmt = gimple_build_assign (result_var, convert_expr);
34308 return_stmt = gimple_build_return (result_var);
/* No predicates: this is the unconditional (default) return — emit it
   straight into NEW_BB.  */
34310 if (predicate_chain == NULL_TREE)
34312 gimple_seq_add_stmt (&gseq, convert_stmt);
34313 gimple_seq_add_stmt (&gseq, return_stmt);
34314 set_bb_seq (new_bb, gseq);
34315 gimple_set_bb (convert_stmt, new_bb);
34316 gimple_set_bb (return_stmt, new_bb);
/* Emit a call to each predicate (TREE_PURPOSE = predicate fndecl,
   TREE_VALUE = its argument) and fold the integer results together.  */
34321 while (predicate_chain != NULL)
34323 cond_var = create_tmp_var (integer_type_node);
34324 predicate_decl = TREE_PURPOSE (predicate_chain);
34325 predicate_arg = TREE_VALUE (predicate_chain);
34326 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34327 gimple_call_set_lhs (call_cond_stmt, cond_var);
34329 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34330 gimple_set_bb (call_cond_stmt, new_bb);
34331 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34333 predicate_chain = TREE_CHAIN (predicate_chain);
34335 if (and_expr_var == NULL)
34336 and_expr_var = cond_var;
34339 gimple assign_stmt;
34340 /* Combine results with MIN_EXPR: if any predicate returned zero,
34341 the minimum is zero, i.e. and_expr_var = MIN <cond_var, and_expr_var>
acts as a logical AND over the non-negative predicate results.  */
34342 assign_stmt = gimple_build_assign (and_expr_var,
34343 build2 (MIN_EXPR, integer_type_node,
34344 cond_var, and_expr_var));
34346 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34347 gimple_set_bb (assign_stmt, new_bb);
34348 gimple_seq_add_stmt (&gseq, assign_stmt);
/* Branch on and_expr_var > 0: true edge returns this version, false
   edge falls through to the next condition.  */
34352 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34354 NULL_TREE, NULL_TREE);
34355 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34356 gimple_set_bb (if_else_stmt, new_bb);
34357 gimple_seq_add_stmt (&gseq, if_else_stmt);
34359 gimple_seq_add_stmt (&gseq, convert_stmt);
34360 gimple_seq_add_stmt (&gseq, return_stmt);
34361 set_bb_seq (new_bb, gseq);
/* Split NEW_BB after the condition; the fallthru edge becomes the
   TRUE edge leading to the return block.  */
34364 e12 = split_block (bb1, if_else_stmt);
34366 e12->flags &= ~EDGE_FALLTHRU;
34367 e12->flags |= EDGE_TRUE_VALUE;
/* Split again after the return; bb3 is where the next condition will
   be appended (reached via the FALSE edge).  */
34369 e23 = split_block (bb2, return_stmt);
34371 gimple_set_bb (convert_stmt, bb2);
34372 gimple_set_bb (return_stmt, bb2);
34375 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34378 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34385 /* This parses the attribute arguments to target in DECL and determines
34386 the right builtin to use to match the platform specification.
34387 It returns the priority value for this version decl. If PREDICATE_LIST
34388 is not NULL, it stores the list of cpu features that need to be checked
34389 before dispatching this function. */
34391 static unsigned int
34392 get_builtin_code_for_version (tree decl, tree *predicate_list)
34395 struct cl_target_option cur_target;
34397 struct cl_target_option *new_target;
34398 const char *arg_str = NULL;
34399 const char *attrs_str = NULL;
34400 char *tok_str = NULL;
34403 /* Priority of i386 features, greater value is higher priority. This is
34404 used to decide the order in which function dispatch must happen. For
34405 instance, a version specialized for SSE4.2 should be checked for dispatch
34406 before a version for SSE3, as SSE4.2 implies SSE3. */
34407 enum feature_priority
34438 enum feature_priority priority = P_ZERO;
34440 /* These are the target attribute strings for which a dispatcher is
34441 available, from fold_builtin_cpu. */
34443 static struct _feature_list
34445 const char *const name;
34446 const enum feature_priority priority;
34448 const feature_list[] =
34454 {"sse4a", P_SSE4_A},
34455 {"ssse3", P_SSSE3},
34456 {"sse4.1", P_SSE4_1},
34457 {"sse4.2", P_SSE4_2},
34458 {"popcnt", P_POPCNT},
34466 {"avx512f", P_AVX512F}
34470 static unsigned int NUM_FEATURES
34471 = sizeof (feature_list) / sizeof (struct _feature_list);
34475 tree predicate_chain = NULL_TREE;
34476 tree predicate_decl, predicate_arg;
/* Fetch the "target" attribute string from DECL; it must exist and be
   a string constant for a versioned function.  */
34478 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34479 gcc_assert (attrs != NULL);
34481 attrs = TREE_VALUE (TREE_VALUE (attrs));
34483 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34484 attrs_str = TREE_STRING_POINTER (attrs);
34486 /* Return priority zero for default function. */
34487 if (strcmp (attrs_str, "default") == 0)
34490 /* Handle arch= if specified. For priority, set it to be 1 more than
34491 the best instruction set the processor can handle. For instance, if
34492 there is a version for atom and a version for ssse3 (the highest ISA
34493 priority for atom), the atom version must be checked for dispatch
34494 before the ssse3 version. */
34495 if (strstr (attrs_str, "arch=") != NULL)
/* Save the global options, parse the attribute into a target node to
   learn which -march= it implies, then restore the globals below.  */
34497 cl_target_option_save (&cur_target, &global_options);
34498 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34499 &global_options_set);
34501 gcc_assert (target_node);
34502 new_target = TREE_TARGET_OPTION (target_node);
34503 gcc_assert (new_target);
34505 if (new_target->arch_specified && new_target->arch > 0)
/* Map the processor enum to the __builtin_cpu_is argument string and
   the dispatch priority of its best ISA.  Some PROCESSOR_* values
   cover several marketing names, disambiguated by an ISA flag only
   the newer part has (AES => westmere, F16C => ivybridge,
   ADX => broadwell).  */
34507 switch (new_target->arch)
34509 case PROCESSOR_CORE2:
34511 priority = P_PROC_SSSE3;
34513 case PROCESSOR_NEHALEM:
34514 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34515 arg_str = "westmere";
34517 /* We translate "arch=corei7" and "arch=nehalem" to
34518 "corei7" so that it will be mapped to M_INTEL_COREI7
34519 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34520 arg_str = "corei7";
34521 priority = P_PROC_SSE4_2;
34523 case PROCESSOR_SANDYBRIDGE:
34524 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34525 arg_str = "ivybridge";
34527 arg_str = "sandybridge";
34528 priority = P_PROC_AVX;
34530 case PROCESSOR_HASWELL:
34531 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34532 arg_str = "broadwell";
34534 arg_str = "haswell";
34535 priority = P_PROC_AVX2;
34537 case PROCESSOR_BONNELL:
34538 arg_str = "bonnell";
34539 priority = P_PROC_SSSE3;
34541 case PROCESSOR_KNL:
34543 priority = P_PROC_AVX512F;
34545 case PROCESSOR_SILVERMONT:
34546 arg_str = "silvermont";
34547 priority = P_PROC_SSE4_2;
34549 case PROCESSOR_AMDFAM10:
34550 arg_str = "amdfam10h";
34551 priority = P_PROC_SSE4_A;
34553 case PROCESSOR_BTVER1:
34554 arg_str = "btver1";
34555 priority = P_PROC_SSE4_A;
34557 case PROCESSOR_BTVER2:
34558 arg_str = "btver2";
34559 priority = P_PROC_BMI;
34561 case PROCESSOR_BDVER1:
34562 arg_str = "bdver1";
34563 priority = P_PROC_XOP;
34565 case PROCESSOR_BDVER2:
34566 arg_str = "bdver2";
34567 priority = P_PROC_FMA;
34569 case PROCESSOR_BDVER3:
34570 arg_str = "bdver3";
34571 priority = P_PROC_FMA;
34573 case PROCESSOR_BDVER4:
34574 arg_str = "bdver4";
34575 priority = P_PROC_AVX2;
/* Undo the option changes made while parsing the attribute.  */
34580 cl_target_option_restore (&global_options, &cur_target);
34582 if (predicate_list && arg_str == NULL)
34584 error_at (DECL_SOURCE_LOCATION (decl),
34585 "No dispatcher found for the versioning attributes");
/* Record a __builtin_cpu_is ("<arch>") predicate for the caller.  */
34589 if (predicate_list)
34591 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34592 /* For a C string literal the length includes the trailing NULL. */
34593 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34594 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34599 /* Process feature name. */
/* Tokenize a writable copy of the attribute string; each non-"arch="
   token must be a feature from feature_list above, and yields a
   __builtin_cpu_supports ("<feature>") predicate.  */
34600 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34601 strcpy (tok_str, attrs_str);
34602 token = strtok (tok_str, ",");
34603 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34605 while (token != NULL)
34607 /* Do not process "arch=" */
34608 if (strncmp (token, "arch=", 5) == 0)
34610 token = strtok (NULL, ",");
34613 for (i = 0; i < NUM_FEATURES; ++i)
34615 if (strcmp (token, feature_list[i].name) == 0)
34617 if (predicate_list)
34619 predicate_arg = build_string_literal (
34620 strlen (feature_list[i].name) + 1,
34621 feature_list[i].name);
34622 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34625 /* Find the maximum priority feature. */
34626 if (feature_list[i].priority > priority)
34627 priority = feature_list[i].priority;
/* Loop fell off the end of feature_list: unknown feature name.  */
34632 if (predicate_list && i == NUM_FEATURES)
34634 error_at (DECL_SOURCE_LOCATION (decl),
34635 "No dispatcher found for %s", token);
34638 token = strtok (NULL, ",");
34642 if (predicate_list && predicate_chain == NULL_TREE)
34644 error_at (DECL_SOURCE_LOCATION (decl),
34645 "No dispatcher found for the versioning attributes : %s",
34649 else if (predicate_list)
/* Predicates were consed on in reverse; restore attribute order.  */
34651 predicate_chain = nreverse (predicate_chain);
34652 *predicate_list = predicate_chain;
34658 /* This compares the priority of target features in function DECL1
34659 and DECL2. It returns positive value if DECL1 is higher priority,
34660 negative value if DECL2 is higher priority and 0 if they are the
34664 ix86_compare_version_priority (tree decl1, tree decl2)
/* Passing a NULL predicate_list asks only for the priority value,
   without building any dispatch predicates.  */
34666 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34667 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34669 return (int)priority1 - (int)priority2;
34672 /* V1 and V2 point to function versions with different priorities
34673 based on the target ISA. This function compares their priorities. */
/* qsort comparator: sorts _function_version_info records by
   dispatch_priority in DESCENDING order (highest priority first).  */
34676 feature_compare (const void *v1, const void *v2)
34678 typedef struct _function_version_info
34681 tree predicate_chain;
34682 unsigned int dispatch_priority;
34683 } function_version_info;
34685 const function_version_info c1 = *(const function_version_info *)v1;
34686 const function_version_info c2 = *(const function_version_info *)v2;
/* NOTE(review): unsigned subtraction converted to int — safe only
   because priorities come from a small enum; confirm if the enum ever
   grows near INT_MAX.  */
34687 return (c2.dispatch_priority - c1.dispatch_priority);
34690 /* This function generates the dispatch function for
34691 multi-versioned functions. DISPATCH_DECL is the function which will
34692 contain the dispatch logic. FNDECLS are the function choices for
34693 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34694 in DISPATCH_DECL in which the dispatch code is generated. */
34697 dispatch_function_versions (tree dispatch_decl,
34699 basic_block *empty_bb)
34702 gimple ifunc_cpu_init_stmt;
34706 vec<tree> *fndecls;
34707 unsigned int num_versions = 0;
34708 unsigned int actual_versions = 0;
/* Per-version record: decl, the predicate chain guarding it, and its
   dispatch priority.  Must mirror the layout used in feature_compare.  */
34711 struct _function_version_info
34714 tree predicate_chain;
34715 unsigned int dispatch_priority;
34716 }*function_version_info;
34718 gcc_assert (dispatch_decl != NULL
34719 && fndecls_p != NULL
34720 && empty_bb != NULL);
34722 /*fndecls_p is actually a vector. */
34723 fndecls = static_cast<vec<tree> *> (fndecls_p);
34725 /* At least one more version other than the default. */
34726 num_versions = fndecls->length ();
34727 gcc_assert (num_versions >= 2);
34729 function_version_info = (struct _function_version_info *)
34730 XNEWVEC (struct _function_version_info, (num_versions - 1));
34732 /* The first version in the vector is the default decl. */
34733 default_decl = (*fndecls)[0];
34735 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34737 gseq = bb_seq (*empty_bb);
34738 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34739 constructors, so explicitly call __builtin_cpu_init here. */
34740 ifunc_cpu_init_stmt = gimple_build_call_vec (
34741 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34742 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34743 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34744 set_bb_seq (*empty_bb, gseq);
/* Collect priority and predicates for each non-default version
   (vector index 0 is the default, handled last).  */
34749 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34751 tree version_decl = ele;
34752 tree predicate_chain = NULL_TREE;
34753 unsigned int priority;
34754 /* Get attribute string, parse it and find the right predicate decl.
34755 The predicate function could be a lengthy combination of many
34756 features, like arch-type and various isa-variants. */
34757 priority = get_builtin_code_for_version (version_decl,
/* A version with no predicates cannot be dispatched; skip it.  */
34760 if (predicate_chain == NULL_TREE)
34763 function_version_info [actual_versions].version_decl = version_decl;
34764 function_version_info [actual_versions].predicate_chain
34766 function_version_info [actual_versions].dispatch_priority = priority;
34770 /* Sort the versions according to descending order of dispatch priority. The
34771 priority is based on the ISA. This is not a perfect solution. There
34772 could still be ambiguity. If more than one function version is suitable
34773 to execute, which one should be dispatched? In future, allow the user
34774 to specify a dispatch priority next to the version. */
34775 qsort (function_version_info, actual_versions,
34776 sizeof (struct _function_version_info), feature_compare);
/* Chain one guarded return per version; each call extends the CFG and
   returns the block where the next condition goes.  */
34778 for (i = 0; i < actual_versions; ++i)
34779 *empty_bb = add_condition_to_bb (dispatch_decl,
34780 function_version_info[i].version_decl,
34781 function_version_info[i].predicate_chain,
34784 /* dispatch default version at the end. */
34785 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34788 free (function_version_info);
34792 /* Comparator function to be used in qsort routine to sort attribute
34793 specification strings to "target". */
/* qsort comparator over an array of C strings (char *): ascending
   strcmp order.  */
34796 attr_strcmp (const void *v1, const void *v2)
34798 const char *c1 = *(char *const*)v1;
34799 const char *c2 = *(char *const*)v2;
34800 return strcmp (c1, c2);
34803 /* ARGLIST is the argument to target attribute. This function tokenizes
34804 the comma separated arguments, sorts them and returns a string which
34805 is a unique identifier for the comma separated arguments. It also
34806 replaces non-identifier characters "=,-" with "_". */
34809 sorted_attr_string (tree arglist)
34812 size_t str_len_sum = 0;
34813 char **args = NULL;
34814 char *attr_str, *ret_str;
34816 unsigned int argnum = 1;
/* First pass: total length of all argument strings (plus separators)
   and the argument count.  */
34819 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34821 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34822 size_t len = strlen (str);
34823 str_len_sum += len + 1;
34824 if (arg != arglist)
34826 for (i = 0; i < strlen (str); i++)
/* Second pass: concatenate the arguments, comma-separated.  */
34831 attr_str = XNEWVEC (char, str_len_sum);
34833 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34835 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34836 size_t len = strlen (str);
34837 memcpy (attr_str + str_len_sum, str, len);
34838 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34839 str_len_sum += len + 1;
34842 /* Replace "=,-" with "_". */
34843 for (i = 0; i < strlen (attr_str); i++)
34844 if (attr_str[i] == '=' || attr_str[i]== '-')
/* Tokenize on ',' and sort the tokens so that equivalent attribute
   strings in any order produce the same identifier.  */
34850 args = XNEWVEC (char *, argnum);
34853 attr = strtok (attr_str, ",");
34854 while (attr != NULL)
34858 attr = strtok (NULL, ",");
34861 qsort (args, argnum, sizeof (char *), attr_strcmp);
/* Rebuild the sorted tokens into one '_'-separated string.  */
34863 ret_str = XNEWVEC (char, str_len_sum);
34865 for (i = 0; i < argnum; i++)
34867 size_t len = strlen (args[i]);
34868 memcpy (ret_str + str_len_sum, args[i], len);
34869 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34870 str_len_sum += len + 1;
/* Caller owns and must XDELETEVEC the returned string.  */
34874 XDELETEVEC (attr_str);
34878 /* This function changes the assembler name for functions that are
34879 versions. If DECL is a function version and has a "target"
34880 attribute, it appends the attribute string to its assembler name. */
34883 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34886 const char *orig_name, *version_string;
34887 char *attr_str, *assembler_name;
/* gnu_inline versions would have no out-of-line body to dispatch to.  */
34889 if (DECL_DECLARED_INLINE_P (decl)
34890 && lookup_attribute ("gnu_inline",
34891 DECL_ATTRIBUTES (decl)))
34892 error_at (DECL_SOURCE_LOCATION (decl),
34893 "Function versions cannot be marked as gnu_inline,"
34894 " bodies have to be generated");
34896 if (DECL_VIRTUAL_P (decl)
34897 || DECL_VINDEX (decl))
34898 sorry ("Virtual function multiversioning not supported");
34900 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34902 /* target attribute string cannot be NULL. */
34903 gcc_assert (version_attr != NULL_TREE);
34905 orig_name = IDENTIFIER_POINTER (id);
34907 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The default version keeps the unsuffixed name.  */
34909 if (strcmp (version_string, "default") == 0)
/* Append ".<sorted-attr-string>" to the original name; '.' keeps the
   result demangler-friendly.  */
34912 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34913 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34915 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34917 /* Allow assembler name to be modified if already set. */
34918 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34919 SET_DECL_RTL (decl, NULL);
34921 tree ret = get_identifier (assembler_name);
34922 XDELETEVEC (attr_str);
34923 XDELETEVEC (assembler_name);
34927 /* This function returns true if FN1 and FN2 are versions of the same function,
34928 that is, the target strings of the function decls are different. This assumes
34929 that FN1 and FN2 have the same signature. */
34932 ix86_function_versions (tree fn1, tree fn2)
34935 char *target1, *target2;
34938 if (TREE_CODE (fn1) != FUNCTION_DECL
34939 || TREE_CODE (fn2) != FUNCTION_DECL)
34942 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1))
34943 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34945 /* At least one function decl should have the target attribute specified. */
34946 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34949 /* Diagnose missing target attribute if one of the decls is already
34950 multi-versioned. */
34951 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34953 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34955 if (attr2 != NULL_TREE)
34962 error_at (DECL_SOURCE_LOCATION (fn2),
34963 "missing %<target%> attribute for multi-versioned %D",
34965 inform (DECL_SOURCE_LOCATION (fn1),
34966 "previous declaration of %D", fn1);
34967 /* Prevent diagnosing of the same error multiple times. */
34968 DECL_ATTRIBUTES (fn2)
34969 = tree_cons (get_identifier ("target"),
34970 copy_node (TREE_VALUE (attr1)),
34971 DECL_ATTRIBUTES (fn2));
/* Compare canonicalized (sorted) attribute strings so that the same
   features listed in a different order are recognized as equal.  */
34976 target1 = sorted_attr_string (TREE_VALUE (attr1));
34977 target2 = sorted_attr_string (TREE_VALUE (attr2));
34979 /* The sorted target strings must be different for fn1 and fn2
34981 if (strcmp (target1, target2) == 0)
34986 XDELETEVEC (target1);
34987 XDELETEVEC (target2);
/* Target hook: mangle DECL's assembler name ID, adding the version
   suffix for multi-versioned functions and any subtarget mangling.  */
34993 ix86_mangle_decl_assembler_name (tree decl, tree id)
34995 /* For function version, add the target suffix to the assembler name. */
34996 if (TREE_CODE (decl) == FUNCTION_DECL
34997 && DECL_FUNCTION_VERSIONED (decl))
34998 id = ix86_mangle_function_version_assembler_name (decl, id);
34999 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35000 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35006 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35007 is true, append the full path name of the source file. */
35010 make_name (tree decl, const char *suffix, bool make_unique)
35012 char *global_var_name;
35015 const char *unique_name = NULL;
35017 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35019 /* Get a unique name that can be used globally without any chances
35020 of collision at link time. */
35022 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* "+ 2" covers the '.' separator and the trailing NUL; the unique
   variant needs one more byte for its extra '.'.  */
35024 name_len = strlen (name) + strlen (suffix) + 2;
35027 name_len += strlen (unique_name) + 1;
35028 global_var_name = XNEWVEC (char, name_len);
35030 /* Use '.' to concatenate names as it is demangler friendly. */
35032 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35035 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
/* Caller owns and must XDELETEVEC the returned string.  */
35037 return global_var_name;
35040 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35042 /* Make a dispatcher declaration for the multi-versioned function DECL.
35043 Calls to DECL function will be replaced with calls to the dispatcher
35044 by the front-end. Return the decl created. */
35047 make_dispatcher_decl (const tree decl)
35051 tree fn_type, func_type;
35052 bool is_uniq = false;
/* Non-public versions need a uniquified ifunc name to avoid link-time
   collisions.  */
35054 if (TREE_PUBLIC (decl) == 0)
35057 func_name = make_name (decl, "ifunc", is_uniq);
/* The dispatcher has the same signature as the versioned function.  */
35059 fn_type = TREE_TYPE (decl);
35060 func_type = build_function_type (TREE_TYPE (fn_type),
35061 TYPE_ARG_TYPES (fn_type));
35063 func_decl = build_fn_decl (func_name, func_type);
35064 XDELETEVEC (func_name);
35065 TREE_USED (func_decl) = 1;
35066 DECL_CONTEXT (func_decl) = NULL_TREE;
35067 DECL_INITIAL (func_decl) = error_mark_node;
35068 DECL_ARTIFICIAL (func_decl) = 1;
35069 /* Mark this func as external, the resolver will flip it again if
35070 it gets generated. */
35071 DECL_EXTERNAL (func_decl) = 1;
35072 /* IFUNCs have to be externally visible. */
35073 TREE_PUBLIC (func_decl) = 1;
35080 /* Returns true if decl is multi-versioned and DECL is the default function,
35081 that is it is not tagged with target specific optimization. */
35084 is_function_default_version (const tree decl)
35086 if (TREE_CODE (decl) != FUNCTION_DECL
35087 || !DECL_FUNCTION_VERSIONED (decl))
/* The default version is the one whose target attribute string is
   exactly "default".  */
35089 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35091 attr = TREE_VALUE (TREE_VALUE (attr));
35092 return (TREE_CODE (attr) == STRING_CST
35093 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35096 /* Make a dispatcher declaration for the multi-versioned function DECL.
35097 Calls to DECL function will be replaced with calls to the dispatcher
35098 by the front-end. Returns the decl of the dispatcher function. */
35101 ix86_get_function_versions_dispatcher (void *decl)
35103 tree fn = (tree) decl;
35104 struct cgraph_node *node = NULL;
35105 struct cgraph_node *default_node = NULL;
35106 struct cgraph_function_version_info *node_v = NULL;
35107 struct cgraph_function_version_info *first_v = NULL;
35109 tree dispatch_decl = NULL;
35111 struct cgraph_function_version_info *default_version_info = NULL;
35113 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35115 node = cgraph_node::get (fn);
35116 gcc_assert (node != NULL);
35118 node_v = node->function_version ();
35119 gcc_assert (node_v != NULL);
/* Already created for another version of the same function — reuse.  */
35121 if (node_v->dispatcher_resolver != NULL)
35122 return node_v->dispatcher_resolver;
35124 /* Find the default version and make it the first node. */
35126 /* Go to the beginning of the chain. */
35127 while (first_v->prev != NULL)
35128 first_v = first_v->prev;
35129 default_version_info = first_v;
35130 while (default_version_info != NULL)
35132 if (is_function_default_version
35133 (default_version_info->this_node->decl))
35135 default_version_info = default_version_info->next;
35138 /* If there is no default node, just return NULL. */
35139 if (default_version_info == NULL)
35142 /* Make default info the first node. */
/* Unlink the default from its position and splice it in at the head
   of the doubly linked version chain.  */
35143 if (first_v != default_version_info)
35145 default_version_info->prev->next = default_version_info->next;
35146 if (default_version_info->next)
35147 default_version_info->next->prev = default_version_info->prev;
35148 first_v->prev = default_version_info;
35149 default_version_info->next = first_v;
35150 default_version_info->prev = NULL;
35153 default_node = default_version_info->this_node;
35155 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35156 if (targetm.has_ifunc_p ())
35158 struct cgraph_function_version_info *it_v = NULL;
35159 struct cgraph_node *dispatcher_node = NULL;
35160 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35162 /* Right now, the dispatching is done via ifunc. */
35163 dispatch_decl = make_dispatcher_decl (default_node->decl);
35165 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35166 gcc_assert (dispatcher_node != NULL);
35167 dispatcher_node->dispatcher_function = 1;
35168 dispatcher_version_info
35169 = dispatcher_node->insert_new_function_version ();
35170 dispatcher_version_info->next = default_version_info;
35171 dispatcher_node->definition = 1;
35173 /* Set the dispatcher for all the versions. */
35174 it_v = default_version_info;
35175 while (it_v != NULL)
35177 it_v->dispatcher_resolver = dispatch_decl;
/* No ifunc support on this target: multiversioning cannot work.  */
35184 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35185 "multiversioning needs ifunc which is not supported "
35189 return dispatch_decl;
35192 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35196 make_attribute (const char *name, const char *arg_name, tree chain)
35199 tree attr_arg_name;
/* Build the attribute NAME("ARG_NAME") and cons it onto CHAIN.  */
35203 attr_name = get_identifier (name);
35204 attr_arg_name = build_string (strlen (arg_name), arg_name);
35205 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35206 attr = tree_cons (attr_name, attr_args, chain);
35210 /* Make the resolver function decl to dispatch the versions of
35211 a multi-versioned function, DEFAULT_DECL. Create an
35212 empty basic block in the resolver and store the pointer in
35213 EMPTY_BB. Return the decl of the resolver function. */
35216 make_resolver_func (const tree default_decl,
35217 const tree dispatch_decl,
35218 basic_block *empty_bb)
35220 char *resolver_name;
35221 tree decl, type, decl_name, t;
35222 bool is_uniq = false;
35224 /* IFUNC's have to be globally visible. So, if the default_decl is
35225 not, then the name of the IFUNC should be made unique. */
35226 if (TREE_PUBLIC (default_decl) == 0)
35229 /* Append the filename to the resolver function if the versions are
35230 not externally visible. This is because the resolver function has
35231 to be externally visible for the loader to find it. So, appending
35232 the filename will prevent conflicts with a resolver function from
35233 another module which is based on the same version name. */
35234 resolver_name = make_name (default_decl, "resolver", is_uniq);
35236 /* The resolver function should return a (void *). */
35237 type = build_function_type_list (ptr_type_node, NULL_TREE);
35239 decl = build_fn_decl (resolver_name, type);
35240 decl_name = get_identifier (resolver_name);
35241 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35243 DECL_NAME (decl) = decl_name;
35244 TREE_USED (decl) = 1;
35245 DECL_ARTIFICIAL (decl) = 1;
35246 DECL_IGNORED_P (decl) = 0;
35247 /* IFUNC resolvers have to be externally visible. */
35248 TREE_PUBLIC (decl) = 1;
35249 DECL_UNINLINABLE (decl) = 1;
35251 /* Resolver is not external, body is generated. */
35252 DECL_EXTERNAL (decl) = 0;
35253 DECL_EXTERNAL (dispatch_decl) = 0;
35255 DECL_CONTEXT (decl) = NULL_TREE;
35256 DECL_INITIAL (decl) = make_node (BLOCK);
35257 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35259 if (DECL_COMDAT_GROUP (default_decl)
35260 || TREE_PUBLIC (default_decl))
35262 /* In this case, each translation unit with a call to this
35263 versioned function will put out a resolver. Ensure it
35264 is comdat to keep just one copy. */
35265 DECL_COMDAT (decl) = 1;
35266 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35268 /* Build result decl and add to function_decl. */
35269 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35270 DECL_ARTIFICIAL (t) = 1;
35271 DECL_IGNORED_P (t) = 1;
35272 DECL_RESULT (decl) = t;
/* Gimplify and give the resolver an empty lowered body; *EMPTY_BB is
   where dispatch_function_versions will emit the dispatch code.  */
35274 gimplify_function_tree (decl);
35275 push_cfun (DECL_STRUCT_FUNCTION (decl));
35276 *empty_bb = init_lowered_empty_function (decl, false, 0);
35278 cgraph_node::add_new_function (decl, true);
35279 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35283 gcc_assert (dispatch_decl != NULL);
35284 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35285 DECL_ATTRIBUTES (dispatch_decl)
35286 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35288 /* Create the alias for dispatch to resolver here. */
35289 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35290 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35291 XDELETEVEC (resolver_name);
35295 /* Generate the dispatching code body to dispatch multi-versioned function
35296 DECL. The target hook is called to process the "target" attributes and
35297 provide the code to dispatch the right function at run-time. NODE points
35298 to the dispatcher decl whose body will be created. */
/* Target hook: generate the body of the dispatcher for the
   multiversioned function represented by NODE_P (a cgraph_node *,
   passed as void * per the hook signature).  Returns the resolver
   decl, creating it on first call and caching it in the node's
   function_version info.  */
35301 ix86_generate_version_dispatcher_body (void *node_p)
35303 tree resolver_decl;
35304 basic_block empty_bb;
35305 tree default_ver_decl;
35306 struct cgraph_node *versn;
35307 struct cgraph_node *node;
35309 struct cgraph_function_version_info *node_version_info = NULL;
35310 struct cgraph_function_version_info *versn_info = NULL;
35312 node = (cgraph_node *)node_p;
35314 node_version_info = node->function_version ();
35315 gcc_assert (node->dispatcher_function
35316 && node_version_info != NULL);
/* Already generated once: reuse the cached resolver.  */
35318 if (node_version_info->dispatcher_resolver)
35319 return node_version_info->dispatcher_resolver;
35321 /* The first version in the chain corresponds to the default version. */
35322 default_ver_decl = node_version_info->next->this_node->decl;
35324 /* node is going to be an alias, so remove the finalized bit. */
35325 node->definition = false;
35327 resolver_decl = make_resolver_func (default_ver_decl,
35328 node->decl, &empty_bb);
35330 node_version_info->dispatcher_resolver = resolver_decl;
35332 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
/* Collect every function version into a vector for
   dispatch_function_versions below.  */
35334 auto_vec<tree, 2> fn_ver_vec;
35336 for (versn_info = node_version_info->next; versn_info;
35337 versn_info = versn_info->next)
35339 versn = versn_info->this_node;
35340 /* Check for virtual functions here again, as by this time it should
35341 have been determined if this function needs a vtable index or
35342 not. This happens for methods in derived classes that override
35343 virtual methods in base classes but are not explicitly marked as
35345 if (DECL_VINDEX (versn->decl))
35346 sorry ("Virtual function multiversioning not supported");
35348 fn_ver_vec.safe_push (versn->decl);
/* Emit the runtime dispatch code into the resolver's empty block and
   rebuild call-graph edges for the code just generated.  */
35351 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35352 cgraph_edge::rebuild_edges ();
35354 return resolver_decl;
35356 /* This builds the processor_model struct type defined in
35357 libgcc/config/i386/cpuinfo.c */
/* Build a RECORD_TYPE mirroring struct __processor_model defined in
   libgcc/config/i386/cpuinfo.c: three unsigned int fields
   (__cpu_vendor, __cpu_type, __cpu_subtype) followed by an array of
   unsigned int for the feature bits.  The field chain is built in
   reverse and finish_builtin_struct lays out the record.  */
35360 build_processor_model_struct (void)
35362 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35364 tree field = NULL_TREE, field_chain = NULL_TREE;
35366 tree type = make_node (RECORD_TYPE);
35368 /* The first 3 fields are unsigned int. */
35369 for (i = 0; i < 3; ++i)
35371 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35372 get_identifier (field_name[i]), unsigned_type_node);
35373 if (field_chain != NULL_TREE)
35374 DECL_CHAIN (field) = field_chain;
35375 field_chain = field;
35378 /* The last field is an array of unsigned integers of size one. */
35379 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35380 get_identifier (field_name[3]),
35381 build_array_type (unsigned_type_node,
35382 build_index_type (size_one_node)))
35383 if (field_chain != NULL_TREE)
35384 DECL_CHAIN (field) = field_chain;
35385 field_chain = field;
35387 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35391 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME,
   assembled immediately.  Used to reference __cpu_model, which is
   defined (and initialized) in libgcc.  */
35394 make_var_decl (tree type, const char *name)
35398 new_decl = build_decl (UNKNOWN_LOCATION,
35400 get_identifier(name),
/* External with no initializer: the real definition lives in libgcc.  */
35403 DECL_EXTERNAL (new_decl) = 1;
35404 TREE_STATIC (new_decl) = 1;
35405 TREE_PUBLIC (new_decl) = 1;
35406 DECL_INITIAL (new_decl) = 0;
35407 DECL_ARTIFICIAL (new_decl) = 0;
/* Keep the decl even if it appears unused.  */
35408 DECL_PRESERVE_P (new_decl) = 1;
35410 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35411 assemble_variable (new_decl, 0, 0, 0);
35416 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35417 into an integer defined in libgcc/config/i386/cpuinfo.c */
/* Fold a __builtin_cpu_is or __builtin_cpu_supports call FNDECL with
   argument vector ARGS (a single string constant) into a tree that
   tests the corresponding field of the libgcc-defined __cpu_model
   variable.  Returns integer_zero_node (after an error) for invalid
   arguments.  NOTE(review): the enum bodies and some statements are
   elided in this extract.  */
35420 fold_builtin_cpu (tree fndecl, tree *args)
35423 enum ix86_builtins fn_code = (enum ix86_builtins)
35424 DECL_FUNCTION_CODE (fndecl);
35425 tree param_string_cst = NULL;
35427 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35428 enum processor_features
35451 /* These are the values for vendor types and cpu types and subtypes
35452 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35453 the corresponding start value. */
35454 enum processor_model
35464 M_INTEL_SILVERMONT,
35468 M_CPU_SUBTYPE_START,
35469 M_INTEL_COREI7_NEHALEM,
35470 M_INTEL_COREI7_WESTMERE,
35471 M_INTEL_COREI7_SANDYBRIDGE,
35472 M_AMDFAM10H_BARCELONA,
35473 M_AMDFAM10H_SHANGHAI,
35474 M_AMDFAM10H_ISTANBUL,
35475 M_AMDFAM15H_BDVER1,
35476 M_AMDFAM15H_BDVER2,
35477 M_AMDFAM15H_BDVER3,
35478 M_AMDFAM15H_BDVER4,
35479 M_INTEL_COREI7_IVYBRIDGE,
35480 M_INTEL_COREI7_HASWELL,
35481 M_INTEL_COREI7_BROADWELL
/* Mapping of the strings accepted by __builtin_cpu_is to
   processor_model values.  */
35484 static struct _arch_names_table
35486 const char *const name;
35487 const enum processor_model model;
35489 const arch_names_table[] =
35492 {"intel", M_INTEL},
35493 {"atom", M_INTEL_BONNELL},
35494 {"slm", M_INTEL_SILVERMONT},
35495 {"core2", M_INTEL_CORE2},
35496 {"corei7", M_INTEL_COREI7},
35497 {"nehalem", M_INTEL_COREI7_NEHALEM},
35498 {"westmere", M_INTEL_COREI7_WESTMERE},
35499 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35500 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35501 {"haswell", M_INTEL_COREI7_HASWELL},
35502 {"broadwell", M_INTEL_COREI7_BROADWELL},
35503 {"bonnell", M_INTEL_BONNELL},
35504 {"silvermont", M_INTEL_SILVERMONT},
35505 {"knl", M_INTEL_KNL},
35506 {"amdfam10h", M_AMDFAM10H},
35507 {"barcelona", M_AMDFAM10H_BARCELONA},
35508 {"shanghai", M_AMDFAM10H_SHANGHAI},
35509 {"istanbul", M_AMDFAM10H_ISTANBUL},
35510 {"btver1", M_AMD_BTVER1},
35511 {"amdfam15h", M_AMDFAM15H},
35512 {"bdver1", M_AMDFAM15H_BDVER1},
35513 {"bdver2", M_AMDFAM15H_BDVER2},
35514 {"bdver3", M_AMDFAM15H_BDVER3},
35515 {"bdver4", M_AMDFAM15H_BDVER4},
35516 {"btver2", M_AMD_BTVER2},
/* Mapping of the strings accepted by __builtin_cpu_supports to
   feature bit indices.  */
35519 static struct _isa_names_table
35521 const char *const name;
35522 const enum processor_features feature;
35524 const isa_names_table[] =
35528 {"popcnt", F_POPCNT},
35532 {"ssse3", F_SSSE3},
35533 {"sse4a", F_SSE4_A},
35534 {"sse4.1", F_SSE4_1},
35535 {"sse4.2", F_SSE4_2},
35541 {"avx512f",F_AVX512F},
/* Reference the extern __cpu_model variable defined in libgcc.  */
35546 tree __processor_model_type = build_processor_model_struct ();
35547 tree __cpu_model_var = make_var_decl (__processor_model_type,
35551 varpool_node::add (__cpu_model_var);
35553 gcc_assert ((args != NULL) && (*args != NULL));
/* Peel wrapper expressions (e.g. NOP_EXPR/ADDR_EXPR) until the
   underlying STRING_CST argument is found.  */
35555 param_string_cst = *args;
35556 while (param_string_cst
35557 && TREE_CODE (param_string_cst) != STRING_CST)
35559 /* *args must be a expr that can contain other EXPRS leading to a
35561 if (!EXPR_P (param_string_cst))
35563 error ("Parameter to builtin must be a string constant or literal");
35564 return integer_zero_node;
35566 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35569 gcc_assert (param_string_cst);
35571 if (fn_code == IX86_BUILTIN_CPU_IS)
35577 unsigned int field_val = 0;
35578 unsigned int NUM_ARCH_NAMES
35579 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
/* Linear search for the requested architecture name.  */
35581 for (i = 0; i < NUM_ARCH_NAMES; i++)
35582 if (strcmp (arch_names_table[i].name,
35583 TREE_STRING_POINTER (param_string_cst)) == 0)
35586 if (i == NUM_ARCH_NAMES)
35588 error ("Parameter to builtin not valid: %s",
35589 TREE_STRING_POINTER (param_string_cst));
35590 return integer_zero_node;
35593 field = TYPE_FIELDS (__processor_model_type);
35594 field_val = arch_names_table[i].model;
35596 /* CPU types are stored in the next field. */
35597 if (field_val > M_CPU_TYPE_START
35598 && field_val < M_CPU_SUBTYPE_START)
35600 field = DECL_CHAIN (field);
35601 field_val -= M_CPU_TYPE_START;
35604 /* CPU subtypes are stored in the next field. */
35605 if (field_val > M_CPU_SUBTYPE_START)
35607 field = DECL_CHAIN ( DECL_CHAIN (field));
35608 field_val -= M_CPU_SUBTYPE_START;
35611 /* Get the appropriate field in __cpu_model. */
35612 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35615 /* Check the value. */
35616 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35617 build_int_cstu (unsigned_type_node, field_val));
35618 return build1 (CONVERT_EXPR, integer_type_node, final);
35620 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35627 unsigned int field_val = 0;
35628 unsigned int NUM_ISA_NAMES
35629 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35631 for (i = 0; i < NUM_ISA_NAMES; i++)
35632 if (strcmp (isa_names_table[i].name,
35633 TREE_STRING_POINTER (param_string_cst)) == 0)
35636 if (i == NUM_ISA_NAMES)
35638 error ("Parameter to builtin not valid: %s",
35639 TREE_STRING_POINTER (param_string_cst));
35640 return integer_zero_node;
35643 field = TYPE_FIELDS (__processor_model_type);
35644 /* Get the last field, which is __cpu_features. */
35645 while (DECL_CHAIN (field))
35646 field = DECL_CHAIN (field);
35648 /* Get the appropriate field: __cpu_model.__cpu_features */
35649 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35652 /* Access the 0th element of __cpu_features array. */
35653 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35654 integer_zero_node, NULL_TREE, NULL_TREE);
35656 field_val = (1 << isa_names_table[i].feature);
35657 /* Return __cpu_model.__cpu_features[0] & field_val */
35658 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35659 build_int_cstu (unsigned_type_node, field_val));
35660 return build1 (CONVERT_EXPR, integer_type_node, final);
/* Only CPU_IS and CPU_SUPPORTS are folded here.  */
35662 gcc_unreachable ();
/* Target hook TARGET_FOLD_BUILTIN for ix86: fold the machine-specific
   __builtin_cpu_is/__builtin_cpu_supports builtins at the tree level;
   delegate anything else to SUBTARGET_FOLD_BUILTIN when defined.  */
35666 ix86_fold_builtin (tree fndecl, int n_args,
35667 tree *args, bool ignore ATTRIBUTE_UNUSED)
35669 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35671 enum ix86_builtins fn_code = (enum ix86_builtins)
35672 DECL_FUNCTION_CODE (fndecl);
35673 if (fn_code == IX86_BUILTIN_CPU_IS
35674 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35676 gcc_assert (n_args == 1);
35677 return fold_builtin_cpu (fndecl, args);
35681 #ifdef SUBTARGET_FOLD_BUILTIN
35682 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35688 /* Make builtins to detect cpu type and features supported. NAME is
35689 the builtin name, CODE is the builtin code, and FTYPE is the function
35690 type of the builtin. */
/* Register one CPU-detection builtin.  NAME is the builtin's name,
   CODE its ix86_builtins code, FTYPE its function type, and IS_CONST
   whether it may be treated as a pure/readonly call.  The resulting
   decl is cached in the ix86_builtins[] table.  */
35693 make_cpu_type_builtin (const char* name, int code,
35694 enum ix86_builtin_func_type ftype, bool is_const)
35699 type = ix86_get_builtin_func_type (ftype);
35700 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35702 gcc_assert (decl != NULL_TREE);
35703 ix86_builtins[(int) code] = decl;
35704 TREE_READONLY (decl) = is_const;
35707 /* Make builtins to get CPU type and features supported. The created
35710 __builtin_cpu_init (), to detect cpu type and features,
35711 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35712 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
/* Register the three CPU-detection builtins:
   __builtin_cpu_init (not const — it mutates __cpu_model),
   __builtin_cpu_is and __builtin_cpu_supports (const — pure reads).  */
35716 ix86_init_platform_type_builtins (void)
35718 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35719 INT_FTYPE_VOID, false);
35720 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35721 INT_FTYPE_PCCHAR, true);
35722 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35723 INT_FTYPE_PCCHAR, true);
35726 /* Internal method for ix86_init_builtins. */
/* Register the ABI-specific va_start/va_end/va_copy builtins
   (__builtin_ms_va_* and __builtin_sysv_va_*) so that 64-bit code can
   handle varargs for both calling conventions.  Each set is tagged
   with the matching ms_abi/sysv_abi attribute.  */
35729 ix86_init_builtins_va_builtins_abi (void)
35731 tree ms_va_ref, sysv_va_ref;
35732 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35733 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35734 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35735 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35739 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35740 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35741 ms_va_ref = build_reference_type (ms_va_list_type_node);
35743 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: va_end takes the va_list by reference;
   va_start is varargs; va_copy takes destination and source.  */
35746 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35747 fnvoid_va_start_ms =
35748 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35749 fnvoid_va_end_sysv =
35750 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35751 fnvoid_va_start_sysv =
35752 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35754 fnvoid_va_copy_ms =
35755 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35757 fnvoid_va_copy_sysv =
35758 build_function_type_list (void_type_node, sysv_va_ref,
35759 sysv_va_ref, NULL_TREE);
35761 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35762 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35763 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35764 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35765 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35766 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35767 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35768 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35769 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35770 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35771 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35772 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Register the ix86-specific builtin floating types __float80 and
   __float128, plus the primitive builtin types generated by
   i386-builtin-types.awk.  */
35776 ix86_init_builtin_types (void)
35778 tree float128_type_node, float80_type_node;
35780 /* The __float80 type. */
/* Reuse long double when it already has XFmode; otherwise build a
   fresh 80-bit REAL_TYPE.  */
35781 float80_type_node = long_double_type_node;
35782 if (TYPE_MODE (float80_type_node) != XFmode)
35784 /* The __float80 type. */
35785 float80_type_node = make_node (REAL_TYPE);
35787 TYPE_PRECISION (float80_type_node) = 80;
35788 layout_type (float80_type_node);
35790 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35792 /* The __float128 type. */
35793 float128_type_node = make_node (REAL_TYPE);
35794 TYPE_PRECISION (float128_type_node) = 128;
35795 layout_type (float128_type_node);
35796 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35798 /* This macro is built by i386-builtin-types.awk. */
35799 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Target hook TARGET_INIT_BUILTINS for ix86: set up builtin types,
   CPU-detection builtins, TFmode helpers (__builtin_infq etc.), and
   the TM/MMX/SSE/MPX builtin families.  */
35803 ix86_init_builtins (void)
35807 ix86_init_builtin_types ();
35809 /* Builtins to get CPU type and features. */
35810 ix86_init_platform_type_builtins ();
35812 /* TFmode support builtins. */
35813 def_builtin_const (0, "__builtin_infq",
35814 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35815 def_builtin_const (0, "__builtin_huge_valq",
35816 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35818 /* We will expand them to normal call if SSE isn't available since
35819 they are used by libgcc. */
35820 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35821 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35822 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35823 TREE_READONLY (t) = 1;
35824 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35826 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35827 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35828 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35829 TREE_READONLY (t) = 1;
35830 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35832 ix86_init_tm_builtins ();
35833 ix86_init_mmx_sse_builtins ();
35834 ix86_init_mpx_builtins ();
/* NOTE(review): the guard condition for the va builtins (likely a
   TARGET_64BIT check) is elided in this extract.  */
35837 ix86_init_builtins_va_builtins_abi ();
35839 #ifdef SUBTARGET_INIT_BUILTINS
35840 SUBTARGET_INIT_BUILTINS;
35844 /* Return the ix86 builtin for CODE. */
/* Target hook TARGET_BUILTIN_DECL: return the decl registered for
   builtin CODE, or error_mark_node for an out-of-range code.  */
35847 ix86_builtin_decl (unsigned code, bool)
35849 if (code >= IX86_BUILTIN_MAX)
35850 return error_mark_node;
35852 return ix86_builtins[code];
35855 /* Errors in the source file can cause expand_expr to return const0_rtx
35856 where we expect a vector. To avoid crashing, use one of the vector
35857 clear instructions. */
/* Replace a bare const0_rtx (which expand_expr can produce on source
   errors) by the zero vector constant of MODE, so later vector insn
   emission does not crash.  */
35859 safe_vector_operand (rtx x, machine_mode mode)
35861 if (x == const0_rtx)
35862 x = CONST0_RTX (mode);
35866 /* Fixup modeless constants to fit required mode. */
/* Give a modeless constant (GET_MODE == VOIDmode) the required MODE
   via an unsigned conversion.  */
35868 fixup_modeless_constant (rtx x, machine_mode mode)
35870 if (GET_MODE (x) == VOIDmode)
35871 x = convert_to_mode (mode, x, 1);
35875 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Subroutine of ix86_expand_builtin: expand a two-operand builtin
   call EXP using insn ICODE, producing the result in TARGET (or a
   fresh pseudo when TARGET is unsuitable).  */
35878 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35881 tree arg0 = CALL_EXPR_ARG (exp, 0);
35882 tree arg1 = CALL_EXPR_ARG (exp, 1);
35883 rtx op0 = expand_normal (arg0);
35884 rtx op1 = expand_normal (arg1);
35885 machine_mode tmode = insn_data[icode].operand[0].mode;
35886 machine_mode mode0 = insn_data[icode].operand[1].mode;
35887 machine_mode mode1 = insn_data[icode].operand[2].mode;
35889 if (VECTOR_MODE_P (mode0))
35890 op0 = safe_vector_operand (op0, mode0);
35891 if (VECTOR_MODE_P (mode1))
35892 op1 = safe_vector_operand (op1, mode1);
35894 if (optimize || !target
35895 || GET_MODE (target) != tmode
35896 || !insn_data[icode].operand[0].predicate (target, tmode))
35897 target = gen_reg_rtx (tmode);
/* A SImode value destined for a TImode operand is widened by loading
   it into the low element of a V4SI register first.  */
35899 if (GET_MODE (op1) == SImode && mode1 == TImode)
35901 rtx x = gen_reg_rtx (V4SImode);
35902 emit_insn (gen_sse2_loadd (x, op1));
35903 op1 = gen_lowpart (TImode, x);
35906 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35907 op0 = copy_to_mode_reg (mode0, op0);
35908 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35909 op1 = copy_to_mode_reg (mode1, op1);
35911 pat = GEN_FCN (icode) (target, op0, op1);
35920 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Subroutine of ix86_expand_builtin: expand an XOP-style builtin with
   2-4 arguments.  M_TYPE selects the argument count / shape, and
   SUB_CODE is the rtx comparison (or sub-operation) code for the
   comparison variants.  NOTE(review): nargs assignments, `break;`
   statements and the trailing emit/return are elided in this
   extract.  */
35923 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35924 enum ix86_builtin_func_type m_type,
35925 enum rtx_code sub_code)
35930 bool comparison_p = false;
35932 bool last_arg_constant = false;
35933 int num_memory = 0;
35939 machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify the builtin shape: 4-operand forms with a trailing
   immediate...  */
35943 case MULTI_ARG_4_DF2_DI_I:
35944 case MULTI_ARG_4_DF2_DI_I1:
35945 case MULTI_ARG_4_SF2_SI_I:
35946 case MULTI_ARG_4_SF2_SI_I1:
35948 last_arg_constant = true;
/* ...plain 3-operand forms...  */
35951 case MULTI_ARG_3_SF:
35952 case MULTI_ARG_3_DF:
35953 case MULTI_ARG_3_SF2:
35954 case MULTI_ARG_3_DF2:
35955 case MULTI_ARG_3_DI:
35956 case MULTI_ARG_3_SI:
35957 case MULTI_ARG_3_SI_DI:
35958 case MULTI_ARG_3_HI:
35959 case MULTI_ARG_3_HI_SI:
35960 case MULTI_ARG_3_QI:
35961 case MULTI_ARG_3_DI2:
35962 case MULTI_ARG_3_SI2:
35963 case MULTI_ARG_3_HI2:
35964 case MULTI_ARG_3_QI2:
/* ...plain 2-operand forms...  */
35968 case MULTI_ARG_2_SF:
35969 case MULTI_ARG_2_DF:
35970 case MULTI_ARG_2_DI:
35971 case MULTI_ARG_2_SI:
35972 case MULTI_ARG_2_HI:
35973 case MULTI_ARG_2_QI:
/* ...2-operand forms whose last operand is an immediate...  */
35977 case MULTI_ARG_2_DI_IMM:
35978 case MULTI_ARG_2_SI_IMM:
35979 case MULTI_ARG_2_HI_IMM:
35980 case MULTI_ARG_2_QI_IMM:
35982 last_arg_constant = true;
/* ...unary forms...  */
35985 case MULTI_ARG_1_SF:
35986 case MULTI_ARG_1_DF:
35987 case MULTI_ARG_1_SF2:
35988 case MULTI_ARG_1_DF2:
35989 case MULTI_ARG_1_DI:
35990 case MULTI_ARG_1_SI:
35991 case MULTI_ARG_1_HI:
35992 case MULTI_ARG_1_QI:
35993 case MULTI_ARG_1_SI_DI:
35994 case MULTI_ARG_1_HI_DI:
35995 case MULTI_ARG_1_HI_SI:
35996 case MULTI_ARG_1_QI_DI:
35997 case MULTI_ARG_1_QI_SI:
35998 case MULTI_ARG_1_QI_HI:
/* ...comparison forms (the comparison rtx becomes an extra
   operand)...  */
36002 case MULTI_ARG_2_DI_CMP:
36003 case MULTI_ARG_2_SI_CMP:
36004 case MULTI_ARG_2_HI_CMP:
36005 case MULTI_ARG_2_QI_CMP:
36007 comparison_p = true;
/* ...and test forms.  */
36010 case MULTI_ARG_2_SF_TF:
36011 case MULTI_ARG_2_DF_TF:
36012 case MULTI_ARG_2_DI_TF:
36013 case MULTI_ARG_2_SI_TF:
36014 case MULTI_ARG_2_HI_TF:
36015 case MULTI_ARG_2_QI_TF:
36021 gcc_unreachable ();
36024 if (optimize || !target
36025 || GET_MODE (target) != tmode
36026 || !insn_data[icode].operand[0].predicate (target, tmode))
36027 target = gen_reg_rtx (tmode);
36029 gcc_assert (nargs <= 4);
/* Expand and legitimize each argument for its insn operand.  */
36031 for (i = 0; i < nargs; i++)
36033 tree arg = CALL_EXPR_ARG (exp, i);
36034 rtx op = expand_normal (arg);
36035 int adjust = (comparison_p) ? 1 : 0;
36036 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36038 if (last_arg_constant && i == nargs - 1)
36040 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36042 enum insn_code new_icode = icode;
36045 case CODE_FOR_xop_vpermil2v2df3:
36046 case CODE_FOR_xop_vpermil2v4sf3:
36047 case CODE_FOR_xop_vpermil2v4df3:
36048 case CODE_FOR_xop_vpermil2v8sf3:
36049 error ("the last argument must be a 2-bit immediate");
36050 return gen_reg_rtx (tmode);
/* Non-constant rotate counts fall back to the generic rotate
   patterns.  */
36051 case CODE_FOR_xop_rotlv2di3:
36052 new_icode = CODE_FOR_rotlv2di3;
36054 case CODE_FOR_xop_rotlv4si3:
36055 new_icode = CODE_FOR_rotlv4si3;
36057 case CODE_FOR_xop_rotlv8hi3:
36058 new_icode = CODE_FOR_rotlv8hi3;
36060 case CODE_FOR_xop_rotlv16qi3:
36061 new_icode = CODE_FOR_rotlv16qi3;
/* Constant rotate count: mask it to the element width.  */
36063 if (CONST_INT_P (op))
36065 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36066 op = GEN_INT (INTVAL (op) & mask);
36067 gcc_checking_assert
36068 (insn_data[icode].operand[i + 1].predicate (op, mode));
/* The replacement pattern must agree with the original on modes
   and predicates.  */
36072 gcc_checking_assert
36074 && insn_data[new_icode].operand[0].mode == tmode
36075 && insn_data[new_icode].operand[1].mode == tmode
36076 && insn_data[new_icode].operand[2].mode == mode
36077 && insn_data[new_icode].operand[0].predicate
36078 == insn_data[icode].operand[0].predicate
36079 && insn_data[new_icode].operand[1].predicate
36080 == insn_data[icode].operand[1].predicate);
36086 gcc_unreachable ();
36093 if (VECTOR_MODE_P (mode))
36094 op = safe_vector_operand (op, mode);
36096 /* If we aren't optimizing, only allow one memory operand to be
36098 if (memory_operand (op, mode))
36101 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36104 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36106 op = force_reg (mode, op);
36110 args[i].mode = mode;
/* Emit the pattern for the collected operand count.  */
36116 pat = GEN_FCN (icode) (target, args[0].op);
36121 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36122 GEN_INT ((int)sub_code));
36123 else if (! comparison_p)
36124 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36127 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36131 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36136 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36140 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36144 gcc_unreachable ();
36154 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36155 insns with vec_merge. */
/* Subroutine of ix86_expand_args_builtin: expand a scalar unop insn
   with vec_merge — the single source operand is used for both insn
   inputs (op0 and op1).  */
36158 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36162 tree arg0 = CALL_EXPR_ARG (exp, 0);
36163 rtx op1, op0 = expand_normal (arg0);
36164 machine_mode tmode = insn_data[icode].operand[0].mode;
36165 machine_mode mode0 = insn_data[icode].operand[1].mode;
36167 if (optimize || !target
36168 || GET_MODE (target) != tmode
36169 || !insn_data[icode].operand[0].predicate (target, tmode))
36170 target = gen_reg_rtx (tmode);
36172 if (VECTOR_MODE_P (mode0))
36173 op0 = safe_vector_operand (op0, mode0);
36175 if ((optimize && !register_operand (op0, mode0))
36176 || !insn_data[icode].operand[1].predicate (op0, mode0))
36177 op0 = copy_to_mode_reg (mode0, op0);
/* op1 duplicates op0 (assignment elided in this extract).  */
36180 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36181 op1 = copy_to_mode_reg (mode0, op1);
36183 pat = GEN_FCN (icode) (target, op0, op1);
36190 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Subroutine of ix86_expand_builtin: expand an SSE comparison builtin
   described by D.  SWAP requests exchanging the operands (used for
   comparisons not directly available in hardware).  */
36193 ix86_expand_sse_compare (const struct builtin_description *d,
36194 tree exp, rtx target, bool swap)
36197 tree arg0 = CALL_EXPR_ARG (exp, 0);
36198 tree arg1 = CALL_EXPR_ARG (exp, 1);
36199 rtx op0 = expand_normal (arg0);
36200 rtx op1 = expand_normal (arg1);
36202 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36203 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36204 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36205 enum rtx_code comparison = d->comparison;
36207 if (VECTOR_MODE_P (mode0))
36208 op0 = safe_vector_operand (op0, mode0);
36209 if (VECTOR_MODE_P (mode1))
36210 op1 = safe_vector_operand (op1, mode1);
36212 /* Swap operands if we have a comparison that isn't available in
36215 std::swap (op0, op1);
36217 if (optimize || !target
36218 || GET_MODE (target) != tmode
36219 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36220 target = gen_reg_rtx (tmode);
36222 if ((optimize && !register_operand (op0, mode0))
36223 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36224 op0 = copy_to_mode_reg (mode0, op0);
36225 if ((optimize && !register_operand (op1, mode1))
36226 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36227 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx is passed as an extra operand to the pattern.  */
36229 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36230 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36237 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Subroutine of ix86_expand_builtin: expand a comi/ucomi builtin D.
   The result is a 0/1 SImode value built by setting the QImode
   low part from the flags comparison.  */
36240 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36244 tree arg0 = CALL_EXPR_ARG (exp, 0);
36245 tree arg1 = CALL_EXPR_ARG (exp, 1);
36246 rtx op0 = expand_normal (arg0);
36247 rtx op1 = expand_normal (arg1);
36248 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36249 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36250 enum rtx_code comparison = d->comparison;
36252 if (VECTOR_MODE_P (mode0))
36253 op0 = safe_vector_operand (op0, mode0);
36254 if (VECTOR_MODE_P (mode1))
36255 op1 = safe_vector_operand (op1, mode1);
36257 /* Swap operands if we have a comparison that isn't available in
36259 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36260 std::swap (op0, op1);
/* Zero the full SImode result, then write only its QImode low part
   so the upper bits stay zero.  */
36262 target = gen_reg_rtx (SImode);
36263 emit_move_insn (target, const0_rtx);
36264 target = gen_rtx_SUBREG (QImode, target, 0);
36266 if ((optimize && !register_operand (op0, mode0))
36267 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36268 op0 = copy_to_mode_reg (mode0, op0);
36269 if ((optimize && !register_operand (op1, mode1))
36270 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36271 op1 = copy_to_mode_reg (mode1, op1);
36273 pat = GEN_FCN (d->icode) (op0, op1);
36277 emit_insn (gen_rtx_SET (VOIDmode,
36278 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36279 gen_rtx_fmt_ee (comparison, QImode,
36283 return SUBREG_REG (target);
36286 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
/* Subroutine of ix86_expand_args_builtin: expand a round insn D.
   d->comparison is reused here to carry the rounding-mode immediate
   passed as the pattern's last operand.  */
36289 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36293 tree arg0 = CALL_EXPR_ARG (exp, 0);
36294 rtx op1, op0 = expand_normal (arg0);
36295 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36296 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36298 if (optimize || target == 0
36299 || GET_MODE (target) != tmode
36300 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36301 target = gen_reg_rtx (tmode);
36303 if (VECTOR_MODE_P (mode0))
36304 op0 = safe_vector_operand (op0, mode0);
36306 if ((optimize && !register_operand (op0, mode0))
36307 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36308 op0 = copy_to_mode_reg (mode0, op0);
36310 op1 = GEN_INT (d->comparison);
36312 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Expand a two-source round/pack builtin D (vec_pack_sfix style):
   both vector sources are legitimized and the rounding-mode
   immediate (stored in d->comparison) is appended as operand 3.  */
36320 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36321 tree exp, rtx target)
36324 tree arg0 = CALL_EXPR_ARG (exp, 0);
36325 tree arg1 = CALL_EXPR_ARG (exp, 1);
36326 rtx op0 = expand_normal (arg0);
36327 rtx op1 = expand_normal (arg1);
36329 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36330 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36331 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36333 if (optimize || target == 0
36334 || GET_MODE (target) != tmode
36335 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36336 target = gen_reg_rtx (tmode);
36338 op0 = safe_vector_operand (op0, mode0);
36339 op1 = safe_vector_operand (op1, mode1);
36341 if ((optimize && !register_operand (op0, mode0))
36342 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36343 op0 = copy_to_mode_reg (mode0, op0);
36344 if ((optimize && !register_operand (op1, mode1))
36345 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36346 op1 = copy_to_mode_reg (mode1, op1);
36348 op2 = GEN_INT (d->comparison);
36350 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36357 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Subroutine of ix86_expand_builtin: expand a ptest builtin D.  The
   pattern sets flags; the 0/1 result is materialized by writing the
   QImode low part of a zeroed SImode register from d->comparison on
   the flags register.  */
36360 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36364 tree arg0 = CALL_EXPR_ARG (exp, 0);
36365 tree arg1 = CALL_EXPR_ARG (exp, 1);
36366 rtx op0 = expand_normal (arg0);
36367 rtx op1 = expand_normal (arg1);
36368 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36369 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36370 enum rtx_code comparison = d->comparison;
36372 if (VECTOR_MODE_P (mode0))
36373 op0 = safe_vector_operand (op0, mode0);
36374 if (VECTOR_MODE_P (mode1))
36375 op1 = safe_vector_operand (op1, mode1);
/* Zero the whole SImode result first; only the low byte is set from
   the comparison, keeping the upper bits zero.  */
36377 target = gen_reg_rtx (SImode);
36378 emit_move_insn (target, const0_rtx);
36379 target = gen_rtx_SUBREG (QImode, target, 0);
36381 if ((optimize && !register_operand (op0, mode0))
36382 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36383 op0 = copy_to_mode_reg (mode0, op0);
36384 if ((optimize && !register_operand (op1, mode1))
36385 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36386 op1 = copy_to_mode_reg (mode1, op1);
36388 pat = GEN_FCN (d->icode) (op0, op1);
36392 emit_insn (gen_rtx_SET (VOIDmode,
36393 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36394 gen_rtx_fmt_ee (comparison, QImode,
36398 return SUBREG_REG (target);
36401 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Subroutine of ix86_expand_builtin: expand a pcmpestr[im] builtin D.
   The pattern has two outputs (index and mask); depending on
   d->code, the target is the index (PCMPESTRI128), the mask
   (PCMPESTRM128), or — for the flag-extraction variants — a 0/1
   value read from the flags register identified by d->flag.  */
36404 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36405 tree exp, rtx target)
36408 tree arg0 = CALL_EXPR_ARG (exp, 0);
36409 tree arg1 = CALL_EXPR_ARG (exp, 1);
36410 tree arg2 = CALL_EXPR_ARG (exp, 2);
36411 tree arg3 = CALL_EXPR_ARG (exp, 3);
36412 tree arg4 = CALL_EXPR_ARG (exp, 4);
36413 rtx scratch0, scratch1;
36414 rtx op0 = expand_normal (arg0);
36415 rtx op1 = expand_normal (arg1);
36416 rtx op2 = expand_normal (arg2);
36417 rtx op3 = expand_normal (arg3);
36418 rtx op4 = expand_normal (arg4);
36419 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36421 tmode0 = insn_data[d->icode].operand[0].mode;
36422 tmode1 = insn_data[d->icode].operand[1].mode;
36423 modev2 = insn_data[d->icode].operand[2].mode;
36424 modei3 = insn_data[d->icode].operand[3].mode;
36425 modev4 = insn_data[d->icode].operand[4].mode;
36426 modei5 = insn_data[d->icode].operand[5].mode;
36427 modeimm = insn_data[d->icode].operand[6].mode;
36429 if (VECTOR_MODE_P (modev2))
36430 op0 = safe_vector_operand (op0, modev2);
36431 if (VECTOR_MODE_P (modev4))
36432 op2 = safe_vector_operand (op2, modev4);
36434 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36435 op0 = copy_to_mode_reg (modev2, op0);
36436 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36437 op1 = copy_to_mode_reg (modei3, op1);
36438 if ((optimize && !register_operand (op2, modev4))
36439 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36440 op2 = copy_to_mode_reg (modev4, op2);
36441 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36442 op3 = copy_to_mode_reg (modei5, op3);
36444 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36446 error ("the fifth argument must be an 8-bit immediate");
36450 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36452 if (optimize || !target
36453 || GET_MODE (target) != tmode0
36454 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36455 target = gen_reg_rtx (tmode0);
36457 scratch1 = gen_reg_rtx (tmode1);
36459 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36461 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36463 if (optimize || !target
36464 || GET_MODE (target) != tmode1
36465 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36466 target = gen_reg_rtx (tmode1);
36468 scratch0 = gen_reg_rtx (tmode0);
36470 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-extraction variants: both outputs go to scratches and the
   result is read from the flags register d->flag.  */
36474 gcc_assert (d->flag);
36476 scratch0 = gen_reg_rtx (tmode0);
36477 scratch1 = gen_reg_rtx (tmode1);
36479 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Build the 0/1 result in the low byte of a zeroed SImode reg.  */
36489 target = gen_reg_rtx (SImode);
36490 emit_move_insn (target, const0_rtx);
36491 target = gen_rtx_SUBREG (QImode, target, 0);
36494 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36495 gen_rtx_fmt_ee (EQ, QImode,
36496 gen_rtx_REG ((machine_mode) d->flag,
36499 return SUBREG_REG (target);
/* NOTE(review): sampled extract -- some original lines (braces,
   returns, emit calls) fall in gaps between the embedded numbers.
   Mirrors ix86_expand_sse_pcmpestr but for the implicit-length
   (NUL-terminated) string-compare forms, which take only three
   arguments: vec1, vec2, imm8.  */
36506 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36509 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36510 tree exp, rtx target)
36513 tree arg0 = CALL_EXPR_ARG (exp, 0);
36514 tree arg1 = CALL_EXPR_ARG (exp, 1);
36515 tree arg2 = CALL_EXPR_ARG (exp, 2);
36516 rtx scratch0, scratch1;
36517 rtx op0 = expand_normal (arg0);
36518 rtx op1 = expand_normal (arg1);
36519 rtx op2 = expand_normal (arg2);
36520 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Modes required by the insn pattern's five operands.  */
36522 tmode0 = insn_data[d->icode].operand[0].mode;
36523 tmode1 = insn_data[d->icode].operand[1].mode;
36524 modev2 = insn_data[d->icode].operand[2].mode;
36525 modev3 = insn_data[d->icode].operand[3].mode;
36526 modeimm = insn_data[d->icode].operand[4].mode;
36528 if (VECTOR_MODE_P (modev2))
36529 op0 = safe_vector_operand (op0, modev2);
36530 if (VECTOR_MODE_P (modev3))
36531 op1 = safe_vector_operand (op1, modev3);
/* Force operands into registers where the predicates demand it.  */
36533 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36534 op0 = copy_to_mode_reg (modev2, op0);
36535 if ((optimize && !register_operand (op1, modev3))
36536 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36537 op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be a compile-time 8-bit immediate.  */
36539 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36541 error ("the third argument must be an 8-bit immediate");
/* Keep the requested result (index or mask) in TARGET; the unused
   twin result goes to a scratch register.  */
36545 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36547 if (optimize || !target
36548 || GET_MODE (target) != tmode0
36549 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36550 target = gen_reg_rtx (tmode0);
36552 scratch1 = gen_reg_rtx (tmode1);
36554 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36556 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36558 if (optimize || !target
36559 || GET_MODE (target) != tmode1
36560 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36561 target = gen_reg_rtx (tmode1);
36563 scratch0 = gen_reg_rtx (tmode0);
36565 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-testing variants: d->flag names the EFLAGS register/bit.  */
36569 gcc_assert (d->flag);
36571 scratch0 = gen_reg_rtx (tmode0);
36572 scratch1 = gen_reg_rtx (tmode1);
36574 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Turn the flag into a 0/1 SImode value via a STRICT_LOW_PART set
   of the QImode subreg, then return the underlying SImode reg.  */
36584 target = gen_reg_rtx (SImode);
36585 emit_move_insn (target, const0_rtx);
36586 target = gen_rtx_SUBREG (QImode, target, 0);
36589 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36590 gen_rtx_fmt_ee (EQ, QImode,
36591 gen_rtx_REG ((machine_mode) d->flag,
36594 return SUBREG_REG (target);
36600 /* Subroutine of ix86_expand_builtin to take care of insns with
36601 variable number of operands. */
36604 ix86_expand_args_builtin (const struct builtin_description *d,
36605 tree exp, rtx target)
/* NOTE(review): sampled extract.  The `args[]` array declaration, the
   many `nargs = N;` / `break;` lines, and the final emit/return all
   fall in the gaps between the embedded original line numbers.  */
36607 rtx pat, real_target;
36608 unsigned int i, nargs;
36609 unsigned int nargs_constant = 0;
36610 unsigned int mask_pos = 0;
36611 int num_memory = 0;
36617 bool last_arg_count = false;
36618 enum insn_code icode = d->icode;
36619 const struct insn_data_d *insn_p = &insn_data[icode];
36620 machine_mode tmode = insn_p->operand[0].mode;
36621 machine_mode rmode = VOIDmode;
36623 enum rtx_code comparison = d->comparison;
/* Classify the builtin by its function-type tag.  Each case group
   either delegates to a specialized expander or records how many
   operands it has, how many trailing immediates (nargs_constant),
   whether a trailing mask operand shifts the immediate position
   (mask_pos), and whether the last argument is a shift count.  */
36625 switch ((enum ix86_builtin_func_type) d->flag)
/* Rounding-mode builtins delegate to the round expanders.  */
36627 case V2DF_FTYPE_V2DF_ROUND:
36628 case V4DF_FTYPE_V4DF_ROUND:
36629 case V4SF_FTYPE_V4SF_ROUND:
36630 case V8SF_FTYPE_V8SF_ROUND:
36631 case V4SI_FTYPE_V4SF_ROUND:
36632 case V8SI_FTYPE_V8SF_ROUND:
36633 return ix86_expand_sse_round (d, exp, target);
36634 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36635 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36636 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36637 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
/* PTEST-style builtins set flags rather than a value.  */
36638 case INT_FTYPE_V8SF_V8SF_PTEST:
36639 case INT_FTYPE_V4DI_V4DI_PTEST:
36640 case INT_FTYPE_V4DF_V4DF_PTEST:
36641 case INT_FTYPE_V4SF_V4SF_PTEST:
36642 case INT_FTYPE_V2DI_V2DI_PTEST:
36643 case INT_FTYPE_V2DF_V2DF_PTEST:
36644 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument (unary) builtins.  */
36645 case FLOAT128_FTYPE_FLOAT128:
36646 case FLOAT_FTYPE_FLOAT:
36647 case INT_FTYPE_INT:
36648 case UINT64_FTYPE_INT:
36649 case UINT16_FTYPE_UINT16:
36650 case INT64_FTYPE_INT64:
36651 case INT64_FTYPE_V4SF:
36652 case INT64_FTYPE_V2DF:
36653 case INT_FTYPE_V16QI:
36654 case INT_FTYPE_V8QI:
36655 case INT_FTYPE_V8SF:
36656 case INT_FTYPE_V4DF:
36657 case INT_FTYPE_V4SF:
36658 case INT_FTYPE_V2DF:
36659 case INT_FTYPE_V32QI:
36660 case V16QI_FTYPE_V16QI:
36661 case V8SI_FTYPE_V8SF:
36662 case V8SI_FTYPE_V4SI:
36663 case V8HI_FTYPE_V8HI:
36664 case V8HI_FTYPE_V16QI:
36665 case V8QI_FTYPE_V8QI:
36666 case V8SF_FTYPE_V8SF:
36667 case V8SF_FTYPE_V8SI:
36668 case V8SF_FTYPE_V4SF:
36669 case V8SF_FTYPE_V8HI:
36670 case V4SI_FTYPE_V4SI:
36671 case V4SI_FTYPE_V16QI:
36672 case V4SI_FTYPE_V4SF:
36673 case V4SI_FTYPE_V8SI:
36674 case V4SI_FTYPE_V8HI:
36675 case V4SI_FTYPE_V4DF:
36676 case V4SI_FTYPE_V2DF:
36677 case V4HI_FTYPE_V4HI:
36678 case V4DF_FTYPE_V4DF:
36679 case V4DF_FTYPE_V4SI:
36680 case V4DF_FTYPE_V4SF:
36681 case V4DF_FTYPE_V2DF:
36682 case V4SF_FTYPE_V4SF:
36683 case V4SF_FTYPE_V4SI:
36684 case V4SF_FTYPE_V8SF:
36685 case V4SF_FTYPE_V4DF:
36686 case V4SF_FTYPE_V8HI:
36687 case V4SF_FTYPE_V2DF:
36688 case V2DI_FTYPE_V2DI:
36689 case V2DI_FTYPE_V16QI:
36690 case V2DI_FTYPE_V8HI:
36691 case V2DI_FTYPE_V4SI:
36692 case V2DF_FTYPE_V2DF:
36693 case V2DF_FTYPE_V4SI:
36694 case V2DF_FTYPE_V4DF:
36695 case V2DF_FTYPE_V4SF:
36696 case V2DF_FTYPE_V2SI:
36697 case V2SI_FTYPE_V2SI:
36698 case V2SI_FTYPE_V4SF:
36699 case V2SI_FTYPE_V2SF:
36700 case V2SI_FTYPE_V2DF:
36701 case V2SF_FTYPE_V2SF:
36702 case V2SF_FTYPE_V2SI:
36703 case V32QI_FTYPE_V32QI:
36704 case V32QI_FTYPE_V16QI:
36705 case V16HI_FTYPE_V16HI:
36706 case V16HI_FTYPE_V8HI:
36707 case V8SI_FTYPE_V8SI:
36708 case V16HI_FTYPE_V16QI:
36709 case V8SI_FTYPE_V16QI:
36710 case V4DI_FTYPE_V16QI:
36711 case V8SI_FTYPE_V8HI:
36712 case V4DI_FTYPE_V8HI:
36713 case V4DI_FTYPE_V4SI:
36714 case V4DI_FTYPE_V2DI:
36716 case HI_FTYPE_V16QI:
36717 case SI_FTYPE_V32QI:
36718 case DI_FTYPE_V64QI:
36719 case V16QI_FTYPE_HI:
36720 case V32QI_FTYPE_SI:
36721 case V64QI_FTYPE_DI:
36722 case V8HI_FTYPE_QI:
36723 case V16HI_FTYPE_HI:
36724 case V32HI_FTYPE_SI:
36725 case V4SI_FTYPE_QI:
36726 case V8SI_FTYPE_QI:
36727 case V4SI_FTYPE_HI:
36728 case V8SI_FTYPE_HI:
36729 case QI_FTYPE_V8HI:
36730 case HI_FTYPE_V16HI:
36731 case SI_FTYPE_V32HI:
36732 case QI_FTYPE_V4SI:
36733 case QI_FTYPE_V8SI:
36734 case HI_FTYPE_V16SI:
36735 case QI_FTYPE_V2DI:
36736 case QI_FTYPE_V4DI:
36737 case QI_FTYPE_V8DI:
36738 case UINT_FTYPE_V2DF:
36739 case UINT_FTYPE_V4SF:
36740 case UINT64_FTYPE_V2DF:
36741 case UINT64_FTYPE_V4SF:
36742 case V16QI_FTYPE_V8DI:
36743 case V16HI_FTYPE_V16SI:
36744 case V16SI_FTYPE_HI:
36745 case V2DI_FTYPE_QI:
36746 case V4DI_FTYPE_QI:
36747 case V16SI_FTYPE_V16SI:
36748 case V16SI_FTYPE_INT:
36749 case V16SF_FTYPE_FLOAT:
36750 case V16SF_FTYPE_V8SF:
36751 case V16SI_FTYPE_V8SI:
36752 case V16SF_FTYPE_V4SF:
36753 case V16SI_FTYPE_V4SI:
36754 case V16SF_FTYPE_V16SF:
36755 case V8HI_FTYPE_V8DI:
36756 case V8UHI_FTYPE_V8UHI:
36757 case V8SI_FTYPE_V8DI:
36758 case V8SF_FTYPE_V8DF:
36759 case V8DI_FTYPE_QI:
36760 case V8DI_FTYPE_INT64:
36761 case V8DI_FTYPE_V4DI:
36762 case V8DI_FTYPE_V8DI:
36763 case V8DF_FTYPE_DOUBLE:
36764 case V8DF_FTYPE_V4DF:
36765 case V8DF_FTYPE_V2DF:
36766 case V8DF_FTYPE_V8DF:
36767 case V8DF_FTYPE_V8SI:
/* Unary insns whose result merges into the destination.  */
36770 case V4SF_FTYPE_V4SF_VEC_MERGE:
36771 case V2DF_FTYPE_V2DF_VEC_MERGE:
36772 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument (binary) builtins.  */
36773 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36774 case V16QI_FTYPE_V16QI_V16QI:
36775 case V16QI_FTYPE_V8HI_V8HI:
36776 case V16SI_FTYPE_V16SI_V16SI:
36777 case V16SF_FTYPE_V16SF_V16SF:
36778 case V16SF_FTYPE_V16SF_V16SI:
36779 case V8QI_FTYPE_V8QI_V8QI:
36780 case V8QI_FTYPE_V4HI_V4HI:
36781 case V8HI_FTYPE_V8HI_V8HI:
36782 case V8HI_FTYPE_V16QI_V16QI:
36783 case V8HI_FTYPE_V4SI_V4SI:
36784 case V8SF_FTYPE_V8SF_V8SF:
36785 case V8SF_FTYPE_V8SF_V8SI:
36786 case V8DI_FTYPE_V8DI_V8DI:
36787 case V8DF_FTYPE_V8DF_V8DF:
36788 case V8DF_FTYPE_V8DF_V8DI:
36789 case V4SI_FTYPE_V4SI_V4SI:
36790 case V4SI_FTYPE_V8HI_V8HI:
36791 case V4SI_FTYPE_V4SF_V4SF:
36792 case V4SI_FTYPE_V2DF_V2DF:
36793 case V4HI_FTYPE_V4HI_V4HI:
36794 case V4HI_FTYPE_V8QI_V8QI:
36795 case V4HI_FTYPE_V2SI_V2SI:
36796 case V4DF_FTYPE_V4DF_V4DF:
36797 case V4DF_FTYPE_V4DF_V4DI:
36798 case V4SF_FTYPE_V4SF_V4SF:
36799 case V4SF_FTYPE_V4SF_V4SI:
36800 case V4SF_FTYPE_V4SF_V2SI:
36801 case V4SF_FTYPE_V4SF_V2DF:
36802 case V4SF_FTYPE_V4SF_UINT:
36803 case V4SF_FTYPE_V4SF_UINT64:
36804 case V4SF_FTYPE_V4SF_DI:
36805 case V4SF_FTYPE_V4SF_SI:
36806 case V2DI_FTYPE_V2DI_V2DI:
36807 case V2DI_FTYPE_V16QI_V16QI:
36808 case V2DI_FTYPE_V4SI_V4SI:
36809 case V2UDI_FTYPE_V4USI_V4USI:
36810 case V2DI_FTYPE_V2DI_V16QI:
36811 case V2DI_FTYPE_V2DF_V2DF:
36812 case V2SI_FTYPE_V2SI_V2SI:
36813 case V2SI_FTYPE_V4HI_V4HI:
36814 case V2SI_FTYPE_V2SF_V2SF:
36815 case V2DF_FTYPE_V2DF_V2DF:
36816 case V2DF_FTYPE_V2DF_V4SF:
36817 case V2DF_FTYPE_V2DF_V2DI:
36818 case V2DF_FTYPE_V2DF_DI:
36819 case V2DF_FTYPE_V2DF_SI:
36820 case V2DF_FTYPE_V2DF_UINT:
36821 case V2DF_FTYPE_V2DF_UINT64:
36822 case V2SF_FTYPE_V2SF_V2SF:
36823 case V1DI_FTYPE_V1DI_V1DI:
36824 case V1DI_FTYPE_V8QI_V8QI:
36825 case V1DI_FTYPE_V2SI_V2SI:
36826 case V32QI_FTYPE_V16HI_V16HI:
36827 case V16HI_FTYPE_V8SI_V8SI:
36828 case V32QI_FTYPE_V32QI_V32QI:
36829 case V16HI_FTYPE_V32QI_V32QI:
36830 case V16HI_FTYPE_V16HI_V16HI:
36831 case V8SI_FTYPE_V4DF_V4DF:
36832 case V8SI_FTYPE_V8SI_V8SI:
36833 case V8SI_FTYPE_V16HI_V16HI:
36834 case V4DI_FTYPE_V4DI_V4DI:
36835 case V4DI_FTYPE_V8SI_V8SI:
36836 case V4UDI_FTYPE_V8USI_V8USI:
36837 case QI_FTYPE_V8DI_V8DI:
36838 case V8DI_FTYPE_V64QI_V64QI:
36839 case HI_FTYPE_V16SI_V16SI:
/* A binary op with no comparison is a plain binop expansion;
   otherwise it falls through into the compare handling.  */
36840 if (comparison == UNKNOWN)
36841 return ix86_expand_binop_builtin (icode, exp, target);
/* SWAP variants exchange the compare operands before expansion.  */
36844 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36845 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36846 gcc_assert (comparison != UNKNOWN);
/* COUNT variants: the last argument is a shift count that may be
   either an immediate or a register (see last_arg_count below).  */
36850 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36851 case V16HI_FTYPE_V16HI_SI_COUNT:
36852 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36853 case V8SI_FTYPE_V8SI_SI_COUNT:
36854 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36855 case V4DI_FTYPE_V4DI_INT_COUNT:
36856 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36857 case V8HI_FTYPE_V8HI_SI_COUNT:
36858 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36859 case V4SI_FTYPE_V4SI_SI_COUNT:
36860 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36861 case V4HI_FTYPE_V4HI_SI_COUNT:
36862 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36863 case V2DI_FTYPE_V2DI_SI_COUNT:
36864 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36865 case V2SI_FTYPE_V2SI_SI_COUNT:
36866 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36867 case V1DI_FTYPE_V1DI_SI_COUNT:
36869 last_arg_count = true;
36871 case UINT64_FTYPE_UINT64_UINT64:
36872 case UINT_FTYPE_UINT_UINT:
36873 case UINT_FTYPE_UINT_USHORT:
36874 case UINT_FTYPE_UINT_UCHAR:
36875 case UINT16_FTYPE_UINT16_INT:
36876 case UINT8_FTYPE_UINT8_INT:
36877 case HI_FTYPE_HI_HI:
36878 case SI_FTYPE_SI_SI:
36879 case DI_FTYPE_DI_DI:
36880 case V16SI_FTYPE_V8DF_V8DF:
/* CONVERT variants: rmode differs from tmode, so the result is
   produced in rmode and subreg-converted to the builtin's type.  */
36883 case V2DI_FTYPE_V2DI_INT_CONVERT:
36886 nargs_constant = 1;
36888 case V4DI_FTYPE_V4DI_INT_CONVERT:
36891 nargs_constant = 1;
36893 case V8DI_FTYPE_V8DI_INT_CONVERT:
36896 nargs_constant = 1;
/* Two operands, the last being an 8-bit immediate.  */
36898 case V8HI_FTYPE_V8HI_INT:
36899 case V8HI_FTYPE_V8SF_INT:
36900 case V16HI_FTYPE_V16SF_INT:
36901 case V8HI_FTYPE_V4SF_INT:
36902 case V8SF_FTYPE_V8SF_INT:
36903 case V4SF_FTYPE_V16SF_INT:
36904 case V16SF_FTYPE_V16SF_INT:
36905 case V4SI_FTYPE_V4SI_INT:
36906 case V4SI_FTYPE_V8SI_INT:
36907 case V4HI_FTYPE_V4HI_INT:
36908 case V4DF_FTYPE_V4DF_INT:
36909 case V4DF_FTYPE_V8DF_INT:
36910 case V4SF_FTYPE_V4SF_INT:
36911 case V4SF_FTYPE_V8SF_INT:
36912 case V2DI_FTYPE_V2DI_INT:
36913 case V2DF_FTYPE_V2DF_INT:
36914 case V2DF_FTYPE_V4DF_INT:
36915 case V16HI_FTYPE_V16HI_INT:
36916 case V8SI_FTYPE_V8SI_INT:
36917 case V16SI_FTYPE_V16SI_INT:
36918 case V4SI_FTYPE_V16SI_INT:
36919 case V4DI_FTYPE_V4DI_INT:
36920 case V2DI_FTYPE_V4DI_INT:
36921 case V4DI_FTYPE_V8DI_INT:
36922 case HI_FTYPE_HI_INT:
36923 case QI_FTYPE_V4SF_INT:
36924 case QI_FTYPE_V2DF_INT:
36926 nargs_constant = 1;
/* Three-operand builtins (including AVX-512 masked forms where the
   trailing QI/HI/SI/DI argument is the write-mask).  */
36928 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36929 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36930 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36931 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36932 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36933 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36934 case HI_FTYPE_V16SI_V16SI_HI:
36935 case QI_FTYPE_V8DI_V8DI_QI:
36936 case V16HI_FTYPE_V16SI_V16HI_HI:
36937 case V16QI_FTYPE_V16SI_V16QI_HI:
36938 case V16QI_FTYPE_V8DI_V16QI_QI:
36939 case V16SF_FTYPE_V16SF_V16SF_HI:
36940 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36941 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36942 case V16SF_FTYPE_V16SI_V16SF_HI:
36943 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36944 case V16SF_FTYPE_V4SF_V16SF_HI:
36945 case V16SI_FTYPE_SI_V16SI_HI:
36946 case V16SI_FTYPE_V16HI_V16SI_HI:
36947 case V16SI_FTYPE_V16QI_V16SI_HI:
36948 case V16SI_FTYPE_V16SF_V16SI_HI:
36949 case V8SF_FTYPE_V4SF_V8SF_QI:
36950 case V4DF_FTYPE_V2DF_V4DF_QI:
36951 case V8SI_FTYPE_V4SI_V8SI_QI:
36952 case V8SI_FTYPE_SI_V8SI_QI:
36953 case V4SI_FTYPE_V4SI_V4SI_QI:
36954 case V4SI_FTYPE_SI_V4SI_QI:
36955 case V4DI_FTYPE_V2DI_V4DI_QI:
36956 case V4DI_FTYPE_DI_V4DI_QI:
36957 case V2DI_FTYPE_V2DI_V2DI_QI:
36958 case V2DI_FTYPE_DI_V2DI_QI:
36959 case V64QI_FTYPE_V64QI_V64QI_DI:
36960 case V64QI_FTYPE_V16QI_V64QI_DI:
36961 case V64QI_FTYPE_QI_V64QI_DI:
36962 case V32QI_FTYPE_V32QI_V32QI_SI:
36963 case V32QI_FTYPE_V16QI_V32QI_SI:
36964 case V32QI_FTYPE_QI_V32QI_SI:
36965 case V16QI_FTYPE_V16QI_V16QI_HI:
36966 case V16QI_FTYPE_QI_V16QI_HI:
36967 case V32HI_FTYPE_V8HI_V32HI_SI:
36968 case V32HI_FTYPE_HI_V32HI_SI:
36969 case V16HI_FTYPE_V8HI_V16HI_HI:
36970 case V16HI_FTYPE_HI_V16HI_HI:
36971 case V8HI_FTYPE_V8HI_V8HI_QI:
36972 case V8HI_FTYPE_HI_V8HI_QI:
36973 case V8SF_FTYPE_V8HI_V8SF_QI:
36974 case V4SF_FTYPE_V8HI_V4SF_QI:
36975 case V8SI_FTYPE_V8SF_V8SI_QI:
36976 case V4SI_FTYPE_V4SF_V4SI_QI:
36977 case V8DI_FTYPE_V8SF_V8DI_QI:
36978 case V4DI_FTYPE_V4SF_V4DI_QI:
36979 case V2DI_FTYPE_V4SF_V2DI_QI:
36980 case V8SF_FTYPE_V8DI_V8SF_QI:
36981 case V4SF_FTYPE_V4DI_V4SF_QI:
36982 case V4SF_FTYPE_V2DI_V4SF_QI:
36983 case V8DF_FTYPE_V8DI_V8DF_QI:
36984 case V4DF_FTYPE_V4DI_V4DF_QI:
36985 case V2DF_FTYPE_V2DI_V2DF_QI:
36986 case V16QI_FTYPE_V8HI_V16QI_QI:
36987 case V16QI_FTYPE_V16HI_V16QI_HI:
36988 case V16QI_FTYPE_V4SI_V16QI_QI:
36989 case V16QI_FTYPE_V8SI_V16QI_QI:
36990 case V8HI_FTYPE_V4SI_V8HI_QI:
36991 case V8HI_FTYPE_V8SI_V8HI_QI:
36992 case V16QI_FTYPE_V2DI_V16QI_QI:
36993 case V16QI_FTYPE_V4DI_V16QI_QI:
36994 case V8HI_FTYPE_V2DI_V8HI_QI:
36995 case V8HI_FTYPE_V4DI_V8HI_QI:
36996 case V4SI_FTYPE_V2DI_V4SI_QI:
36997 case V4SI_FTYPE_V4DI_V4SI_QI:
36998 case V32QI_FTYPE_V32HI_V32QI_SI:
36999 case HI_FTYPE_V16QI_V16QI_HI:
37000 case SI_FTYPE_V32QI_V32QI_SI:
37001 case DI_FTYPE_V64QI_V64QI_DI:
37002 case QI_FTYPE_V8HI_V8HI_QI:
37003 case HI_FTYPE_V16HI_V16HI_HI:
37004 case SI_FTYPE_V32HI_V32HI_SI:
37005 case QI_FTYPE_V4SI_V4SI_QI:
37006 case QI_FTYPE_V8SI_V8SI_QI:
37007 case QI_FTYPE_V2DI_V2DI_QI:
37008 case QI_FTYPE_V4DI_V4DI_QI:
37009 case V4SF_FTYPE_V2DF_V4SF_QI:
37010 case V4SF_FTYPE_V4DF_V4SF_QI:
37011 case V16SI_FTYPE_V16SI_V16SI_HI:
37012 case V16SI_FTYPE_V16SI_V16SI_V16SI:
37013 case V16SI_FTYPE_V4SI_V16SI_HI:
37014 case V2DI_FTYPE_V2DI_V2DI_V2DI:
37015 case V2DI_FTYPE_V4SI_V2DI_QI:
37016 case V2DI_FTYPE_V8HI_V2DI_QI:
37017 case V2DI_FTYPE_V16QI_V2DI_QI:
37018 case V4DI_FTYPE_V4DI_V4DI_QI:
37019 case V4DI_FTYPE_V4SI_V4DI_QI:
37020 case V4DI_FTYPE_V8HI_V4DI_QI:
37021 case V4DI_FTYPE_V16QI_V4DI_QI:
37022 case V8DI_FTYPE_V8DF_V8DI_QI:
37023 case V4DI_FTYPE_V4DF_V4DI_QI:
37024 case V2DI_FTYPE_V2DF_V2DI_QI:
37025 case V4SI_FTYPE_V4DF_V4SI_QI:
37026 case V4SI_FTYPE_V2DF_V4SI_QI:
37027 case V4SI_FTYPE_V8HI_V4SI_QI:
37028 case V4SI_FTYPE_V16QI_V4SI_QI:
37029 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37030 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37031 case V8DF_FTYPE_V2DF_V8DF_QI:
37032 case V8DF_FTYPE_V4DF_V8DF_QI:
37033 case V8DF_FTYPE_V8DF_V8DF_QI:
37034 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37035 case V8SF_FTYPE_V8SF_V8SF_QI:
37036 case V8SF_FTYPE_V8SI_V8SF_QI:
37037 case V4DF_FTYPE_V4DF_V4DF_QI:
37038 case V4SF_FTYPE_V4SF_V4SF_QI:
37039 case V2DF_FTYPE_V2DF_V2DF_QI:
37040 case V2DF_FTYPE_V4SF_V2DF_QI:
37041 case V2DF_FTYPE_V4SI_V2DF_QI:
37042 case V4SF_FTYPE_V4SI_V4SF_QI:
37043 case V4DF_FTYPE_V4SF_V4DF_QI:
37044 case V4DF_FTYPE_V4SI_V4DF_QI:
37045 case V8SI_FTYPE_V8SI_V8SI_QI:
37046 case V8SI_FTYPE_V8HI_V8SI_QI:
37047 case V8SI_FTYPE_V16QI_V8SI_QI:
37048 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37049 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37050 case V8DF_FTYPE_V8SF_V8DF_QI:
37051 case V8DF_FTYPE_V8SI_V8DF_QI:
37052 case V8DI_FTYPE_DI_V8DI_QI:
37053 case V16SF_FTYPE_V8SF_V16SF_HI:
37054 case V16SI_FTYPE_V8SI_V16SI_HI:
37055 case V16HI_FTYPE_V16HI_V16HI_HI:
37056 case V8HI_FTYPE_V16QI_V8HI_QI:
37057 case V16HI_FTYPE_V16QI_V16HI_HI:
37058 case V32HI_FTYPE_V32HI_V32HI_SI:
37059 case V32HI_FTYPE_V32QI_V32HI_SI:
37060 case V8DI_FTYPE_V16QI_V8DI_QI:
37061 case V8DI_FTYPE_V2DI_V8DI_QI:
37062 case V8DI_FTYPE_V4DI_V8DI_QI:
37063 case V8DI_FTYPE_V8DI_V8DI_QI:
37064 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37065 case V8DI_FTYPE_V8HI_V8DI_QI:
37066 case V8DI_FTYPE_V8SI_V8DI_QI:
37067 case V8HI_FTYPE_V8DI_V8HI_QI:
37068 case V8SF_FTYPE_V8DF_V8SF_QI:
37069 case V8SI_FTYPE_V8DF_V8SI_QI:
37070 case V8SI_FTYPE_V8DI_V8SI_QI:
37071 case V4SI_FTYPE_V4SI_V4SI_V4SI:
/* Three operands with one trailing 8-bit immediate.  */
37074 case V32QI_FTYPE_V32QI_V32QI_INT:
37075 case V16HI_FTYPE_V16HI_V16HI_INT:
37076 case V16QI_FTYPE_V16QI_V16QI_INT:
37077 case V4DI_FTYPE_V4DI_V4DI_INT:
37078 case V8HI_FTYPE_V8HI_V8HI_INT:
37079 case V8SI_FTYPE_V8SI_V8SI_INT:
37080 case V8SI_FTYPE_V8SI_V4SI_INT:
37081 case V8SF_FTYPE_V8SF_V8SF_INT:
37082 case V8SF_FTYPE_V8SF_V4SF_INT:
37083 case V4SI_FTYPE_V4SI_V4SI_INT:
37084 case V4DF_FTYPE_V4DF_V4DF_INT:
37085 case V16SF_FTYPE_V16SF_V16SF_INT:
37086 case V16SF_FTYPE_V16SF_V4SF_INT:
37087 case V16SI_FTYPE_V16SI_V4SI_INT:
37088 case V4DF_FTYPE_V4DF_V2DF_INT:
37089 case V4SF_FTYPE_V4SF_V4SF_INT:
37090 case V2DI_FTYPE_V2DI_V2DI_INT:
37091 case V4DI_FTYPE_V4DI_V2DI_INT:
37092 case V2DF_FTYPE_V2DF_V2DF_INT:
37093 case QI_FTYPE_V8DI_V8DI_INT:
37094 case QI_FTYPE_V8DF_V8DF_INT:
37095 case QI_FTYPE_V2DF_V2DF_INT:
37096 case QI_FTYPE_V4SF_V4SF_INT:
37097 case HI_FTYPE_V16SI_V16SI_INT:
37098 case HI_FTYPE_V16SF_V16SF_INT:
37100 nargs_constant = 1;
37102 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37105 nargs_constant = 1;
37107 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37110 nargs_constant = 1;
37112 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37115 nargs_constant = 1;
37117 case V2DI_FTYPE_V2DI_UINT_UINT:
37119 nargs_constant = 2;
37121 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37124 nargs_constant = 1;
37126 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37130 nargs_constant = 1;
37132 case QI_FTYPE_V8DF_INT_QI:
37133 case QI_FTYPE_V4DF_INT_QI:
37134 case QI_FTYPE_V2DF_INT_QI:
37135 case HI_FTYPE_V16SF_INT_HI:
37136 case QI_FTYPE_V8SF_INT_QI:
37137 case QI_FTYPE_V4SF_INT_QI:
37140 nargs_constant = 1;
37142 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37146 nargs_constant = 1;
37148 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37152 nargs_constant = 1;
/* Four-operand builtins (masked three-operand forms).  */
37154 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37155 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37156 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37157 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37158 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37159 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37160 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37161 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37162 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37163 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37164 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37165 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37166 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37167 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37168 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37169 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37170 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37171 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37172 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37173 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37174 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37175 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37176 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37177 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37178 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37179 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37180 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37181 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37182 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37183 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37184 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37185 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37186 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37187 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37188 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37189 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37190 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37191 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37192 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37193 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37194 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37195 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37196 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37197 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37198 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37199 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37200 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37201 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37202 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37203 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37204 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37205 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37206 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37207 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
/* Four operands with a trailing immediate.  */
37210 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37211 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37212 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37213 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37214 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37216 nargs_constant = 1;
/* Masked compares: the immediate is next-to-last, before the mask
   (mask_pos lines fall in the sampling gaps).  */
37218 case QI_FTYPE_V4DI_V4DI_INT_QI:
37219 case QI_FTYPE_V8SI_V8SI_INT_QI:
37220 case QI_FTYPE_V4DF_V4DF_INT_QI:
37221 case QI_FTYPE_V8SF_V8SF_INT_QI:
37222 case QI_FTYPE_V2DI_V2DI_INT_QI:
37223 case QI_FTYPE_V4SI_V4SI_INT_QI:
37224 case QI_FTYPE_V2DF_V2DF_INT_QI:
37225 case QI_FTYPE_V4SF_V4SF_INT_QI:
37226 case DI_FTYPE_V64QI_V64QI_INT_DI:
37227 case SI_FTYPE_V32QI_V32QI_INT_SI:
37228 case HI_FTYPE_V16QI_V16QI_INT_HI:
37229 case SI_FTYPE_V32HI_V32HI_INT_SI:
37230 case HI_FTYPE_V16HI_V16HI_INT_HI:
37231 case QI_FTYPE_V8HI_V8HI_INT_QI:
37234 nargs_constant = 1;
37236 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37238 nargs_constant = 2;
37240 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37241 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37244 case QI_FTYPE_V8DI_V8DI_INT_QI:
37245 case HI_FTYPE_V16SI_V16SI_INT_HI:
37246 case QI_FTYPE_V8DF_V8DF_INT_QI:
37247 case HI_FTYPE_V16SF_V16SF_INT_HI:
37250 nargs_constant = 1;
37252 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37253 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37254 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37255 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37256 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37257 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37258 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37259 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37260 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37261 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37262 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37263 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37264 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37265 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37266 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37267 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37268 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37269 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37270 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37271 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37272 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37273 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37274 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37275 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37276 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37277 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37278 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37279 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37280 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37281 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37284 nargs_constant = 1;
/* Five-operand masked forms with an embedded immediate.  */
37286 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37287 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37288 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37289 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37290 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37291 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37292 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37293 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37294 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37295 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37296 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37297 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37298 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37299 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37300 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37301 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37302 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37303 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37304 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37305 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37306 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37307 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37308 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37309 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37310 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37311 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37312 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37315 nargs_constant = 1;
37317 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37318 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37319 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37320 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37321 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37322 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37323 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37324 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37325 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37326 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37327 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37331 nargs_constant = 1;
37335 gcc_unreachable ();
/* Sanity: the classification above must fit the local args array.  */
37338 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Compare builtins go through the dedicated compare expander.  */
37340 if (comparison != UNKNOWN)
37342 gcc_assert (nargs == 2);
37343 return ix86_expand_sse_compare (d, exp, target, swap);
/* Set up TARGET.  If the insn's result mode RMODE differs from the
   builtin's type mode TMODE (the _CONVERT cases), expand into a fresh
   RMODE register and hand callers a subreg of it.  */
37346 if (rmode == VOIDmode || rmode == tmode)
37350 || GET_MODE (target) != tmode
37351 || !insn_p->operand[0].predicate (target, tmode))
37352 target = gen_reg_rtx (tmode);
37353 real_target = target;
37357 real_target = gen_reg_rtx (tmode);
37358 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
/* Expand and legitimize each argument against the matching insn
   operand (output operand is index 0, hence i + 1).  */
37361 for (i = 0; i < nargs; i++)
37363 tree arg = CALL_EXPR_ARG (exp, i);
37364 rtx op = expand_normal (arg);
37365 machine_mode mode = insn_p->operand[i + 1].mode;
37366 bool match = insn_p->operand[i + 1].predicate (op, mode);
37368 if (last_arg_count && (i + 1) == nargs)
37370 /* SIMD shift insns take either an 8-bit immediate or
37371 register as count. But builtin functions take int as
37372 count. If count doesn't match, we put it in register. */
37375 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37376 if (!insn_p->operand[i + 1].predicate (op, mode))
37377 op = copy_to_reg (op);
/* Arguments in the nargs_constant tail must be immediates; a
   non-immediate triggers an icode-specific diagnostic telling the
   user how wide the immediate must be.  */
37380 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37381 (!mask_pos && (nargs - i) <= nargs_constant))
37386 case CODE_FOR_avx_vinsertf128v4di:
37387 case CODE_FOR_avx_vextractf128v4di:
37388 error ("the last argument must be an 1-bit immediate");
37391 case CODE_FOR_avx512f_cmpv8di3_mask:
37392 case CODE_FOR_avx512f_cmpv16si3_mask:
37393 case CODE_FOR_avx512f_ucmpv8di3_mask:
37394 case CODE_FOR_avx512f_ucmpv16si3_mask:
37395 case CODE_FOR_avx512vl_cmpv4di3_mask:
37396 case CODE_FOR_avx512vl_cmpv8si3_mask:
37397 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37398 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37399 case CODE_FOR_avx512vl_cmpv2di3_mask:
37400 case CODE_FOR_avx512vl_cmpv4si3_mask:
37401 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37402 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37403 error ("the last argument must be a 3-bit immediate");
37406 case CODE_FOR_sse4_1_roundsd:
37407 case CODE_FOR_sse4_1_roundss:
37409 case CODE_FOR_sse4_1_roundpd:
37410 case CODE_FOR_sse4_1_roundps:
37411 case CODE_FOR_avx_roundpd256:
37412 case CODE_FOR_avx_roundps256:
37414 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37415 case CODE_FOR_sse4_1_roundps_sfix:
37416 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37417 case CODE_FOR_avx_roundps_sfix256:
37419 case CODE_FOR_sse4_1_blendps:
37420 case CODE_FOR_avx_blendpd256:
37421 case CODE_FOR_avx_vpermilv4df:
37422 case CODE_FOR_avx_vpermilv4df_mask:
37423 case CODE_FOR_avx512f_getmantv8df_mask:
37424 case CODE_FOR_avx512f_getmantv16sf_mask:
37425 case CODE_FOR_avx512vl_getmantv8sf_mask:
37426 case CODE_FOR_avx512vl_getmantv4df_mask:
37427 case CODE_FOR_avx512vl_getmantv4sf_mask:
37428 case CODE_FOR_avx512vl_getmantv2df_mask:
37429 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37430 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37431 case CODE_FOR_avx512dq_rangepv4df_mask:
37432 case CODE_FOR_avx512dq_rangepv8sf_mask:
37433 case CODE_FOR_avx512dq_rangepv2df_mask:
37434 case CODE_FOR_avx512dq_rangepv4sf_mask:
37435 case CODE_FOR_avx_shufpd256_mask:
37436 error ("the last argument must be a 4-bit immediate");
37439 case CODE_FOR_sha1rnds4:
37440 case CODE_FOR_sse4_1_blendpd:
37441 case CODE_FOR_avx_vpermilv2df:
37442 case CODE_FOR_avx_vpermilv2df_mask:
37443 case CODE_FOR_xop_vpermil2v2df3:
37444 case CODE_FOR_xop_vpermil2v4sf3:
37445 case CODE_FOR_xop_vpermil2v4df3:
37446 case CODE_FOR_xop_vpermil2v8sf3:
37447 case CODE_FOR_avx512f_vinsertf32x4_mask:
37448 case CODE_FOR_avx512f_vinserti32x4_mask:
37449 case CODE_FOR_avx512f_vextractf32x4_mask:
37450 case CODE_FOR_avx512f_vextracti32x4_mask:
37451 case CODE_FOR_sse2_shufpd:
37452 case CODE_FOR_sse2_shufpd_mask:
37453 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37454 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37455 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37456 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37457 error ("the last argument must be a 2-bit immediate");
37460 case CODE_FOR_avx_vextractf128v4df:
37461 case CODE_FOR_avx_vextractf128v8sf:
37462 case CODE_FOR_avx_vextractf128v8si:
37463 case CODE_FOR_avx_vinsertf128v4df:
37464 case CODE_FOR_avx_vinsertf128v8sf:
37465 case CODE_FOR_avx_vinsertf128v8si:
37466 case CODE_FOR_avx512f_vinsertf64x4_mask:
37467 case CODE_FOR_avx512f_vinserti64x4_mask:
37468 case CODE_FOR_avx512f_vextractf64x4_mask:
37469 case CODE_FOR_avx512f_vextracti64x4_mask:
37470 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37471 case CODE_FOR_avx512dq_vinserti32x8_mask:
37472 case CODE_FOR_avx512vl_vinsertv4df:
37473 case CODE_FOR_avx512vl_vinsertv4di:
37474 case CODE_FOR_avx512vl_vinsertv8sf:
37475 case CODE_FOR_avx512vl_vinsertv8si:
37476 error ("the last argument must be a 1-bit immediate");
37479 case CODE_FOR_avx_vmcmpv2df3:
37480 case CODE_FOR_avx_vmcmpv4sf3:
37481 case CODE_FOR_avx_cmpv2df3:
37482 case CODE_FOR_avx_cmpv4sf3:
37483 case CODE_FOR_avx_cmpv4df3:
37484 case CODE_FOR_avx_cmpv8sf3:
37485 case CODE_FOR_avx512f_cmpv8df3_mask:
37486 case CODE_FOR_avx512f_cmpv16sf3_mask:
37487 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37488 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37489 error ("the last argument must be a 5-bit immediate");
/* Default diagnostic: generic 8-bit immediate message, phrased
   for the last or next-to-last position.  */
37493 switch (nargs_constant)
37496 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37497 (!mask_pos && (nargs - i) == nargs_constant))
37499 error ("the next to last argument must be an 8-bit immediate");
37503 error ("the last argument must be an 8-bit immediate");
37506 gcc_unreachable ();
/* Ordinary argument: make it acceptable to the insn's predicate,
   limiting memory operands to one when not optimizing.  */
37513 if (VECTOR_MODE_P (mode))
37514 op = safe_vector_operand (op, mode);
37516 /* If we aren't optimizing, only allow one memory operand to
37518 if (memory_operand (op, mode))
37521 op = fixup_modeless_constant (op, mode);
37523 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37525 if (optimize || !match || num_memory > 1)
37526 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: force into a reg, then reinterpret via subreg.  */
37530 op = copy_to_reg (op);
37531 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37536 args[i].mode = mode;
/* Generate the insn with the collected operands; arity dispatch.  */
37542 pat = GEN_FCN (icode) (real_target, args[0].op);
37545 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37548 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37552 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37553 args[2].op, args[3].op);
37556 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37557 args[2].op, args[3].op, args[4].op);
37559 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37560 args[2].op, args[3].op, args[4].op,
37564 gcc_unreachable ();
37574 /* Transform pattern of following layout:
37577 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37585 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37589 (parallel [ A B ... ]) */
37592 ix86_erase_embedded_rounding (rtx pat)
/* Strip the UNSPEC_EMBEDDED_ROUNDING element from PAT and return an
   equivalent pattern without the embedded-rounding annotation.
   PAT may be a whole INSN or a bare pattern; normalize to the pattern.  */
37594 if (GET_CODE (pat) == INSN)
37595 pat = PATTERN (pat);
37597 gcc_assert (GET_CODE (pat) == PARALLEL);
/* Common case: a two-element PARALLEL of the real SET plus the rounding
   unspec collapses to just the SET.  */
37599 if (XVECLEN (pat, 0) == 2)
37601 rtx p0 = XVECEXP (pat, 0, 0);
37602 rtx p1 = XVECEXP (pat, 0, 1);
37604 gcc_assert (GET_CODE (p0) == SET
37605 && GET_CODE (p1) == UNSPEC
37606 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
/* General case: copy every element that is not the rounding unspec into
   RES, then rebuild a smaller PARALLEL from the survivors.  */
37612 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37616 for (; i < XVECLEN (pat, 0); ++i)
37618 rtx elem = XVECEXP (pat, 0, i);
37619 if (GET_CODE (elem) != UNSPEC
37620 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37624 /* No more than 1 occurrence was removed. */
37625 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37627 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37631 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37634 ix86_expand_sse_comi_round (const struct builtin_description *d,
37635 tree exp, rtx target)
/* Expand a COMI/UCOMI-with-rounding builtin: EXP carries two vector
   operands, a 5-bit comparison-predicate immediate (arg2) and a
   rounding-mode immediate (arg3).  Returns an SImode register holding
   the 0/1 comparison result.  */
37638 tree arg0 = CALL_EXPR_ARG (exp, 0);
37639 tree arg1 = CALL_EXPR_ARG (exp, 1);
37640 tree arg2 = CALL_EXPR_ARG (exp, 2);
37641 tree arg3 = CALL_EXPR_ARG (exp, 3);
37642 rtx op0 = expand_normal (arg0);
37643 rtx op1 = expand_normal (arg1);
37644 rtx op2 = expand_normal (arg2);
37645 rtx op3 = expand_normal (arg3);
37646 enum insn_code icode = d->icode;
37647 const struct insn_data_d *insn_p = &insn_data[icode];
37648 machine_mode mode0 = insn_p->operand[0].mode;
37649 machine_mode mode1 = insn_p->operand[1].mode;
37650 enum rtx_code comparison = UNEQ;
37651 bool need_ucomi = false;
37653 /* See avxintrin.h for values. */
/* Table mapping each of the 32 _CMP_* predicate encodings to the RTL
   comparison applied to the comi flags result.  */
37654 enum rtx_code comi_comparisons[32] =
37656 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37657 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37658 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
/* Parallel table: whether the predicate requires the quiet (ucomi)
   rather than the signalling (comi) instruction variant.  */
37660 bool need_ucomi_values[32] =
37662 true, false, false, true, true, false, false, true,
37663 true, false, false, true, true, false, false, true,
37664 false, true, true, false, false, true, true, false,
37665 false, true, true, false, false, true, true, false
/* Diagnose malformed predicate/rounding immediates before expanding.  */
37668 if (!CONST_INT_P (op2))
37670 error ("the third argument must be comparison constant");
37673 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37675 error ("incorrect comparison mode");
37679 if (!insn_p->operand[2].predicate (op3, SImode))
37681 error ("incorrect rounding operand");
37685 comparison = comi_comparisons[INTVAL (op2)];
37686 need_ucomi = need_ucomi_values[INTVAL (op2)];
37688 if (VECTOR_MODE_P (mode0))
37689 op0 = safe_vector_operand (op0, mode0);
37690 if (VECTOR_MODE_P (mode1))
37691 op1 = safe_vector_operand (op1, mode1);
/* Materialize the boolean result in the low QImode part of a zeroed
   SImode pseudo.  */
37693 target = gen_reg_rtx (SImode);
37694 emit_move_insn (target, const0_rtx);
37695 target = gen_rtx_SUBREG (QImode, target, 0);
/* Force operands into registers where the insn predicates demand it.  */
37697 if ((optimize && !register_operand (op0, mode0))
37698 || !insn_p->operand[0].predicate (op0, mode0))
37699 op0 = copy_to_mode_reg (mode0, op0);
37700 if ((optimize && !register_operand (op1, mode1))
37701 || !insn_p->operand[1].predicate (op1, mode1))
37702 op1 = copy_to_mode_reg (mode1, op1);
/* Swap to the quiet (ucomi) icode when the predicate asked for it.  */
37705 icode = icode == CODE_FOR_sse_comi_round
37706 ? CODE_FOR_sse_ucomi_round
37707 : CODE_FOR_sse2_ucomi_round;
37709 pat = GEN_FCN (icode) (op0, op1, op3);
37713 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37714 if (INTVAL (op3) == NO_ROUND)
37716 pat = ix86_erase_embedded_rounding (pat);
/* Find the SET destination, whether PAT is a plain SET or a PARALLEL
   whose first element is the SET.  */
37720 set_dst = SET_DEST (pat);
37724 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37725 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
/* Store the comparison of the comi result into the low byte of TARGET.  */
37729 emit_insn (gen_rtx_SET (VOIDmode,
37730 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37731 gen_rtx_fmt_ee (comparison, QImode,
37735 return SUBREG_REG (target);
37739 ix86_expand_round_builtin (const struct builtin_description *d,
37740 tree exp, rtx target)
/* Expand a builtin D that carries an embedded-rounding operand.  The
   function-type tag in D->flag determines the argument count and which
   trailing arguments must be immediates; the final argument is always
   the rounding mode.  Returns the result rtx (TARGET or a fresh reg).  */
37743 unsigned int i, nargs;
37749 enum insn_code icode = d->icode;
37750 const struct insn_data_d *insn_p = &insn_data[icode];
37751 machine_mode tmode = insn_p->operand[0].mode;
37752 unsigned int nargs_constant = 0;
37753 unsigned int redundant_embed_rnd = 0;
/* Classify the builtin signature: set NARGS (elided here) and
   NARGS_CONSTANT, the number of trailing immediate operands.  */
37755 switch ((enum ix86_builtin_func_type) d->flag)
37757 case UINT64_FTYPE_V2DF_INT:
37758 case UINT64_FTYPE_V4SF_INT:
37759 case UINT_FTYPE_V2DF_INT:
37760 case UINT_FTYPE_V4SF_INT:
37761 case INT64_FTYPE_V2DF_INT:
37762 case INT64_FTYPE_V4SF_INT:
37763 case INT_FTYPE_V2DF_INT:
37764 case INT_FTYPE_V4SF_INT:
37767 case V4SF_FTYPE_V4SF_UINT_INT:
37768 case V4SF_FTYPE_V4SF_UINT64_INT:
37769 case V2DF_FTYPE_V2DF_UINT64_INT:
37770 case V4SF_FTYPE_V4SF_INT_INT:
37771 case V4SF_FTYPE_V4SF_INT64_INT:
37772 case V2DF_FTYPE_V2DF_INT64_INT:
37773 case V4SF_FTYPE_V4SF_V4SF_INT:
37774 case V2DF_FTYPE_V2DF_V2DF_INT:
37775 case V4SF_FTYPE_V4SF_V2DF_INT:
37776 case V2DF_FTYPE_V2DF_V4SF_INT:
37779 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37780 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37781 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37782 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37783 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37784 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37785 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37786 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37787 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37788 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37789 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37790 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37791 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37792 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37795 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37796 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37797 nargs_constant = 2;
/* COMI-style comparisons have a dedicated expander.  */
37800 case INT_FTYPE_V4SF_V4SF_INT_INT:
37801 case INT_FTYPE_V2DF_V2DF_INT_INT:
37802 return ix86_expand_sse_comi_round (d, exp, target);
37803 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37804 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37805 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37806 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37807 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37808 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37811 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37812 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37813 nargs_constant = 4;
37816 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37817 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37818 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37819 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37820 nargs_constant = 3;
37823 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37824 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37825 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37826 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37828 nargs_constant = 4;
37830 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37831 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37832 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37833 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37835 nargs_constant = 3;
37838 gcc_unreachable ();
37840 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Reuse TARGET only if it matches the result mode and predicate.  */
37844 || GET_MODE (target) != tmode
37845 || !insn_p->operand[0].predicate (target, tmode))
37846 target = gen_reg_rtx (tmode);
/* Expand and legitimize each call argument.  */
37848 for (i = 0; i < nargs; i++)
37850 tree arg = CALL_EXPR_ARG (exp, i);
37851 rtx op = expand_normal (arg);
37852 machine_mode mode = insn_p->operand[i + 1].mode;
37853 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* This position must hold the immediate operand; report per-icode
   width requirements when the predicate rejects it.  */
37855 if (i == nargs - nargs_constant)
37861 case CODE_FOR_avx512f_getmantv8df_mask_round:
37862 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37863 case CODE_FOR_avx512f_vgetmantv2df_round:
37864 case CODE_FOR_avx512f_vgetmantv4sf_round:
37865 error ("the immediate argument must be a 4-bit immediate");
37867 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37868 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37869 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37870 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37871 error ("the immediate argument must be a 5-bit immediate");
37874 error ("the immediate argument must be an 8-bit immediate");
/* The last argument is always the rounding-mode immediate.  */
37879 else if (i == nargs-1)
37881 if (!insn_p->operand[nargs].predicate (op, SImode))
37883 error ("incorrect rounding operand");
37887 /* If there is no rounding use normal version of the pattern. */
37888 if (INTVAL (op) == NO_ROUND)
37889 redundant_embed_rnd = 1;
/* Ordinary operand: coerce it into the mode/register form the insn
   predicate requires.  */
37893 if (VECTOR_MODE_P (mode))
37894 op = safe_vector_operand (op, mode);
37896 op = fixup_modeless_constant (op, mode);
37898 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37900 if (optimize || !match)
37901 op = copy_to_mode_reg (mode, op);
37905 op = copy_to_reg (op);
37906 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37911 args[i].mode = mode;
/* Emit the insn pattern for the collected operand count.  */
37917 pat = GEN_FCN (icode) (target, args[0].op);
37920 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37923 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37927 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37928 args[2].op, args[3].op);
37931 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37932 args[2].op, args[3].op, args[4].op);
37934 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37935 args[2].op, args[3].op, args[4].op,
37939 gcc_unreachable ();
/* Drop the embedded-rounding unspec when the caller passed NO_ROUND.  */
37945 if (redundant_embed_rnd)
37946 pat = ix86_erase_embedded_rounding (pat);
37952 /* Subroutine of ix86_expand_builtin to take care of special insns
37953 with variable number of operands. */
37956 ix86_expand_special_args_builtin (const struct builtin_description *d,
37957 tree exp, rtx target)
/* Expand builtins with irregular operand shapes: loads, stores (masked
   and nontemporal among them) and a few scalar special insns.  KLASS
   distinguishes loads (value produced) from stores (value consumed);
   ALIGNED_MEM marks icodes whose memory operand needs strict mode
   alignment.  Returns the result rtx, or 0 for stores.  */
37961 unsigned int i, nargs, arg_adjust, memory;
37962 bool aligned_mem = false;
37968 enum insn_code icode = d->icode;
37969 bool last_arg_constant = false;
37970 const struct insn_data_d *insn_p = &insn_data[icode];
37971 machine_mode tmode = insn_p->operand[0].mode;
37972 enum { load, store } klass;
/* Classify the builtin by its function-type tag: sets NARGS, KLASS,
   MEMORY (index of the memory operand) and the alignment flag.  */
37974 switch ((enum ix86_builtin_func_type) d->flag)
37976 case VOID_FTYPE_VOID:
37977 emit_insn (GEN_FCN (icode) (target));
37979 case VOID_FTYPE_UINT64:
37980 case VOID_FTYPE_UNSIGNED:
37986 case INT_FTYPE_VOID:
37987 case USHORT_FTYPE_VOID:
37988 case UINT64_FTYPE_VOID:
37989 case UNSIGNED_FTYPE_VOID:
/* Loads through a pointer argument.  */
37994 case UINT64_FTYPE_PUNSIGNED:
37995 case V2DI_FTYPE_PV2DI:
37996 case V4DI_FTYPE_PV4DI:
37997 case V32QI_FTYPE_PCCHAR:
37998 case V16QI_FTYPE_PCCHAR:
37999 case V8SF_FTYPE_PCV4SF:
38000 case V8SF_FTYPE_PCFLOAT:
38001 case V4SF_FTYPE_PCFLOAT:
38002 case V4DF_FTYPE_PCV2DF:
38003 case V4DF_FTYPE_PCDOUBLE:
38004 case V2DF_FTYPE_PCDOUBLE:
38005 case VOID_FTYPE_PVOID:
38006 case V16SI_FTYPE_PV4SI:
38007 case V16SF_FTYPE_PV4SF:
38008 case V8DI_FTYPE_PV4DI:
38009 case V8DI_FTYPE_PV8DI:
38010 case V8DF_FTYPE_PV4DF:
38016 case CODE_FOR_sse4_1_movntdqa:
38017 case CODE_FOR_avx2_movntdqa:
38018 case CODE_FOR_avx512f_movntdqa:
38019 aligned_mem = true;
/* Stores through a pointer argument.  */
38025 case VOID_FTYPE_PV2SF_V4SF:
38026 case VOID_FTYPE_PV8DI_V8DI:
38027 case VOID_FTYPE_PV4DI_V4DI:
38028 case VOID_FTYPE_PV2DI_V2DI:
38029 case VOID_FTYPE_PCHAR_V32QI:
38030 case VOID_FTYPE_PCHAR_V16QI:
38031 case VOID_FTYPE_PFLOAT_V16SF:
38032 case VOID_FTYPE_PFLOAT_V8SF:
38033 case VOID_FTYPE_PFLOAT_V4SF:
38034 case VOID_FTYPE_PDOUBLE_V8DF:
38035 case VOID_FTYPE_PDOUBLE_V4DF:
38036 case VOID_FTYPE_PDOUBLE_V2DF:
38037 case VOID_FTYPE_PLONGLONG_LONGLONG:
38038 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38039 case VOID_FTYPE_PINT_INT:
38042 /* Reserve memory operand for target. */
38043 memory = ARRAY_SIZE (args);
38046 /* These builtins and instructions require the memory
38047 to be properly aligned. */
38048 case CODE_FOR_avx_movntv4di:
38049 case CODE_FOR_sse2_movntv2di:
38050 case CODE_FOR_avx_movntv8sf:
38051 case CODE_FOR_sse_movntv4sf:
38052 case CODE_FOR_sse4a_vmmovntv4sf:
38053 case CODE_FOR_avx_movntv4df:
38054 case CODE_FOR_sse2_movntv2df:
38055 case CODE_FOR_sse4a_vmmovntv2df:
38056 case CODE_FOR_sse2_movntidi:
38057 case CODE_FOR_sse_movntq:
38058 case CODE_FOR_sse2_movntisi:
38059 case CODE_FOR_avx512f_movntv16sf:
38060 case CODE_FOR_avx512f_movntv8df:
38061 case CODE_FOR_avx512f_movntv8di:
38062 aligned_mem = true;
38068 case V4SF_FTYPE_V4SF_PCV2SF:
38069 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked gathers/loads keyed by a vector index or mask operand.  */
38074 case V8SF_FTYPE_PCV8SF_V8SI:
38075 case V4DF_FTYPE_PCV4DF_V4DI:
38076 case V4SF_FTYPE_PCV4SF_V4SI:
38077 case V2DF_FTYPE_PCV2DF_V2DI:
38078 case V8SI_FTYPE_PCV8SI_V8SI:
38079 case V4DI_FTYPE_PCV4DI_V4DI:
38080 case V4SI_FTYPE_PCV4SI_V4SI:
38081 case V2DI_FTYPE_PCV2DI_V2DI:
/* Masked stores (pointer, value, mask).  */
38086 case VOID_FTYPE_PV8DF_V8DF_QI:
38087 case VOID_FTYPE_PV16SF_V16SF_HI:
38088 case VOID_FTYPE_PV8DI_V8DI_QI:
38089 case VOID_FTYPE_PV4DI_V4DI_QI:
38090 case VOID_FTYPE_PV2DI_V2DI_QI:
38091 case VOID_FTYPE_PV16SI_V16SI_HI:
38092 case VOID_FTYPE_PV8SI_V8SI_QI:
38093 case VOID_FTYPE_PV4SI_V4SI_QI:
38096 /* These builtins and instructions require the memory
38097 to be properly aligned. */
38098 case CODE_FOR_avx512f_storev16sf_mask:
38099 case CODE_FOR_avx512f_storev16si_mask:
38100 case CODE_FOR_avx512f_storev8df_mask:
38101 case CODE_FOR_avx512f_storev8di_mask:
38102 case CODE_FOR_avx512vl_storev8sf_mask:
38103 case CODE_FOR_avx512vl_storev8si_mask:
38104 case CODE_FOR_avx512vl_storev4df_mask:
38105 case CODE_FOR_avx512vl_storev4di_mask:
38106 case CODE_FOR_avx512vl_storev4sf_mask:
38107 case CODE_FOR_avx512vl_storev4si_mask:
38108 case CODE_FOR_avx512vl_storev2df_mask:
38109 case CODE_FOR_avx512vl_storev2di_mask:
38110 aligned_mem = true;
38116 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38117 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38118 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38119 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38120 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38121 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38122 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38123 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38124 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38125 case VOID_FTYPE_PFLOAT_V4SF_QI:
38126 case VOID_FTYPE_PV8SI_V8DI_QI:
38127 case VOID_FTYPE_PV8HI_V8DI_QI:
38128 case VOID_FTYPE_PV16HI_V16SI_HI:
38129 case VOID_FTYPE_PV16QI_V8DI_QI:
38130 case VOID_FTYPE_PV16QI_V16SI_HI:
38131 case VOID_FTYPE_PV4SI_V4DI_QI:
38132 case VOID_FTYPE_PV4SI_V2DI_QI:
38133 case VOID_FTYPE_PV8HI_V4DI_QI:
38134 case VOID_FTYPE_PV8HI_V2DI_QI:
38135 case VOID_FTYPE_PV8HI_V8SI_QI:
38136 case VOID_FTYPE_PV8HI_V4SI_QI:
38137 case VOID_FTYPE_PV16QI_V4DI_QI:
38138 case VOID_FTYPE_PV16QI_V2DI_QI:
38139 case VOID_FTYPE_PV16QI_V8SI_QI:
38140 case VOID_FTYPE_PV16QI_V4SI_QI:
38141 case VOID_FTYPE_PV8HI_V8HI_QI:
38142 case VOID_FTYPE_PV16HI_V16HI_HI:
38143 case VOID_FTYPE_PV32HI_V32HI_SI:
38144 case VOID_FTYPE_PV16QI_V16QI_HI:
38145 case VOID_FTYPE_PV32QI_V32QI_SI:
38146 case VOID_FTYPE_PV64QI_V64QI_DI:
38147 case VOID_FTYPE_PV4DF_V4DF_QI:
38148 case VOID_FTYPE_PV2DF_V2DF_QI:
38149 case VOID_FTYPE_PV8SF_V8SF_QI:
38150 case VOID_FTYPE_PV4SF_V4SF_QI:
38153 /* Reserve memory operand for target. */
38154 memory = ARRAY_SIZE (args);
/* Masked loads (pointer, passthrough value, mask).  */
38156 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38157 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38158 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38159 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38160 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38161 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38162 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38163 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38164 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38165 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38166 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38167 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38168 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38169 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38170 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38171 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38172 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38173 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38174 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38175 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38181 /* These builtins and instructions require the memory
38182 to be properly aligned. */
38183 case CODE_FOR_avx512f_loadv16sf_mask:
38184 case CODE_FOR_avx512f_loadv16si_mask:
38185 case CODE_FOR_avx512f_loadv8df_mask:
38186 case CODE_FOR_avx512f_loadv8di_mask:
38187 case CODE_FOR_avx512vl_loadv8sf_mask:
38188 case CODE_FOR_avx512vl_loadv8si_mask:
38189 case CODE_FOR_avx512vl_loadv4df_mask:
38190 case CODE_FOR_avx512vl_loadv4di_mask:
38191 case CODE_FOR_avx512vl_loadv4sf_mask:
38192 case CODE_FOR_avx512vl_loadv4si_mask:
38193 case CODE_FOR_avx512vl_loadv2df_mask:
38194 case CODE_FOR_avx512vl_loadv2di_mask:
38195 case CODE_FOR_avx512bw_loadv64qi_mask:
38196 case CODE_FOR_avx512vl_loadv32qi_mask:
38197 case CODE_FOR_avx512vl_loadv16qi_mask:
38198 case CODE_FOR_avx512bw_loadv32hi_mask:
38199 case CODE_FOR_avx512vl_loadv16hi_mask:
38200 case CODE_FOR_avx512vl_loadv8hi_mask:
38201 aligned_mem = true;
/* Scalar special insns whose last argument is an immediate.  */
38207 case VOID_FTYPE_UINT_UINT_UINT:
38208 case VOID_FTYPE_UINT64_UINT_UINT:
38209 case UCHAR_FTYPE_UINT_UINT_UINT:
38210 case UCHAR_FTYPE_UINT64_UINT_UINT:
38213 memory = ARRAY_SIZE (args);
38214 last_arg_constant = true;
38217 gcc_unreachable ();
38220 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the first call argument is the destination address;
   build the MEM for it and make TARGET that MEM.  */
38222 if (klass == store)
38224 arg = CALL_EXPR_ARG (exp, 0);
38225 op = expand_normal (arg);
38226 gcc_assert (target == 0);
38229 op = ix86_zero_extend_to_Pmode (op);
38230 target = gen_rtx_MEM (tmode, op);
38231 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38232 on it. Try to improve it using get_pointer_alignment,
38233 and if the special builtin is one that requires strict
38234 mode alignment, also from it's GET_MODE_ALIGNMENT.
38235 Failure to do so could lead to ix86_legitimate_combined_insn
38236 rejecting all changes to such insns. */
38237 unsigned int align = get_pointer_alignment (arg);
38238 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38239 align = GET_MODE_ALIGNMENT (tmode);
38240 if (MEM_ALIGN (target) < align)
38241 set_mem_align (target, align);
38244 target = force_reg (tmode, op);
/* For loads, reuse TARGET only if it is a register of the right mode.  */
38252 || !register_operand (target, tmode)
38253 || GET_MODE (target) != tmode)
38254 target = gen_reg_rtx (tmode);
/* Expand and legitimize the remaining call arguments.  */
38257 for (i = 0; i < nargs; i++)
38259 machine_mode mode = insn_p->operand[i + 1].mode;
38262 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38263 op = expand_normal (arg);
38264 match = insn_p->operand[i + 1].predicate (op, mode);
/* Diagnose a non-immediate where the icode requires one.  */
38266 if (last_arg_constant && (i + 1) == nargs)
38270 if (icode == CODE_FOR_lwp_lwpvalsi3
38271 || icode == CODE_FOR_lwp_lwpinssi3
38272 || icode == CODE_FOR_lwp_lwpvaldi3
38273 || icode == CODE_FOR_lwp_lwpinsdi3)
38274 error ("the last argument must be a 32-bit immediate");
38276 error ("the last argument must be an 8-bit immediate");
38284 /* This must be the memory operand. */
38285 op = ix86_zero_extend_to_Pmode (op);
38286 op = gen_rtx_MEM (mode, op);
38287 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38288 on it. Try to improve it using get_pointer_alignment,
38289 and if the special builtin is one that requires strict
38290 mode alignment, also from it's GET_MODE_ALIGNMENT.
38291 Failure to do so could lead to ix86_legitimate_combined_insn
38292 rejecting all changes to such insns. */
38293 unsigned int align = get_pointer_alignment (arg);
38294 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38295 align = GET_MODE_ALIGNMENT (mode);
38296 if (MEM_ALIGN (op) < align)
38297 set_mem_align (op, align);
38301 /* This must be register. */
38302 if (VECTOR_MODE_P (mode))
38303 op = safe_vector_operand (op, mode);
38305 op = fixup_modeless_constant (op, mode);
38307 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38308 op = copy_to_mode_reg (mode, op);
38311 op = copy_to_reg (op);
38312 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38318 args[i].mode = mode;
/* Emit the insn pattern for the collected operand count.  */
38324 pat = GEN_FCN (icode) (target);
38327 pat = GEN_FCN (icode) (target, args[0].op);
38330 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38333 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38336 gcc_unreachable ();
38342 return klass == store ? 0 : target;
38345 /* Return the integer constant in ARG. Constrain it to be in the range
38346 of the subparts of VEC_TYPE; issue an error if not. */
38349 get_element_number (tree vec_type, tree arg)
38351 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant selectors and constants above MAX.  The comma
   expression assigns ELT only after the fits-uhwi check succeeds.  */
38353 if (!tree_fits_uhwi_p (arg)
38354 || (elt = tree_to_uhwi (arg), elt > max))
38356 error ("selector must be an integer constant in the range 0..%wi", max);
38363 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38364 ix86_expand_vector_init. We DO have language-level syntax for this, in
38365 the form of (type){ init-list }. Except that since we can't place emms
38366 instructions from inside the compiler, we can't allow the use of MMX
38367 registers unless the user explicitly asks for it. So we do *not* define
38368 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38369 we have builtins invoked by mmintrin.h that gives us license to emit
38370 these sorts of instructions. */
38373 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38375 machine_mode tmode = TYPE_MODE (type);
38376 machine_mode inner_mode = GET_MODE_INNER (tmode);
38377 int i, n_elt = GET_MODE_NUNITS (tmode);
38378 rtvec v = rtvec_alloc (n_elt);
/* The builtin supplies exactly one scalar argument per vector element.  */
38380 gcc_assert (VECTOR_MODE_P (tmode));
38381 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and narrow it to the vector's inner mode.  */
38383 for (i = 0; i < n_elt; ++i)
38385 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38386 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38389 if (!target || !register_operand (target, tmode))
38390 target = gen_reg_rtx (tmode);
38392 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38396 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38397 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38398 had a language-level syntax for referencing vector elements. */
38401 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38403 machine_mode tmode, mode0;
38408 arg0 = CALL_EXPR_ARG (exp, 0);
38409 arg1 = CALL_EXPR_ARG (exp, 1);
/* arg0 is the vector, arg1 the constant element selector (validated
   and range-checked by get_element_number).  */
38411 op0 = expand_normal (arg0);
38412 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element (result) mode, MODE0 the vector mode.  */
38414 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38415 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38416 gcc_assert (VECTOR_MODE_P (mode0));
38418 op0 = force_reg (mode0, op0);
38420 if (optimize || !target || !register_operand (target, tmode))
38421 target = gen_reg_rtx (tmode);
38423 ix86_expand_vector_extract (true, target, op0, elt);
38428 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38429 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38430 a language-level syntax for referencing vector elements. */
38433 ix86_expand_vec_set_builtin (tree exp)
38435 machine_mode tmode, mode1;
38436 tree arg0, arg1, arg2;
38438 rtx op0, op1, target;
/* arg0 is the vector, arg1 the new element value, arg2 the constant
   element selector.  */
38440 arg0 = CALL_EXPR_ARG (exp, 0);
38441 arg1 = CALL_EXPR_ARG (exp, 1);
38442 arg2 = CALL_EXPR_ARG (exp, 2);
38444 tmode = TYPE_MODE (TREE_TYPE (arg0));
38445 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38446 gcc_assert (VECTOR_MODE_P (tmode));
38448 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38449 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38450 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the element mode if expansion produced a
   different (non-VOID) mode.  */
38452 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38453 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38455 op0 = force_reg (tmode, op0);
38456 op1 = force_reg (mode1, op1);
38458 /* OP0 is the source of these builtin functions and shouldn't be
38459 modified. Create a copy, use it and return it as target. */
38460 target = gen_reg_rtx (tmode);
38461 emit_move_insn (target, op0)
38462 ix86_expand_vector_set (true, target, op1, elt);
38467 /* Emit conditional move of SRC to DST with condition
38470 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
/* Emit DST = SRC when (OP1 CODE OP2) holds.  Two strategies: a real
   conditional move, or a compare-and-branch around a plain move.
   NOTE(review): the guard selecting between the two paths (presumably
   TARGET_CMOVE) is not visible in this excerpt — confirm in full source.  */
38476 t = ix86_expand_compare (code, op1, op2);
38477 emit_insn (gen_rtx_SET (VOIDmode, dst,
38478 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Fallback: branch over the move when the reversed condition holds.  */
38483 rtx nomove = gen_label_rtx ();
38484 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38485 const0_rtx, GET_MODE (op1), 1, nomove);
38486 emit_move_insn (dst, src);
38487 emit_label (nomove);
38491 /* Choose max of DST and SRC and put it to DST. */
38493 ix86_emit_move_max (rtx dst, rtx src)
/* Unsigned maximum: move SRC into DST when DST < SRC (LTU compare).  */
38495 ix86_emit_cmove (dst, src, LTU, dst, src);
38498 /* Expand an expression EXP that calls a built-in function,
38499 with result going to TARGET if that's convenient
38500 (and in mode MODE if that's convenient).
38501 SUBTARGET may be used as the target for computing one of EXP's operands.
38502 IGNORE is nonzero if the value is to be ignored. */
38505 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38506 machine_mode mode, int ignore)
38508 const struct builtin_description *d;
38510 enum insn_code icode;
38511 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38512 tree arg0, arg1, arg2, arg3, arg4;
38513 rtx op0, op1, op2, op3, op4, pat, insn;
38514 machine_mode mode0, mode1, mode2, mode3, mode4;
38515 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38517 /* For CPU builtins that can be folded, fold first and expand the fold. */
38520 case IX86_BUILTIN_CPU_INIT:
38522 /* Make it call __cpu_indicator_init in libgcc. */
38523 tree call_expr, fndecl, type;
38524 type = build_function_type_list (integer_type_node, NULL_TREE);
38525 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38526 call_expr = build_call_expr (fndecl, 0);
38527 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38529 case IX86_BUILTIN_CPU_IS:
38530 case IX86_BUILTIN_CPU_SUPPORTS:
38532 tree arg0 = CALL_EXPR_ARG (exp, 0);
38533 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38534 gcc_assert (fold_expr != NULL_TREE);
38535 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38539 /* Determine whether the builtin function is available under the current ISA.
38540 Originally the builtin was not created if it wasn't applicable to the
38541 current ISA based on the command line switches. With function specific
38542 options, we need to check in the context of the function making the call
38543 whether it is supported. */
38544 if (ix86_builtins_isa[fcode].isa
38545 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38547 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38548 NULL, (enum fpmath_unit) 0, false);
38551 error ("%qE needs unknown isa option", fndecl);
38554 gcc_assert (opts != NULL);
38555 error ("%qE needs isa option %s", fndecl, opts);
38563 case IX86_BUILTIN_BNDMK:
38565 || GET_MODE (target) != BNDmode
38566 || !register_operand (target, BNDmode))
38567 target = gen_reg_rtx (BNDmode);
38569 arg0 = CALL_EXPR_ARG (exp, 0);
38570 arg1 = CALL_EXPR_ARG (exp, 1);
38572 op0 = expand_normal (arg0);
38573 op1 = expand_normal (arg1);
38575 if (!register_operand (op0, Pmode))
38576 op0 = ix86_zero_extend_to_Pmode (op0);
38577 if (!register_operand (op1, Pmode))
38578 op1 = ix86_zero_extend_to_Pmode (op1);
38580 /* Builtin arg1 is size of block but instruction op1 should
38582 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38583 NULL_RTX, 1, OPTAB_DIRECT);
38585 emit_insn (BNDmode == BND64mode
38586 ? gen_bnd64_mk (target, op0, op1)
38587 : gen_bnd32_mk (target, op0, op1));
38590 case IX86_BUILTIN_BNDSTX:
38591 arg0 = CALL_EXPR_ARG (exp, 0);
38592 arg1 = CALL_EXPR_ARG (exp, 1);
38593 arg2 = CALL_EXPR_ARG (exp, 2);
38595 op0 = expand_normal (arg0);
38596 op1 = expand_normal (arg1);
38597 op2 = expand_normal (arg2);
38599 if (!register_operand (op0, Pmode))
38600 op0 = ix86_zero_extend_to_Pmode (op0);
38601 if (!register_operand (op1, BNDmode))
38602 op1 = copy_to_mode_reg (BNDmode, op1);
38603 if (!register_operand (op2, Pmode))
38604 op2 = ix86_zero_extend_to_Pmode (op2);
38606 emit_insn (BNDmode == BND64mode
38607 ? gen_bnd64_stx (op2, op0, op1)
38608 : gen_bnd32_stx (op2, op0, op1));
38611 case IX86_BUILTIN_BNDLDX:
38613 || GET_MODE (target) != BNDmode
38614 || !register_operand (target, BNDmode))
38615 target = gen_reg_rtx (BNDmode);
38617 arg0 = CALL_EXPR_ARG (exp, 0);
38618 arg1 = CALL_EXPR_ARG (exp, 1);
38620 op0 = expand_normal (arg0);
38621 op1 = expand_normal (arg1);
38623 if (!register_operand (op0, Pmode))
38624 op0 = ix86_zero_extend_to_Pmode (op0);
38625 if (!register_operand (op1, Pmode))
38626 op1 = ix86_zero_extend_to_Pmode (op1);
38628 emit_insn (BNDmode == BND64mode
38629 ? gen_bnd64_ldx (target, op0, op1)
38630 : gen_bnd32_ldx (target, op0, op1));
38633 case IX86_BUILTIN_BNDCL:
38634 arg0 = CALL_EXPR_ARG (exp, 0);
38635 arg1 = CALL_EXPR_ARG (exp, 1);
38637 op0 = expand_normal (arg0);
38638 op1 = expand_normal (arg1);
38640 if (!register_operand (op0, Pmode))
38641 op0 = ix86_zero_extend_to_Pmode (op0);
38642 if (!register_operand (op1, BNDmode))
38643 op1 = copy_to_mode_reg (BNDmode, op1);
38645 emit_insn (BNDmode == BND64mode
38646 ? gen_bnd64_cl (op1, op0)
38647 : gen_bnd32_cl (op1, op0));
38650 case IX86_BUILTIN_BNDCU:
38651 arg0 = CALL_EXPR_ARG (exp, 0);
38652 arg1 = CALL_EXPR_ARG (exp, 1);
38654 op0 = expand_normal (arg0);
38655 op1 = expand_normal (arg1);
38657 if (!register_operand (op0, Pmode))
38658 op0 = ix86_zero_extend_to_Pmode (op0);
38659 if (!register_operand (op1, BNDmode))
38660 op1 = copy_to_mode_reg (BNDmode, op1);
38662 emit_insn (BNDmode == BND64mode
38663 ? gen_bnd64_cu (op1, op0)
38664 : gen_bnd32_cu (op1, op0));
38667 case IX86_BUILTIN_BNDRET:
38668 arg0 = CALL_EXPR_ARG (exp, 0);
38669 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38670 target = chkp_get_rtl_bounds (arg0);
38672 /* If no bounds were specified for returned value,
38673 then use INIT bounds. It usually happens when
38674 some built-in function is expanded. */
38677 rtx t1 = gen_reg_rtx (Pmode);
38678 rtx t2 = gen_reg_rtx (Pmode);
38679 target = gen_reg_rtx (BNDmode);
38680 emit_move_insn (t1, const0_rtx);
38681 emit_move_insn (t2, constm1_rtx);
38682 emit_insn (BNDmode == BND64mode
38683 ? gen_bnd64_mk (target, t1, t2)
38684 : gen_bnd32_mk (target, t1, t2));
38687 gcc_assert (target && REG_P (target));
38690 case IX86_BUILTIN_BNDNARROW:
38692 rtx m1, m1h1, m1h2, lb, ub, t1;
38694 /* Return value and lb. */
38695 arg0 = CALL_EXPR_ARG (exp, 0);
38697 arg1 = CALL_EXPR_ARG (exp, 1);
38699 arg2 = CALL_EXPR_ARG (exp, 2);
38701 lb = expand_normal (arg0);
38702 op1 = expand_normal (arg1);
38703 op2 = expand_normal (arg2);
38705 /* Size was passed but we need to use (size - 1) as for bndmk. */
38706 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38707 NULL_RTX, 1, OPTAB_DIRECT);
38709 /* Add LB to size and inverse to get UB. */
38710 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38711 op2, 1, OPTAB_DIRECT);
38712 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38714 if (!register_operand (lb, Pmode))
38715 lb = ix86_zero_extend_to_Pmode (lb);
38716 if (!register_operand (ub, Pmode))
38717 ub = ix86_zero_extend_to_Pmode (ub);
38719 /* We need to move bounds to memory before any computations. */
38724 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38725 emit_move_insn (m1, op1);
38728 /* Generate mem expression to be used for access to LB and UB. */
38729 m1h1 = adjust_address (m1, Pmode, 0);
38730 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38732 t1 = gen_reg_rtx (Pmode);
38735 emit_move_insn (t1, m1h1);
38736 ix86_emit_move_max (t1, lb);
38737 emit_move_insn (m1h1, t1);
38739 /* Compute UB. UB is stored in 1's complement form. Therefore
38740 we also use max here. */
38741 emit_move_insn (t1, m1h2);
38742 ix86_emit_move_max (t1, ub);
38743 emit_move_insn (m1h2, t1);
38745 op2 = gen_reg_rtx (BNDmode);
38746 emit_move_insn (op2, m1);
38748 return chkp_join_splitted_slot (lb, op2);
38751 case IX86_BUILTIN_BNDINT:
38753 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38756 || GET_MODE (target) != BNDmode
38757 || !register_operand (target, BNDmode))
38758 target = gen_reg_rtx (BNDmode);
38760 arg0 = CALL_EXPR_ARG (exp, 0);
38761 arg1 = CALL_EXPR_ARG (exp, 1);
38763 op0 = expand_normal (arg0);
38764 op1 = expand_normal (arg1);
38766 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38767 rh1 = adjust_address (res, Pmode, 0);
38768 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38770 /* Put first bounds to temporaries. */
38771 lb1 = gen_reg_rtx (Pmode);
38772 ub1 = gen_reg_rtx (Pmode);
38775 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38776 emit_move_insn (ub1, adjust_address (op0, Pmode,
38777 GET_MODE_SIZE (Pmode)));
38781 emit_move_insn (res, op0);
38782 emit_move_insn (lb1, rh1);
38783 emit_move_insn (ub1, rh2);
38786 /* Put second bounds to temporaries. */
38787 lb2 = gen_reg_rtx (Pmode);
38788 ub2 = gen_reg_rtx (Pmode);
38791 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38792 emit_move_insn (ub2, adjust_address (op1, Pmode,
38793 GET_MODE_SIZE (Pmode)));
38797 emit_move_insn (res, op1);
38798 emit_move_insn (lb2, rh1);
38799 emit_move_insn (ub2, rh2);
38803 ix86_emit_move_max (lb1, lb2);
38804 emit_move_insn (rh1, lb1);
38806 /* Compute UB. UB is stored in 1's complement form. Therefore
38807 we also use max here. */
38808 ix86_emit_move_max (ub1, ub2);
38809 emit_move_insn (rh2, ub1);
38811 emit_move_insn (target, res);
38816 case IX86_BUILTIN_SIZEOF:
38822 || GET_MODE (target) != Pmode
38823 || !register_operand (target, Pmode))
38824 target = gen_reg_rtx (Pmode);
38826 arg0 = CALL_EXPR_ARG (exp, 0);
38827 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38829 name = DECL_ASSEMBLER_NAME (arg0);
38830 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38832 emit_insn (Pmode == SImode
38833 ? gen_move_size_reloc_si (target, symbol)
38834 : gen_move_size_reloc_di (target, symbol));
38839 case IX86_BUILTIN_BNDLOWER:
38844 || GET_MODE (target) != Pmode
38845 || !register_operand (target, Pmode))
38846 target = gen_reg_rtx (Pmode);
38848 arg0 = CALL_EXPR_ARG (exp, 0);
38849 op0 = expand_normal (arg0);
38851 /* We need to move bounds to memory first. */
38856 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38857 emit_move_insn (mem, op0);
38860 /* Generate mem expression to access LB and load it. */
38861 hmem = adjust_address (mem, Pmode, 0);
38862 emit_move_insn (target, hmem);
38867 case IX86_BUILTIN_BNDUPPER:
38869 rtx mem, hmem, res;
38872 || GET_MODE (target) != Pmode
38873 || !register_operand (target, Pmode))
38874 target = gen_reg_rtx (Pmode);
38876 arg0 = CALL_EXPR_ARG (exp, 0);
38877 op0 = expand_normal (arg0);
38879 /* We need to move bounds to memory first. */
38884 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38885 emit_move_insn (mem, op0);
38888 /* Generate mem expression to access UB. */
38889 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38891 /* We need to inverse all bits of UB. */
38892 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38895 emit_move_insn (target, res);
38900 case IX86_BUILTIN_MASKMOVQ:
38901 case IX86_BUILTIN_MASKMOVDQU:
38902 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38903 ? CODE_FOR_mmx_maskmovq
38904 : CODE_FOR_sse2_maskmovdqu);
38905 /* Note the arg order is different from the operand order. */
38906 arg1 = CALL_EXPR_ARG (exp, 0);
38907 arg2 = CALL_EXPR_ARG (exp, 1);
38908 arg0 = CALL_EXPR_ARG (exp, 2);
38909 op0 = expand_normal (arg0);
38910 op1 = expand_normal (arg1);
38911 op2 = expand_normal (arg2);
38912 mode0 = insn_data[icode].operand[0].mode;
38913 mode1 = insn_data[icode].operand[1].mode;
38914 mode2 = insn_data[icode].operand[2].mode;
38916 op0 = ix86_zero_extend_to_Pmode (op0);
38917 op0 = gen_rtx_MEM (mode1, op0);
38919 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38920 op0 = copy_to_mode_reg (mode0, op0);
38921 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38922 op1 = copy_to_mode_reg (mode1, op1);
38923 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38924 op2 = copy_to_mode_reg (mode2, op2);
38925 pat = GEN_FCN (icode) (op0, op1, op2);
38931 case IX86_BUILTIN_LDMXCSR:
38932 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38933 target = assign_386_stack_local (SImode, SLOT_TEMP);
38934 emit_move_insn (target, op0);
38935 emit_insn (gen_sse_ldmxcsr (target));
38938 case IX86_BUILTIN_STMXCSR:
38939 target = assign_386_stack_local (SImode, SLOT_TEMP);
38940 emit_insn (gen_sse_stmxcsr (target));
38941 return copy_to_mode_reg (SImode, target);
38943 case IX86_BUILTIN_CLFLUSH:
38944 arg0 = CALL_EXPR_ARG (exp, 0);
38945 op0 = expand_normal (arg0);
38946 icode = CODE_FOR_sse2_clflush;
38947 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38948 op0 = ix86_zero_extend_to_Pmode (op0);
38950 emit_insn (gen_sse2_clflush (op0));
38953 case IX86_BUILTIN_CLWB:
38954 arg0 = CALL_EXPR_ARG (exp, 0);
38955 op0 = expand_normal (arg0);
38956 icode = CODE_FOR_clwb;
38957 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38958 op0 = ix86_zero_extend_to_Pmode (op0);
38960 emit_insn (gen_clwb (op0));
38963 case IX86_BUILTIN_CLFLUSHOPT:
38964 arg0 = CALL_EXPR_ARG (exp, 0);
38965 op0 = expand_normal (arg0);
38966 icode = CODE_FOR_clflushopt;
38967 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38968 op0 = ix86_zero_extend_to_Pmode (op0);
38970 emit_insn (gen_clflushopt (op0));
38973 case IX86_BUILTIN_MONITOR:
38974 arg0 = CALL_EXPR_ARG (exp, 0);
38975 arg1 = CALL_EXPR_ARG (exp, 1);
38976 arg2 = CALL_EXPR_ARG (exp, 2);
38977 op0 = expand_normal (arg0);
38978 op1 = expand_normal (arg1);
38979 op2 = expand_normal (arg2);
38981 op0 = ix86_zero_extend_to_Pmode (op0);
38983 op1 = copy_to_mode_reg (SImode, op1);
38985 op2 = copy_to_mode_reg (SImode, op2);
38986 emit_insn (ix86_gen_monitor (op0, op1, op2));
38989 case IX86_BUILTIN_MWAIT:
38990 arg0 = CALL_EXPR_ARG (exp, 0);
38991 arg1 = CALL_EXPR_ARG (exp, 1);
38992 op0 = expand_normal (arg0);
38993 op1 = expand_normal (arg1);
38995 op0 = copy_to_mode_reg (SImode, op0);
38997 op1 = copy_to_mode_reg (SImode, op1);
38998 emit_insn (gen_sse3_mwait (op0, op1));
39001 case IX86_BUILTIN_VEC_INIT_V2SI:
39002 case IX86_BUILTIN_VEC_INIT_V4HI:
39003 case IX86_BUILTIN_VEC_INIT_V8QI:
39004 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39006 case IX86_BUILTIN_VEC_EXT_V2DF:
39007 case IX86_BUILTIN_VEC_EXT_V2DI:
39008 case IX86_BUILTIN_VEC_EXT_V4SF:
39009 case IX86_BUILTIN_VEC_EXT_V4SI:
39010 case IX86_BUILTIN_VEC_EXT_V8HI:
39011 case IX86_BUILTIN_VEC_EXT_V2SI:
39012 case IX86_BUILTIN_VEC_EXT_V4HI:
39013 case IX86_BUILTIN_VEC_EXT_V16QI:
39014 return ix86_expand_vec_ext_builtin (exp, target);
39016 case IX86_BUILTIN_VEC_SET_V2DI:
39017 case IX86_BUILTIN_VEC_SET_V4SF:
39018 case IX86_BUILTIN_VEC_SET_V4SI:
39019 case IX86_BUILTIN_VEC_SET_V8HI:
39020 case IX86_BUILTIN_VEC_SET_V4HI:
39021 case IX86_BUILTIN_VEC_SET_V16QI:
39022 return ix86_expand_vec_set_builtin (exp);
39024 case IX86_BUILTIN_INFQ:
39025 case IX86_BUILTIN_HUGE_VALQ:
39027 REAL_VALUE_TYPE inf;
39031 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39033 tmp = validize_mem (force_const_mem (mode, tmp));
39036 target = gen_reg_rtx (mode);
39038 emit_move_insn (target, tmp);
39042 case IX86_BUILTIN_RDPMC:
39043 case IX86_BUILTIN_RDTSC:
39044 case IX86_BUILTIN_RDTSCP:
39046 op0 = gen_reg_rtx (DImode);
39047 op1 = gen_reg_rtx (DImode);
39049 if (fcode == IX86_BUILTIN_RDPMC)
39051 arg0 = CALL_EXPR_ARG (exp, 0);
39052 op2 = expand_normal (arg0);
39053 if (!register_operand (op2, SImode))
39054 op2 = copy_to_mode_reg (SImode, op2);
39056 insn = (TARGET_64BIT
39057 ? gen_rdpmc_rex64 (op0, op1, op2)
39058 : gen_rdpmc (op0, op2));
39061 else if (fcode == IX86_BUILTIN_RDTSC)
39063 insn = (TARGET_64BIT
39064 ? gen_rdtsc_rex64 (op0, op1)
39065 : gen_rdtsc (op0));
39070 op2 = gen_reg_rtx (SImode);
39072 insn = (TARGET_64BIT
39073 ? gen_rdtscp_rex64 (op0, op1, op2)
39074 : gen_rdtscp (op0, op2));
39077 arg0 = CALL_EXPR_ARG (exp, 0);
39078 op4 = expand_normal (arg0);
39079 if (!address_operand (op4, VOIDmode))
39081 op4 = convert_memory_address (Pmode, op4);
39082 op4 = copy_addr_to_reg (op4);
39084 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39089 /* mode is VOIDmode if __builtin_rd* has been called
39091 if (mode == VOIDmode)
39093 target = gen_reg_rtx (mode);
39098 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39099 op1, 1, OPTAB_DIRECT);
39100 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39101 op0, 1, OPTAB_DIRECT);
39104 emit_move_insn (target, op0);
39107 case IX86_BUILTIN_FXSAVE:
39108 case IX86_BUILTIN_FXRSTOR:
39109 case IX86_BUILTIN_FXSAVE64:
39110 case IX86_BUILTIN_FXRSTOR64:
39111 case IX86_BUILTIN_FNSTENV:
39112 case IX86_BUILTIN_FLDENV:
39116 case IX86_BUILTIN_FXSAVE:
39117 icode = CODE_FOR_fxsave;
39119 case IX86_BUILTIN_FXRSTOR:
39120 icode = CODE_FOR_fxrstor;
39122 case IX86_BUILTIN_FXSAVE64:
39123 icode = CODE_FOR_fxsave64;
39125 case IX86_BUILTIN_FXRSTOR64:
39126 icode = CODE_FOR_fxrstor64;
39128 case IX86_BUILTIN_FNSTENV:
39129 icode = CODE_FOR_fnstenv;
39131 case IX86_BUILTIN_FLDENV:
39132 icode = CODE_FOR_fldenv;
39135 gcc_unreachable ();
39138 arg0 = CALL_EXPR_ARG (exp, 0);
39139 op0 = expand_normal (arg0);
39141 if (!address_operand (op0, VOIDmode))
39143 op0 = convert_memory_address (Pmode, op0);
39144 op0 = copy_addr_to_reg (op0);
39146 op0 = gen_rtx_MEM (mode0, op0);
39148 pat = GEN_FCN (icode) (op0);
39153 case IX86_BUILTIN_XSAVE:
39154 case IX86_BUILTIN_XRSTOR:
39155 case IX86_BUILTIN_XSAVE64:
39156 case IX86_BUILTIN_XRSTOR64:
39157 case IX86_BUILTIN_XSAVEOPT:
39158 case IX86_BUILTIN_XSAVEOPT64:
39159 case IX86_BUILTIN_XSAVES:
39160 case IX86_BUILTIN_XRSTORS:
39161 case IX86_BUILTIN_XSAVES64:
39162 case IX86_BUILTIN_XRSTORS64:
39163 case IX86_BUILTIN_XSAVEC:
39164 case IX86_BUILTIN_XSAVEC64:
39165 arg0 = CALL_EXPR_ARG (exp, 0);
39166 arg1 = CALL_EXPR_ARG (exp, 1);
39167 op0 = expand_normal (arg0);
39168 op1 = expand_normal (arg1);
39170 if (!address_operand (op0, VOIDmode))
39172 op0 = convert_memory_address (Pmode, op0);
39173 op0 = copy_addr_to_reg (op0);
39175 op0 = gen_rtx_MEM (BLKmode, op0);
39177 op1 = force_reg (DImode, op1);
39181 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39182 NULL, 1, OPTAB_DIRECT);
39185 case IX86_BUILTIN_XSAVE:
39186 icode = CODE_FOR_xsave_rex64;
39188 case IX86_BUILTIN_XRSTOR:
39189 icode = CODE_FOR_xrstor_rex64;
39191 case IX86_BUILTIN_XSAVE64:
39192 icode = CODE_FOR_xsave64;
39194 case IX86_BUILTIN_XRSTOR64:
39195 icode = CODE_FOR_xrstor64;
39197 case IX86_BUILTIN_XSAVEOPT:
39198 icode = CODE_FOR_xsaveopt_rex64;
39200 case IX86_BUILTIN_XSAVEOPT64:
39201 icode = CODE_FOR_xsaveopt64;
39203 case IX86_BUILTIN_XSAVES:
39204 icode = CODE_FOR_xsaves_rex64;
39206 case IX86_BUILTIN_XRSTORS:
39207 icode = CODE_FOR_xrstors_rex64;
39209 case IX86_BUILTIN_XSAVES64:
39210 icode = CODE_FOR_xsaves64;
39212 case IX86_BUILTIN_XRSTORS64:
39213 icode = CODE_FOR_xrstors64;
39215 case IX86_BUILTIN_XSAVEC:
39216 icode = CODE_FOR_xsavec_rex64;
39218 case IX86_BUILTIN_XSAVEC64:
39219 icode = CODE_FOR_xsavec64;
39222 gcc_unreachable ();
39225 op2 = gen_lowpart (SImode, op2);
39226 op1 = gen_lowpart (SImode, op1);
39227 pat = GEN_FCN (icode) (op0, op1, op2);
39233 case IX86_BUILTIN_XSAVE:
39234 icode = CODE_FOR_xsave;
39236 case IX86_BUILTIN_XRSTOR:
39237 icode = CODE_FOR_xrstor;
39239 case IX86_BUILTIN_XSAVEOPT:
39240 icode = CODE_FOR_xsaveopt;
39242 case IX86_BUILTIN_XSAVES:
39243 icode = CODE_FOR_xsaves;
39245 case IX86_BUILTIN_XRSTORS:
39246 icode = CODE_FOR_xrstors;
39248 case IX86_BUILTIN_XSAVEC:
39249 icode = CODE_FOR_xsavec;
39252 gcc_unreachable ();
39254 pat = GEN_FCN (icode) (op0, op1);
39261 case IX86_BUILTIN_LLWPCB:
39262 arg0 = CALL_EXPR_ARG (exp, 0);
39263 op0 = expand_normal (arg0);
39264 icode = CODE_FOR_lwp_llwpcb;
39265 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39266 op0 = ix86_zero_extend_to_Pmode (op0);
39267 emit_insn (gen_lwp_llwpcb (op0));
39270 case IX86_BUILTIN_SLWPCB:
39271 icode = CODE_FOR_lwp_slwpcb;
39273 || !insn_data[icode].operand[0].predicate (target, Pmode))
39274 target = gen_reg_rtx (Pmode);
39275 emit_insn (gen_lwp_slwpcb (target));
39278 case IX86_BUILTIN_BEXTRI32:
39279 case IX86_BUILTIN_BEXTRI64:
39280 arg0 = CALL_EXPR_ARG (exp, 0);
39281 arg1 = CALL_EXPR_ARG (exp, 1);
39282 op0 = expand_normal (arg0);
39283 op1 = expand_normal (arg1);
39284 icode = (fcode == IX86_BUILTIN_BEXTRI32
39285 ? CODE_FOR_tbm_bextri_si
39286 : CODE_FOR_tbm_bextri_di);
39287 if (!CONST_INT_P (op1))
39289 error ("last argument must be an immediate");
39294 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39295 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39296 op1 = GEN_INT (length);
39297 op2 = GEN_INT (lsb_index);
39298 pat = GEN_FCN (icode) (target, op0, op1, op2);
39304 case IX86_BUILTIN_RDRAND16_STEP:
39305 icode = CODE_FOR_rdrandhi_1;
39309 case IX86_BUILTIN_RDRAND32_STEP:
39310 icode = CODE_FOR_rdrandsi_1;
39314 case IX86_BUILTIN_RDRAND64_STEP:
39315 icode = CODE_FOR_rdranddi_1;
39319 op0 = gen_reg_rtx (mode0);
39320 emit_insn (GEN_FCN (icode) (op0));
39322 arg0 = CALL_EXPR_ARG (exp, 0);
39323 op1 = expand_normal (arg0);
39324 if (!address_operand (op1, VOIDmode))
39326 op1 = convert_memory_address (Pmode, op1);
39327 op1 = copy_addr_to_reg (op1);
39329 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39331 op1 = gen_reg_rtx (SImode);
39332 emit_move_insn (op1, CONST1_RTX (SImode));
39334 /* Emit SImode conditional move. */
39335 if (mode0 == HImode)
39337 op2 = gen_reg_rtx (SImode);
39338 emit_insn (gen_zero_extendhisi2 (op2, op0));
39340 else if (mode0 == SImode)
39343 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39346 || !register_operand (target, SImode))
39347 target = gen_reg_rtx (SImode);
39349 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39351 emit_insn (gen_rtx_SET (VOIDmode, target,
39352 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39355 case IX86_BUILTIN_RDSEED16_STEP:
39356 icode = CODE_FOR_rdseedhi_1;
39360 case IX86_BUILTIN_RDSEED32_STEP:
39361 icode = CODE_FOR_rdseedsi_1;
39365 case IX86_BUILTIN_RDSEED64_STEP:
39366 icode = CODE_FOR_rdseeddi_1;
39370 op0 = gen_reg_rtx (mode0);
39371 emit_insn (GEN_FCN (icode) (op0));
39373 arg0 = CALL_EXPR_ARG (exp, 0);
39374 op1 = expand_normal (arg0);
39375 if (!address_operand (op1, VOIDmode))
39377 op1 = convert_memory_address (Pmode, op1);
39378 op1 = copy_addr_to_reg (op1);
39380 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39382 op2 = gen_reg_rtx (QImode);
39384 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39386 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39389 || !register_operand (target, SImode))
39390 target = gen_reg_rtx (SImode);
39392 emit_insn (gen_zero_extendqisi2 (target, op2));
39395 case IX86_BUILTIN_SBB32:
39396 icode = CODE_FOR_subsi3_carry;
39400 case IX86_BUILTIN_SBB64:
39401 icode = CODE_FOR_subdi3_carry;
39405 case IX86_BUILTIN_ADDCARRYX32:
39406 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39410 case IX86_BUILTIN_ADDCARRYX64:
39411 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39415 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39416 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39417 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39418 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39420 op0 = gen_reg_rtx (QImode);
39422 /* Generate CF from input operand. */
39423 op1 = expand_normal (arg0);
39424 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39425 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39427 /* Gen ADCX instruction to compute X+Y+CF. */
39428 op2 = expand_normal (arg1);
39429 op3 = expand_normal (arg2);
39432 op2 = copy_to_mode_reg (mode0, op2);
39434 op3 = copy_to_mode_reg (mode0, op3);
39436 op0 = gen_reg_rtx (mode0);
39438 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39439 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39440 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39442 /* Store the result. */
39443 op4 = expand_normal (arg3);
39444 if (!address_operand (op4, VOIDmode))
39446 op4 = convert_memory_address (Pmode, op4);
39447 op4 = copy_addr_to_reg (op4);
39449 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39451 /* Return current CF value. */
39453 target = gen_reg_rtx (QImode);
39455 PUT_MODE (pat, QImode);
39456 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39459 case IX86_BUILTIN_READ_FLAGS:
39460 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39463 || target == NULL_RTX
39464 || !nonimmediate_operand (target, word_mode)
39465 || GET_MODE (target) != word_mode)
39466 target = gen_reg_rtx (word_mode);
39468 emit_insn (gen_pop (target));
39471 case IX86_BUILTIN_WRITE_FLAGS:
39473 arg0 = CALL_EXPR_ARG (exp, 0);
39474 op0 = expand_normal (arg0);
39475 if (!general_no_elim_operand (op0, word_mode))
39476 op0 = copy_to_mode_reg (word_mode, op0);
39478 emit_insn (gen_push (op0));
39479 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39482 case IX86_BUILTIN_KORTESTC16:
39483 icode = CODE_FOR_kortestchi;
39488 case IX86_BUILTIN_KORTESTZ16:
39489 icode = CODE_FOR_kortestzhi;
39494 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39495 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39496 op0 = expand_normal (arg0);
39497 op1 = expand_normal (arg1);
39499 op0 = copy_to_reg (op0);
39500 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39501 op1 = copy_to_reg (op1);
39502 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39504 target = gen_reg_rtx (QImode);
39505 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39507 /* Emit kortest. */
39508 emit_insn (GEN_FCN (icode) (op0, op1));
39509 /* And use setcc to return result from flags. */
39510 ix86_expand_setcc (target, EQ,
39511 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39514 case IX86_BUILTIN_GATHERSIV2DF:
39515 icode = CODE_FOR_avx2_gathersiv2df;
39517 case IX86_BUILTIN_GATHERSIV4DF:
39518 icode = CODE_FOR_avx2_gathersiv4df;
39520 case IX86_BUILTIN_GATHERDIV2DF:
39521 icode = CODE_FOR_avx2_gatherdiv2df;
39523 case IX86_BUILTIN_GATHERDIV4DF:
39524 icode = CODE_FOR_avx2_gatherdiv4df;
39526 case IX86_BUILTIN_GATHERSIV4SF:
39527 icode = CODE_FOR_avx2_gathersiv4sf;
39529 case IX86_BUILTIN_GATHERSIV8SF:
39530 icode = CODE_FOR_avx2_gathersiv8sf;
39532 case IX86_BUILTIN_GATHERDIV4SF:
39533 icode = CODE_FOR_avx2_gatherdiv4sf;
39535 case IX86_BUILTIN_GATHERDIV8SF:
39536 icode = CODE_FOR_avx2_gatherdiv8sf;
39538 case IX86_BUILTIN_GATHERSIV2DI:
39539 icode = CODE_FOR_avx2_gathersiv2di;
39541 case IX86_BUILTIN_GATHERSIV4DI:
39542 icode = CODE_FOR_avx2_gathersiv4di;
39544 case IX86_BUILTIN_GATHERDIV2DI:
39545 icode = CODE_FOR_avx2_gatherdiv2di;
39547 case IX86_BUILTIN_GATHERDIV4DI:
39548 icode = CODE_FOR_avx2_gatherdiv4di;
39550 case IX86_BUILTIN_GATHERSIV4SI:
39551 icode = CODE_FOR_avx2_gathersiv4si;
39553 case IX86_BUILTIN_GATHERSIV8SI:
39554 icode = CODE_FOR_avx2_gathersiv8si;
39556 case IX86_BUILTIN_GATHERDIV4SI:
39557 icode = CODE_FOR_avx2_gatherdiv4si;
39559 case IX86_BUILTIN_GATHERDIV8SI:
39560 icode = CODE_FOR_avx2_gatherdiv8si;
39562 case IX86_BUILTIN_GATHERALTSIV4DF:
39563 icode = CODE_FOR_avx2_gathersiv4df;
39565 case IX86_BUILTIN_GATHERALTDIV8SF:
39566 icode = CODE_FOR_avx2_gatherdiv8sf;
39568 case IX86_BUILTIN_GATHERALTSIV4DI:
39569 icode = CODE_FOR_avx2_gathersiv4di;
39571 case IX86_BUILTIN_GATHERALTDIV8SI:
39572 icode = CODE_FOR_avx2_gatherdiv8si;
39574 case IX86_BUILTIN_GATHER3SIV16SF:
39575 icode = CODE_FOR_avx512f_gathersiv16sf;
39577 case IX86_BUILTIN_GATHER3SIV8DF:
39578 icode = CODE_FOR_avx512f_gathersiv8df;
39580 case IX86_BUILTIN_GATHER3DIV16SF:
39581 icode = CODE_FOR_avx512f_gatherdiv16sf;
39583 case IX86_BUILTIN_GATHER3DIV8DF:
39584 icode = CODE_FOR_avx512f_gatherdiv8df;
39586 case IX86_BUILTIN_GATHER3SIV16SI:
39587 icode = CODE_FOR_avx512f_gathersiv16si;
39589 case IX86_BUILTIN_GATHER3SIV8DI:
39590 icode = CODE_FOR_avx512f_gathersiv8di;
39592 case IX86_BUILTIN_GATHER3DIV16SI:
39593 icode = CODE_FOR_avx512f_gatherdiv16si;
39595 case IX86_BUILTIN_GATHER3DIV8DI:
39596 icode = CODE_FOR_avx512f_gatherdiv8di;
39598 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39599 icode = CODE_FOR_avx512f_gathersiv8df;
39601 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39602 icode = CODE_FOR_avx512f_gatherdiv16sf;
39604 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39605 icode = CODE_FOR_avx512f_gathersiv8di;
39607 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39608 icode = CODE_FOR_avx512f_gatherdiv16si;
39610 case IX86_BUILTIN_GATHER3SIV2DF:
39611 icode = CODE_FOR_avx512vl_gathersiv2df;
39613 case IX86_BUILTIN_GATHER3SIV4DF:
39614 icode = CODE_FOR_avx512vl_gathersiv4df;
39616 case IX86_BUILTIN_GATHER3DIV2DF:
39617 icode = CODE_FOR_avx512vl_gatherdiv2df;
39619 case IX86_BUILTIN_GATHER3DIV4DF:
39620 icode = CODE_FOR_avx512vl_gatherdiv4df;
39622 case IX86_BUILTIN_GATHER3SIV4SF:
39623 icode = CODE_FOR_avx512vl_gathersiv4sf;
39625 case IX86_BUILTIN_GATHER3SIV8SF:
39626 icode = CODE_FOR_avx512vl_gathersiv8sf;
39628 case IX86_BUILTIN_GATHER3DIV4SF:
39629 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39631 case IX86_BUILTIN_GATHER3DIV8SF:
39632 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39634 case IX86_BUILTIN_GATHER3SIV2DI:
39635 icode = CODE_FOR_avx512vl_gathersiv2di;
39637 case IX86_BUILTIN_GATHER3SIV4DI:
39638 icode = CODE_FOR_avx512vl_gathersiv4di;
39640 case IX86_BUILTIN_GATHER3DIV2DI:
39641 icode = CODE_FOR_avx512vl_gatherdiv2di;
39643 case IX86_BUILTIN_GATHER3DIV4DI:
39644 icode = CODE_FOR_avx512vl_gatherdiv4di;
39646 case IX86_BUILTIN_GATHER3SIV4SI:
39647 icode = CODE_FOR_avx512vl_gathersiv4si;
39649 case IX86_BUILTIN_GATHER3SIV8SI:
39650 icode = CODE_FOR_avx512vl_gathersiv8si;
39652 case IX86_BUILTIN_GATHER3DIV4SI:
39653 icode = CODE_FOR_avx512vl_gatherdiv4si;
39655 case IX86_BUILTIN_GATHER3DIV8SI:
39656 icode = CODE_FOR_avx512vl_gatherdiv8si;
39658 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39659 icode = CODE_FOR_avx512vl_gathersiv4df;
39661 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39662 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39664 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39665 icode = CODE_FOR_avx512vl_gathersiv4di;
39667 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39668 icode = CODE_FOR_avx512vl_gatherdiv8si;
39670 case IX86_BUILTIN_SCATTERSIV16SF:
39671 icode = CODE_FOR_avx512f_scattersiv16sf;
39673 case IX86_BUILTIN_SCATTERSIV8DF:
39674 icode = CODE_FOR_avx512f_scattersiv8df;
39676 case IX86_BUILTIN_SCATTERDIV16SF:
39677 icode = CODE_FOR_avx512f_scatterdiv16sf;
39679 case IX86_BUILTIN_SCATTERDIV8DF:
39680 icode = CODE_FOR_avx512f_scatterdiv8df;
39682 case IX86_BUILTIN_SCATTERSIV16SI:
39683 icode = CODE_FOR_avx512f_scattersiv16si;
39685 case IX86_BUILTIN_SCATTERSIV8DI:
39686 icode = CODE_FOR_avx512f_scattersiv8di;
39688 case IX86_BUILTIN_SCATTERDIV16SI:
39689 icode = CODE_FOR_avx512f_scatterdiv16si;
39691 case IX86_BUILTIN_SCATTERDIV8DI:
39692 icode = CODE_FOR_avx512f_scatterdiv8di;
39694 case IX86_BUILTIN_SCATTERSIV8SF:
39695 icode = CODE_FOR_avx512vl_scattersiv8sf;
39697 case IX86_BUILTIN_SCATTERSIV4SF:
39698 icode = CODE_FOR_avx512vl_scattersiv4sf;
39700 case IX86_BUILTIN_SCATTERSIV4DF:
39701 icode = CODE_FOR_avx512vl_scattersiv4df;
39703 case IX86_BUILTIN_SCATTERSIV2DF:
39704 icode = CODE_FOR_avx512vl_scattersiv2df;
39706 case IX86_BUILTIN_SCATTERDIV8SF:
39707 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39709 case IX86_BUILTIN_SCATTERDIV4SF:
39710 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39712 case IX86_BUILTIN_SCATTERDIV4DF:
39713 icode = CODE_FOR_avx512vl_scatterdiv4df;
39715 case IX86_BUILTIN_SCATTERDIV2DF:
39716 icode = CODE_FOR_avx512vl_scatterdiv2df;
39718 case IX86_BUILTIN_SCATTERSIV8SI:
39719 icode = CODE_FOR_avx512vl_scattersiv8si;
39721 case IX86_BUILTIN_SCATTERSIV4SI:
39722 icode = CODE_FOR_avx512vl_scattersiv4si;
39724 case IX86_BUILTIN_SCATTERSIV4DI:
39725 icode = CODE_FOR_avx512vl_scattersiv4di;
39727 case IX86_BUILTIN_SCATTERSIV2DI:
39728 icode = CODE_FOR_avx512vl_scattersiv2di;
39730 case IX86_BUILTIN_SCATTERDIV8SI:
39731 icode = CODE_FOR_avx512vl_scatterdiv8si;
39733 case IX86_BUILTIN_SCATTERDIV4SI:
39734 icode = CODE_FOR_avx512vl_scatterdiv4si;
39736 case IX86_BUILTIN_SCATTERDIV4DI:
39737 icode = CODE_FOR_avx512vl_scatterdiv4di;
39739 case IX86_BUILTIN_SCATTERDIV2DI:
39740 icode = CODE_FOR_avx512vl_scatterdiv2di;
39742 case IX86_BUILTIN_GATHERPFDPD:
39743 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39744 goto vec_prefetch_gen;
39745 case IX86_BUILTIN_GATHERPFDPS:
39746 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39747 goto vec_prefetch_gen;
39748 case IX86_BUILTIN_GATHERPFQPD:
39749 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39750 goto vec_prefetch_gen;
39751 case IX86_BUILTIN_GATHERPFQPS:
39752 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39753 goto vec_prefetch_gen;
39754 case IX86_BUILTIN_SCATTERPFDPD:
39755 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39756 goto vec_prefetch_gen;
39757 case IX86_BUILTIN_SCATTERPFDPS:
39758 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39759 goto vec_prefetch_gen;
39760 case IX86_BUILTIN_SCATTERPFQPD:
39761 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39762 goto vec_prefetch_gen;
39763 case IX86_BUILTIN_SCATTERPFQPS:
39764 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39765 goto vec_prefetch_gen;
39769 rtx (*gen) (rtx, rtx);
39771 arg0 = CALL_EXPR_ARG (exp, 0);
39772 arg1 = CALL_EXPR_ARG (exp, 1);
39773 arg2 = CALL_EXPR_ARG (exp, 2);
39774 arg3 = CALL_EXPR_ARG (exp, 3);
39775 arg4 = CALL_EXPR_ARG (exp, 4);
39776 op0 = expand_normal (arg0);
39777 op1 = expand_normal (arg1);
39778 op2 = expand_normal (arg2);
39779 op3 = expand_normal (arg3);
39780 op4 = expand_normal (arg4);
39781 /* Note the arg order is different from the operand order. */
39782 mode0 = insn_data[icode].operand[1].mode;
39783 mode2 = insn_data[icode].operand[3].mode;
39784 mode3 = insn_data[icode].operand[4].mode;
39785 mode4 = insn_data[icode].operand[5].mode;
39787 if (target == NULL_RTX
39788 || GET_MODE (target) != insn_data[icode].operand[0].mode
39789 || !insn_data[icode].operand[0].predicate (target,
39790 GET_MODE (target)))
39791 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39793 subtarget = target;
39797 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39798 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39799 half = gen_reg_rtx (V8SImode);
39800 if (!nonimmediate_operand (op2, V16SImode))
39801 op2 = copy_to_mode_reg (V16SImode, op2);
39802 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39805 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39806 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39807 case IX86_BUILTIN_GATHERALTSIV4DF:
39808 case IX86_BUILTIN_GATHERALTSIV4DI:
39809 half = gen_reg_rtx (V4SImode);
39810 if (!nonimmediate_operand (op2, V8SImode))
39811 op2 = copy_to_mode_reg (V8SImode, op2);
39812 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39815 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39816 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39817 half = gen_reg_rtx (mode0);
39818 if (mode0 == V8SFmode)
39819 gen = gen_vec_extract_lo_v16sf;
39821 gen = gen_vec_extract_lo_v16si;
39822 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39823 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39824 emit_insn (gen (half, op0));
39826 if (GET_MODE (op3) != VOIDmode)
39828 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39829 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39830 emit_insn (gen (half, op3));
39834 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39835 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39836 case IX86_BUILTIN_GATHERALTDIV8SF:
39837 case IX86_BUILTIN_GATHERALTDIV8SI:
39838 half = gen_reg_rtx (mode0);
39839 if (mode0 == V4SFmode)
39840 gen = gen_vec_extract_lo_v8sf;
39842 gen = gen_vec_extract_lo_v8si;
39843 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39844 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39845 emit_insn (gen (half, op0));
39847 if (GET_MODE (op3) != VOIDmode)
39849 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39850 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39851 emit_insn (gen (half, op3));
39859 /* Force memory operand only with base register here. But we
39860 don't want to do it on memory operand for other builtin
39862 op1 = ix86_zero_extend_to_Pmode (op1);
39864 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39865 op0 = copy_to_mode_reg (mode0, op0);
39866 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39867 op1 = copy_to_mode_reg (Pmode, op1);
39868 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39869 op2 = copy_to_mode_reg (mode2, op2);
39871 op3 = fixup_modeless_constant (op3, mode3);
39873 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39875 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39876 op3 = copy_to_mode_reg (mode3, op3);
39880 op3 = copy_to_reg (op3);
39881 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39883 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39885 error ("the last argument must be scale 1, 2, 4, 8");
39889 /* Optimize. If mask is known to have all high bits set,
39890 replace op0 with pc_rtx to signal that the instruction
39891 overwrites the whole destination and doesn't use its
39892 previous contents. */
39895 if (TREE_CODE (arg3) == INTEGER_CST)
39897 if (integer_all_onesp (arg3))
39900 else if (TREE_CODE (arg3) == VECTOR_CST)
39902 unsigned int negative = 0;
39903 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39905 tree cst = VECTOR_CST_ELT (arg3, i);
39906 if (TREE_CODE (cst) == INTEGER_CST
39907 && tree_int_cst_sign_bit (cst))
39909 else if (TREE_CODE (cst) == REAL_CST
39910 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39913 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39916 else if (TREE_CODE (arg3) == SSA_NAME
39917 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39919 /* Recognize also when mask is like:
39920 __v2df src = _mm_setzero_pd ();
39921 __v2df mask = _mm_cmpeq_pd (src, src);
39923 __v8sf src = _mm256_setzero_ps ();
39924 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39925 as that is a cheaper way to load all ones into
39926 a register than having to load a constant from
39928 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39929 if (is_gimple_call (def_stmt))
39931 tree fndecl = gimple_call_fndecl (def_stmt);
39933 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39934 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39936 case IX86_BUILTIN_CMPPD:
39937 case IX86_BUILTIN_CMPPS:
39938 case IX86_BUILTIN_CMPPD256:
39939 case IX86_BUILTIN_CMPPS256:
39940 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39943 case IX86_BUILTIN_CMPEQPD:
39944 case IX86_BUILTIN_CMPEQPS:
39945 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39946 && initializer_zerop (gimple_call_arg (def_stmt,
39957 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39964 case IX86_BUILTIN_GATHER3DIV16SF:
39965 if (target == NULL_RTX)
39966 target = gen_reg_rtx (V8SFmode);
39967 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39969 case IX86_BUILTIN_GATHER3DIV16SI:
39970 if (target == NULL_RTX)
39971 target = gen_reg_rtx (V8SImode);
39972 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39974 case IX86_BUILTIN_GATHER3DIV8SF:
39975 case IX86_BUILTIN_GATHERDIV8SF:
39976 if (target == NULL_RTX)
39977 target = gen_reg_rtx (V4SFmode);
39978 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39980 case IX86_BUILTIN_GATHER3DIV8SI:
39981 case IX86_BUILTIN_GATHERDIV8SI:
39982 if (target == NULL_RTX)
39983 target = gen_reg_rtx (V4SImode);
39984 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39987 target = subtarget;
39993 arg0 = CALL_EXPR_ARG (exp, 0);
39994 arg1 = CALL_EXPR_ARG (exp, 1);
39995 arg2 = CALL_EXPR_ARG (exp, 2);
39996 arg3 = CALL_EXPR_ARG (exp, 3);
39997 arg4 = CALL_EXPR_ARG (exp, 4);
39998 op0 = expand_normal (arg0);
39999 op1 = expand_normal (arg1);
40000 op2 = expand_normal (arg2);
40001 op3 = expand_normal (arg3);
40002 op4 = expand_normal (arg4);
40003 mode1 = insn_data[icode].operand[1].mode;
40004 mode2 = insn_data[icode].operand[2].mode;
40005 mode3 = insn_data[icode].operand[3].mode;
40006 mode4 = insn_data[icode].operand[4].mode;
40008 /* Force memory operand only with base register here. But we
40009 don't want to do it on memory operand for other builtin
40011 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40013 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40014 op0 = copy_to_mode_reg (Pmode, op0);
40016 op1 = fixup_modeless_constant (op1, mode1);
40018 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40020 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40021 op1 = copy_to_mode_reg (mode1, op1);
40025 op1 = copy_to_reg (op1);
40026 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40029 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40030 op2 = copy_to_mode_reg (mode2, op2);
40032 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40033 op3 = copy_to_mode_reg (mode3, op3);
40035 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40037 error ("the last argument must be scale 1, 2, 4, 8");
40041 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40049 arg0 = CALL_EXPR_ARG (exp, 0);
40050 arg1 = CALL_EXPR_ARG (exp, 1);
40051 arg2 = CALL_EXPR_ARG (exp, 2);
40052 arg3 = CALL_EXPR_ARG (exp, 3);
40053 arg4 = CALL_EXPR_ARG (exp, 4);
40054 op0 = expand_normal (arg0);
40055 op1 = expand_normal (arg1);
40056 op2 = expand_normal (arg2);
40057 op3 = expand_normal (arg3);
40058 op4 = expand_normal (arg4);
40059 mode0 = insn_data[icode].operand[0].mode;
40060 mode1 = insn_data[icode].operand[1].mode;
40061 mode3 = insn_data[icode].operand[3].mode;
40062 mode4 = insn_data[icode].operand[4].mode;
40064 op0 = fixup_modeless_constant (op0, mode0);
40066 if (GET_MODE (op0) == mode0
40067 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40069 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40070 op0 = copy_to_mode_reg (mode0, op0);
40072 else if (op0 != constm1_rtx)
40074 op0 = copy_to_reg (op0);
40075 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40078 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40079 op1 = copy_to_mode_reg (mode1, op1);
40081 /* Force memory operand only with base register here. But we
40082 don't want to do it on memory operand for other builtin
40084 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40086 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40087 op2 = copy_to_mode_reg (Pmode, op2);
40089 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40091 error ("the forth argument must be scale 1, 2, 4, 8");
40095 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40097 error ("incorrect hint operand");
40101 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40109 case IX86_BUILTIN_XABORT:
40110 icode = CODE_FOR_xabort;
40111 arg0 = CALL_EXPR_ARG (exp, 0);
40112 op0 = expand_normal (arg0);
40113 mode0 = insn_data[icode].operand[0].mode;
40114 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40116 error ("the xabort's argument must be an 8-bit immediate");
40119 emit_insn (gen_xabort (op0));
40126 for (i = 0, d = bdesc_special_args;
40127 i < ARRAY_SIZE (bdesc_special_args);
40129 if (d->code == fcode)
40130 return ix86_expand_special_args_builtin (d, exp, target);
40132 for (i = 0, d = bdesc_args;
40133 i < ARRAY_SIZE (bdesc_args);
40135 if (d->code == fcode)
40138 case IX86_BUILTIN_FABSQ:
40139 case IX86_BUILTIN_COPYSIGNQ:
40141 /* Emit a normal call if SSE isn't available. */
40142 return expand_call (exp, target, ignore);
40144 return ix86_expand_args_builtin (d, exp, target);
40147 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40148 if (d->code == fcode)
40149 return ix86_expand_sse_comi (d, exp, target);
40151 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40152 if (d->code == fcode)
40153 return ix86_expand_round_builtin (d, exp, target);
40155 for (i = 0, d = bdesc_pcmpestr;
40156 i < ARRAY_SIZE (bdesc_pcmpestr);
40158 if (d->code == fcode)
40159 return ix86_expand_sse_pcmpestr (d, exp, target);
40161 for (i = 0, d = bdesc_pcmpistr;
40162 i < ARRAY_SIZE (bdesc_pcmpistr);
40164 if (d->code == fcode)
40165 return ix86_expand_sse_pcmpistr (d, exp, target);
40167 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40168 if (d->code == fcode)
40169 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40170 (enum ix86_builtin_func_type)
40171 d->flag, d->comparison);
40173 gcc_unreachable ();
40176 /* This returns the target-specific builtin with code CODE if
40177 current_function_decl has visibility on this builtin, which is checked
40178 using isa flags. Returns NULL_TREE otherwise. */
40180 static tree ix86_get_builtin (enum ix86_builtins code)
40182 struct cl_target_option *opts;
40183 tree target_tree = NULL_TREE;
40185 /* Determine the isa flags of current_function_decl. */
40187 if (current_function_decl)
40188 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40190 if (target_tree == NULL)
40191 target_tree = target_option_default_node;
40193 opts = TREE_TARGET_OPTION (target_tree);
40195 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40196 return ix86_builtin_decl (code, true);
40201 /* Return function decl for target specific builtin
40202 for given MPX builtin passed i FCODE. */
40204 ix86_builtin_mpx_function (unsigned fcode)
40208 case BUILT_IN_CHKP_BNDMK:
40209 return ix86_builtins[IX86_BUILTIN_BNDMK];
40211 case BUILT_IN_CHKP_BNDSTX:
40212 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40214 case BUILT_IN_CHKP_BNDLDX:
40215 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40217 case BUILT_IN_CHKP_BNDCL:
40218 return ix86_builtins[IX86_BUILTIN_BNDCL];
40220 case BUILT_IN_CHKP_BNDCU:
40221 return ix86_builtins[IX86_BUILTIN_BNDCU];
40223 case BUILT_IN_CHKP_BNDRET:
40224 return ix86_builtins[IX86_BUILTIN_BNDRET];
40226 case BUILT_IN_CHKP_INTERSECT:
40227 return ix86_builtins[IX86_BUILTIN_BNDINT];
40229 case BUILT_IN_CHKP_NARROW:
40230 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40232 case BUILT_IN_CHKP_SIZEOF:
40233 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40235 case BUILT_IN_CHKP_EXTRACT_LOWER:
40236 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40238 case BUILT_IN_CHKP_EXTRACT_UPPER:
40239 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40245 gcc_unreachable ();
40248 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40250 Return an address to be used to load/store bounds for pointer
40253 SLOT_NO is an integer constant holding number of a target
40254 dependent special slot to be used in case SLOT is not a memory.
40256 SPECIAL_BASE is a pointer to be used as a base of fake address
40257 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40258 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40261 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40265 /* NULL slot means we pass bounds for pointer not passed to the
40266 function at all. Register slot means we pass pointer in a
40267 register. In both these cases bounds are passed via Bounds
40268 Table. Since we do not have actual pointer stored in memory,
40269 we have to use fake addresses to access Bounds Table. We
40270 start with (special_base - sizeof (void*)) and decrease this
40271 address by pointer size to get addresses for other slots. */
40272 if (!slot || REG_P (slot))
40274 gcc_assert (CONST_INT_P (slot_no));
40275 addr = plus_constant (Pmode, special_base,
40276 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40278 /* If pointer is passed in a memory then its address is used to
40279 access Bounds Table. */
40280 else if (MEM_P (slot))
40282 addr = XEXP (slot, 0);
40283 if (!register_operand (addr, Pmode))
40284 addr = copy_addr_to_reg (addr);
40287 gcc_unreachable ();
40292 /* Expand pass uses this hook to load bounds for function parameter
40293 PTR passed in SLOT in case its bounds are not passed in a register.
40295 If SLOT is a memory, then bounds are loaded as for regular pointer
40296 loaded from memory. PTR may be NULL in case SLOT is a memory.
40297 In such case value of PTR (if required) may be loaded from SLOT.
40299 If SLOT is NULL or a register then SLOT_NO is an integer constant
40300 holding number of the target dependent special slot which should be
40301 used to obtain bounds.
40303 Return loaded bounds. */
40306 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40308 rtx reg = gen_reg_rtx (BNDmode);
40311 /* Get address to be used to access Bounds Table. Special slots start
40312 at the location of return address of the current function. */
40313 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40315 /* Load pointer value from a memory if we don't have it. */
40318 gcc_assert (MEM_P (slot));
40319 ptr = copy_addr_to_reg (slot);
40322 emit_insn (BNDmode == BND64mode
40323 ? gen_bnd64_ldx (reg, addr, ptr)
40324 : gen_bnd32_ldx (reg, addr, ptr));
40329 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40330 passed in SLOT in case BOUNDS are not passed in a register.
40332 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40333 stored in memory. PTR may be NULL in case SLOT is a memory.
40334 In such case value of PTR (if required) may be loaded from SLOT.
40336 If SLOT is NULL or a register then SLOT_NO is an integer constant
40337 holding number of the target dependent special slot which should be
40338 used to store BOUNDS. */
40341 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40345 /* Get address to be used to access Bounds Table. Special slots start
40346 at the location of return address of a called function. */
40347 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40349 /* Load pointer value from a memory if we don't have it. */
40352 gcc_assert (MEM_P (slot));
40353 ptr = copy_addr_to_reg (slot);
40356 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40357 if (!register_operand (bounds, BNDmode))
40358 bounds = copy_to_mode_reg (BNDmode, bounds);
40360 emit_insn (BNDmode == BND64mode
40361 ? gen_bnd64_stx (addr, ptr, bounds)
40362 : gen_bnd32_stx (addr, ptr, bounds));
40365 /* Load and return bounds returned by function in SLOT. */
40368 ix86_load_returned_bounds (rtx slot)
40372 gcc_assert (REG_P (slot));
40373 res = gen_reg_rtx (BNDmode);
40374 emit_move_insn (res, slot);
40379 /* Store BOUNDS returned by function into SLOT. */
40382 ix86_store_returned_bounds (rtx slot, rtx bounds)
40384 gcc_assert (REG_P (slot));
40385 emit_move_insn (slot, bounds);
/* NOTE(review): this whole region is a mangled extract -- every line
   below carries a stale embedded source line number and many original
   lines (switch skeleton, braces, breaks) are missing.  Code text is
   kept byte-identical; do not compile as-is.  Restore from upstream
   gcc/config/i386/i386.c before editing.  */
40388 /* Returns a function decl for a vectorized version of the builtin function
40389 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40390 if it is not available. */
40393 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40396 machine_mode in_mode, out_mode;
40398 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
/* Only vector-to-vector mappings of normal (non-MD) builtins are
   handled; everything else gets NULL_TREE (return lines missing).  */
40400 if (TREE_CODE (type_out) != VECTOR_TYPE
40401 || TREE_CODE (type_in) != VECTOR_TYPE
40402 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40405 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40406 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40407 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40408 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Dispatch on the scalar builtin code; each case matches element mode
   plus lane counts (out_n/in_n) to a 128/256/512-bit vector builtin.
   The `switch (fn)' line itself is missing from this extract.  */
40412 case BUILT_IN_SQRT:
40413 if (out_mode == DFmode && in_mode == DFmode)
40415 if (out_n == 2 && in_n == 2)
40416 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40417 else if (out_n == 4 && in_n == 4)
40418 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40419 else if (out_n == 8 && in_n == 8)
40420 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40424 case BUILT_IN_EXP2F:
40425 if (out_mode == SFmode && in_mode == SFmode)
40427 if (out_n == 16 && in_n == 16)
40428 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40432 case BUILT_IN_SQRTF:
40433 if (out_mode == SFmode && in_mode == SFmode)
40435 if (out_n == 4 && in_n == 4)
40436 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40437 else if (out_n == 8 && in_n == 8)
40438 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40439 else if (out_n == 16 && in_n == 16)
40440 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
/* The floor/ceil/round families are only usable when -fno-trapping-math
   and SSE4.1 ROUND are available, hence the TARGET_ROUND guards.  */
40444 case BUILT_IN_IFLOOR:
40445 case BUILT_IN_LFLOOR:
40446 case BUILT_IN_LLFLOOR:
40447 /* The round insn does not trap on denormals. */
40448 if (flag_trapping_math || !TARGET_ROUND)
40451 if (out_mode == SImode && in_mode == DFmode)
40453 if (out_n == 4 && in_n == 2)
40454 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40455 else if (out_n == 8 && in_n == 4)
40456 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40457 else if (out_n == 16 && in_n == 8)
40458 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40462 case BUILT_IN_IFLOORF:
40463 case BUILT_IN_LFLOORF:
40464 case BUILT_IN_LLFLOORF:
40465 /* The round insn does not trap on denormals. */
40466 if (flag_trapping_math || !TARGET_ROUND)
40469 if (out_mode == SImode && in_mode == SFmode)
40471 if (out_n == 4 && in_n == 4)
40472 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40473 else if (out_n == 8 && in_n == 8)
40474 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40478 case BUILT_IN_ICEIL:
40479 case BUILT_IN_LCEIL:
40480 case BUILT_IN_LLCEIL:
40481 /* The round insn does not trap on denormals. */
40482 if (flag_trapping_math || !TARGET_ROUND)
40485 if (out_mode == SImode && in_mode == DFmode)
40487 if (out_n == 4 && in_n == 2)
40488 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40489 else if (out_n == 8 && in_n == 4)
40490 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40491 else if (out_n == 16 && in_n == 8)
40492 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40496 case BUILT_IN_ICEILF:
40497 case BUILT_IN_LCEILF:
40498 case BUILT_IN_LLCEILF:
40499 /* The round insn does not trap on denormals. */
40500 if (flag_trapping_math || !TARGET_ROUND)
40503 if (out_mode == SImode && in_mode == SFmode)
40505 if (out_n == 4 && in_n == 4)
40506 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40507 else if (out_n == 8 && in_n == 8)
40508 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40512 case BUILT_IN_IRINT:
40513 case BUILT_IN_LRINT:
40514 case BUILT_IN_LLRINT:
40515 if (out_mode == SImode && in_mode == DFmode)
40517 if (out_n == 4 && in_n == 2)
40518 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40519 else if (out_n == 8 && in_n == 4)
40520 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40524 case BUILT_IN_IRINTF:
40525 case BUILT_IN_LRINTF:
40526 case BUILT_IN_LLRINTF:
40527 if (out_mode == SImode && in_mode == SFmode)
40529 if (out_n == 4 && in_n == 4)
40530 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40531 else if (out_n == 8 && in_n == 8)
40532 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40536 case BUILT_IN_IROUND:
40537 case BUILT_IN_LROUND:
40538 case BUILT_IN_LLROUND:
40539 /* The round insn does not trap on denormals. */
40540 if (flag_trapping_math || !TARGET_ROUND)
40543 if (out_mode == SImode && in_mode == DFmode)
40545 if (out_n == 4 && in_n == 2)
40546 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40547 else if (out_n == 8 && in_n == 4)
40548 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40549 else if (out_n == 16 && in_n == 8)
40550 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40554 case BUILT_IN_IROUNDF:
40555 case BUILT_IN_LROUNDF:
40556 case BUILT_IN_LLROUNDF:
40557 /* The round insn does not trap on denormals. */
40558 if (flag_trapping_math || !TARGET_ROUND)
40561 if (out_mode == SImode && in_mode == SFmode)
40563 if (out_n == 4 && in_n == 4)
40564 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40565 else if (out_n == 8 && in_n == 8)
40566 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40570 case BUILT_IN_COPYSIGN:
40571 if (out_mode == DFmode && in_mode == DFmode)
40573 if (out_n == 2 && in_n == 2)
40574 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40575 else if (out_n == 4 && in_n == 4)
40576 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40577 else if (out_n == 8 && in_n == 8)
40578 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40582 case BUILT_IN_COPYSIGNF:
40583 if (out_mode == SFmode && in_mode == SFmode)
40585 if (out_n == 4 && in_n == 4)
40586 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40587 else if (out_n == 8 && in_n == 8)
40588 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40589 else if (out_n == 16 && in_n == 16)
40590 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40594 case BUILT_IN_FLOOR:
40595 /* The round insn does not trap on denormals. */
40596 if (flag_trapping_math || !TARGET_ROUND)
40599 if (out_mode == DFmode && in_mode == DFmode)
40601 if (out_n == 2 && in_n == 2)
40602 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40603 else if (out_n == 4 && in_n == 4)
40604 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40608 case BUILT_IN_FLOORF:
40609 /* The round insn does not trap on denormals. */
40610 if (flag_trapping_math || !TARGET_ROUND)
40613 if (out_mode == SFmode && in_mode == SFmode)
40615 if (out_n == 4 && in_n == 4)
40616 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40617 else if (out_n == 8 && in_n == 8)
40618 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40622 case BUILT_IN_CEIL:
40623 /* The round insn does not trap on denormals. */
40624 if (flag_trapping_math || !TARGET_ROUND)
40627 if (out_mode == DFmode && in_mode == DFmode)
40629 if (out_n == 2 && in_n == 2)
40630 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40631 else if (out_n == 4 && in_n == 4)
40632 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40636 case BUILT_IN_CEILF:
40637 /* The round insn does not trap on denormals. */
40638 if (flag_trapping_math || !TARGET_ROUND)
40641 if (out_mode == SFmode && in_mode == SFmode)
40643 if (out_n == 4 && in_n == 4)
40644 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40645 else if (out_n == 8 && in_n == 8)
40646 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40650 case BUILT_IN_TRUNC:
40651 /* The round insn does not trap on denormals. */
40652 if (flag_trapping_math || !TARGET_ROUND)
40655 if (out_mode == DFmode && in_mode == DFmode)
40657 if (out_n == 2 && in_n == 2)
40658 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40659 else if (out_n == 4 && in_n == 4)
40660 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40664 case BUILT_IN_TRUNCF:
40665 /* The round insn does not trap on denormals. */
40666 if (flag_trapping_math || !TARGET_ROUND)
40669 if (out_mode == SFmode && in_mode == SFmode)
40671 if (out_n == 4 && in_n == 4)
40672 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40673 else if (out_n == 8 && in_n == 8)
40674 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40678 case BUILT_IN_RINT:
40679 /* The round insn does not trap on denormals. */
40680 if (flag_trapping_math || !TARGET_ROUND)
40683 if (out_mode == DFmode && in_mode == DFmode)
40685 if (out_n == 2 && in_n == 2)
40686 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40687 else if (out_n == 4 && in_n == 4)
40688 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40692 case BUILT_IN_RINTF:
40693 /* The round insn does not trap on denormals. */
40694 if (flag_trapping_math || !TARGET_ROUND)
40697 if (out_mode == SFmode && in_mode == SFmode)
40699 if (out_n == 4 && in_n == 4)
40700 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40701 else if (out_n == 8 && in_n == 8)
40702 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40706 case BUILT_IN_ROUND:
40707 /* The round insn does not trap on denormals. */
40708 if (flag_trapping_math || !TARGET_ROUND)
40711 if (out_mode == DFmode && in_mode == DFmode)
40713 if (out_n == 2 && in_n == 2)
40714 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40715 else if (out_n == 4 && in_n == 4)
40716 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40720 case BUILT_IN_ROUNDF:
40721 /* The round insn does not trap on denormals. */
40722 if (flag_trapping_math || !TARGET_ROUND)
40725 if (out_mode == SFmode && in_mode == SFmode)
40727 if (out_n == 4 && in_n == 4)
40728 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40729 else if (out_n == 8 && in_n == 8)
40730 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
/* The `case BUILT_IN_FMA:' label for this DFmode group is missing
   from the extract -- TODO confirm against upstream.  */
40735 if (out_mode == DFmode && in_mode == DFmode)
40737 if (out_n == 2 && in_n == 2)
40738 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40739 if (out_n == 4 && in_n == 4)
40740 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40744 case BUILT_IN_FMAF:
40745 if (out_mode == SFmode && in_mode == SFmode)
40747 if (out_n == 4 && in_n == 4)
40748 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40749 if (out_n == 8 && in_n == 8)
40750 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40758 /* Dispatch to a handler for a vectorization library. */
40759 if (ix86_veclib_handler)
40760 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
/* NOTE(review): mangled extract -- stale line-number prefixes and
   missing lines throughout; code text kept byte-identical.  */
40766 /* Handler for an SVML-style interface to
40767 a library with vectorized intrinsics. */
40770 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40773 tree fntype, new_fndecl, args;
40776 machine_mode el_mode, in_mode;
40779 /* The SVML is suitable for unsafe math only. */
40780 if (!flag_unsafe_math_optimizations)
40783 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40784 n = TYPE_VECTOR_SUBPARTS (type_out);
40785 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40786 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40787 if (el_mode != in_mode
/* DFmode variants operate on 2 lanes, SFmode variants on 4; other
   shapes bail out (return lines missing from extract).  */
40795 case BUILT_IN_LOG10:
40797 case BUILT_IN_TANH:
40799 case BUILT_IN_ATAN:
40800 case BUILT_IN_ATAN2:
40801 case BUILT_IN_ATANH:
40802 case BUILT_IN_CBRT:
40803 case BUILT_IN_SINH:
40805 case BUILT_IN_ASINH:
40806 case BUILT_IN_ASIN:
40807 case BUILT_IN_COSH:
40809 case BUILT_IN_ACOSH:
40810 case BUILT_IN_ACOS:
40811 if (el_mode != DFmode || n != 2)
40815 case BUILT_IN_EXPF:
40816 case BUILT_IN_LOGF:
40817 case BUILT_IN_LOG10F:
40818 case BUILT_IN_POWF:
40819 case BUILT_IN_TANHF:
40820 case BUILT_IN_TANF:
40821 case BUILT_IN_ATANF:
40822 case BUILT_IN_ATAN2F:
40823 case BUILT_IN_ATANHF:
40824 case BUILT_IN_CBRTF:
40825 case BUILT_IN_SINHF:
40826 case BUILT_IN_SINF:
40827 case BUILT_IN_ASINHF:
40828 case BUILT_IN_ASINF:
40829 case BUILT_IN_COSHF:
40830 case BUILT_IN_COSF:
40831 case BUILT_IN_ACOSHF:
40832 case BUILT_IN_ACOSF:
40833 if (el_mode != SFmode || n != 4)
/* Build the SVML symbol name: "vmls<Name>4" for float, "vmld<Name>2"
   for double, with log getting the special "Ln" spelling; bname+10
   skips the "__builtin_" prefix.  */
40841 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40843 if (fn == BUILT_IN_LOGF)
40844 strcpy (name, "vmlsLn4");
40845 else if (fn == BUILT_IN_LOG)
40846 strcpy (name, "vmldLn2");
40849 sprintf (name, "vmls%s", bname+10);
40850 name[strlen (name)-1] = '4';
40853 sprintf (name, "vmld%s2", bname+10);
40855 /* Convert to uppercase. */
/* Argument count of the scalar builtin decides whether the vector
   decl takes one or two vector operands.  */
40859 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40861 args = TREE_CHAIN (args))
40865 fntype = build_function_type_list (type_out, type_in, NULL);
40867 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40869 /* Build a function declaration for the vectorized function. */
40870 new_fndecl = build_decl (BUILTINS_LOCATION,
40871 FUNCTION_DECL, get_identifier (name), fntype);
40872 TREE_PUBLIC (new_fndecl) = 1;
40873 DECL_EXTERNAL (new_fndecl) = 1;
40874 DECL_IS_NOVOPS (new_fndecl) = 1;
40875 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): mangled extract -- stale line-number prefixes and
   missing lines throughout; code text kept byte-identical.  */
40880 /* Handler for an ACML-style interface to
40881 a library with vectorized intrinsics. */
40884 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40886 char name[20] = "__vr.._";
40887 tree fntype, new_fndecl, args;
40890 machine_mode el_mode, in_mode;
40893 /* The ACML is 64bits only and suitable for unsafe math only as
40894 it does not correctly support parts of IEEE with the required
40895 precision such as denormals. */
40897 || !flag_unsafe_math_optimizations)
40900 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40901 n = TYPE_VECTOR_SUBPARTS (type_out);
40902 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40903 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40904 if (el_mode != in_mode
40914 case BUILT_IN_LOG2:
40915 case BUILT_IN_LOG10:
40918 if (el_mode != DFmode
40923 case BUILT_IN_SINF:
40924 case BUILT_IN_COSF:
40925 case BUILT_IN_EXPF:
40926 case BUILT_IN_POWF:
40927 case BUILT_IN_LOGF:
40928 case BUILT_IN_LOG2F:
40929 case BUILT_IN_LOG10F:
40932 if (el_mode != SFmode
/* Patch the scalar builtin's name (past "__builtin_") into the
   "__vr.._" template; the two dot positions are filled elsewhere
   (lines missing from extract).  */
40941 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40942 sprintf (name + 7, "%s", bname+10);
40945 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40947 args = TREE_CHAIN (args))
40951 fntype = build_function_type_list (type_out, type_in, NULL);
40953 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40955 /* Build a function declaration for the vectorized function. */
40956 new_fndecl = build_decl (BUILTINS_LOCATION,
40957 FUNCTION_DECL, get_identifier (name), fntype);
40958 TREE_PUBLIC (new_fndecl) = 1;
40959 DECL_EXTERNAL (new_fndecl) = 1;
40960 DECL_IS_NOVOPS (new_fndecl) = 1;
40961 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): mangled extract -- stale line-number prefixes; the
   `case V*mode:' labels of the mode switch are missing, so each
   TARGET_AVX512VL/TARGET_AVX512F pair below belongs to a vector-mode
   case whose label is absent.  Code text kept byte-identical.  */
40966 /* Returns a decl of a function that implements gather load with
40967 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40968 Return NULL_TREE if it is not available. */
40971 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40972 const_tree index_type, int scale)
40975 enum ix86_builtins code;
40980 if ((TREE_CODE (index_type) != INTEGER_TYPE
40981 && !POINTER_TYPE_P (index_type))
40982 || (TYPE_MODE (index_type) != SImode
40983 && TYPE_MODE (index_type) != DImode))
40986 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40989 /* v*gather* insn sign extends index to pointer mode. */
40990 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40991 && TYPE_UNSIGNED (index_type))
/* Scale must be a power of two (1/2/4/8); the leading bound check
   is on a missing line.  */
40996 || (scale & (scale - 1)) != 0)
40999 si = TYPE_MODE (index_type) == SImode;
41000 switch (TYPE_MODE (mem_vectype))
41003 if (TARGET_AVX512VL)
41004 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41006 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41009 if (TARGET_AVX512VL)
41010 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41012 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41015 if (TARGET_AVX512VL)
41016 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41018 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41021 if (TARGET_AVX512VL)
41022 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41024 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41027 if (TARGET_AVX512VL)
41028 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41030 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41033 if (TARGET_AVX512VL)
41034 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41036 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41039 if (TARGET_AVX512VL)
41040 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41042 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41045 if (TARGET_AVX512VL)
41046 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41048 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41051 if (TARGET_AVX512F)
41052 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41057 if (TARGET_AVX512F)
41058 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41063 if (TARGET_AVX512F)
41064 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41069 if (TARGET_AVX512F)
41070 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41078 return ix86_get_builtin (code);
41081 /* Returns a code for a target-specific builtin that implements
41082 reciprocal of the function, or NULL_TREE if not available. */
41085 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41087 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41088 && flag_finite_math_only && !flag_trapping_math
41089 && flag_unsafe_math_optimizations))
41093 /* Machine dependent builtins. */
41096 /* Vectorized version of sqrt to rsqrt conversion. */
41097 case IX86_BUILTIN_SQRTPS_NR:
41098 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41100 case IX86_BUILTIN_SQRTPS_NR256:
41101 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41107 /* Normal builtins. */
41110 /* Sqrt to rsqrt conversion. */
41111 case BUILT_IN_SQRTF:
41112 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
/* NOTE(review): mangled extract -- stale line-number prefixes; the
   mode `case' labels and several range-check/return lines are missing.
   Code text kept byte-identical.  */
41119 /* Helper for avx_vpermilps256_operand et al. This is also used by
41120 the expansion functions to turn the parallel back into a mask.
41121 The return value is 0 for no match and the imm8+1 for a match. */
41124 avx_vpermilp_parallel (rtx par, machine_mode mode)
41126 unsigned i, nelt = GET_MODE_NUNITS (mode);
41128 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41130 if (XVECLEN (par, 0) != (int) nelt)
41133 /* Validate that all of the elements are constants, and not totally
41134 out of range. Copy the data into an integral array to make the
41135 subsequent checks easier. */
41136 for (i = 0; i < nelt; ++i)
41138 rtx er = XVECEXP (par, 0, i);
41139 unsigned HOST_WIDE_INT ei;
41141 if (!CONST_INT_P (er))
/* The mode switch that follows dispatches on vector width; labels
   (e.g. for the 512-bit DFmode case) are missing from the extract.  */
41152 /* In the 512-bit DFmode case, we can only move elements within
41153 a 128-bit lane. First fill the second part of the mask,
41155 for (i = 4; i < 6; ++i)
41157 if (ipar[i] < 4 || ipar[i] >= 6)
41159 mask |= (ipar[i] - 4) << i;
41161 for (i = 6; i < 8; ++i)
41165 mask |= (ipar[i] - 6) << i;
41170 /* In the 256-bit DFmode case, we can only move elements within
41172 for (i = 0; i < 2; ++i)
41176 mask |= ipar[i] << i;
41178 for (i = 2; i < 4; ++i)
41182 mask |= (ipar[i] - 2) << i;
41187 /* In 512 bit SFmode case, permutation in the upper 256 bits
41188 must mirror the permutation in the lower 256-bits. */
41189 for (i = 0; i < 8; ++i)
41190 if (ipar[i] + 8 != ipar[i + 8])
41195 /* In 256 bit SFmode case, we have full freedom of
41196 movement within the low 128-bit lane, but the high 128-bit
41197 lane must mirror the exact same pattern. */
41198 for (i = 0; i < 4; ++i)
41199 if (ipar[i] + 4 != ipar[i + 4])
41206 /* In the 128-bit case, we've full freedom in the placement of
41207 the elements from the source operand. */
41208 for (i = 0; i < nelt; ++i)
41209 mask |= ipar[i] << (i * (nelt / 2));
41213 gcc_unreachable ();
41216 /* Make sure success has a non-zero value by adding one. */
/* NOTE(review): mangled extract -- stale line-number prefixes and
   missing lines (loop braces, early returns).  Code text kept
   byte-identical.  */
41220 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41221 the expansion functions to turn the parallel back into a mask.
41222 The return value is 0 for no match and the imm8+1 for a match. */
41225 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41227 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41229 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41231 if (XVECLEN (par, 0) != (int) nelt)
41234 /* Validate that all of the elements are constants, and not totally
41235 out of range. Copy the data into an integral array to make the
41236 subsequent checks easier. */
41237 for (i = 0; i < nelt; ++i)
41239 rtx er = XVECEXP (par, 0, i);
41240 unsigned HOST_WIDE_INT ei;
41242 if (!CONST_INT_P (er))
41245 if (ei >= 2 * nelt)
41250 /* Validate that the halves of the permute are halves. */
41251 for (i = 0; i < nelt2 - 1; ++i)
41252 if (ipar[i] + 1 != ipar[i + 1])
41254 for (i = nelt2; i < nelt - 1; ++i)
41255 if (ipar[i] + 1 != ipar[i + 1])
41258 /* Reconstruct the mask. */
41259 for (i = 0; i < 2; ++i)
41261 unsigned e = ipar[i * nelt2];
/* Each selected half index is scaled into a nibble of the imm8;
   the scaling line is missing from the extract.  */
41265 mask |= e << (i * 4);
41268 /* Make sure success has a non-zero value by adding one. */
41272 /* Return a register priority for hard reg REGNO. */
/* Higher values make the allocator prefer the register; the concrete
   return values for each branch are elided from this sample.  */
41274 ix86_register_priority (int hard_regno)
41276 /* ebp and r13 as the base always wants a displacement, r12 as the
41277 base always wants an index. So discourage their usage in an
41279 if (hard_regno == R12_REG || hard_regno == R13_REG)
41281 if (hard_regno == BP_REG)
/* REX-prefixed registers cost an extra encoding byte.  */
41283 /* New x86-64 int registers result in bigger code size. Discourage
41285 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41287 /* New x86-64 SSE registers result in bigger code size. Discourage
41289 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41291 /* Usage of AX register results in smaller code. Prefer it. */
41292 if (hard_regno == AX_REG)
41297 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41299 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41300 QImode must go into class Q_REGS.
41301 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41302 movdf to do mem-to-mem moves through integer regs. */
/* X is the value being reloaded, REGCLASS the class proposed so far;
   the result is always a subclass of REGCLASS (or NO_REGS to force X
   to memory).  NOTE(review): several return statements are elided from
   this sample.  */
41305 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41307 machine_mode mode = GET_MODE (x);
41309 /* We're only allowed to return a subclass of CLASS. Many of the
41310 following checks fail for NO_REGS, so eliminate that early. */
41311 if (regclass == NO_REGS)
41314 /* All classes can load zeros. */
41315 if (x == CONST0_RTX (mode))
41318 /* Force constants into memory if we are loading a (nonzero) constant into
41319 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41320 instructions to load from a constant. */
41322 && (MAYBE_MMX_CLASS_P (regclass)
41323 || MAYBE_SSE_CLASS_P (regclass)
41324 || MAYBE_MASK_CLASS_P (regclass)))
41327 /* Prefer SSE regs only, if we can use them for math. */
41328 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41329 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41331 /* Floating-point constants need more complex checks. */
41332 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41334 /* General regs can load everything. */
41335 if (reg_class_subset_p (regclass, GENERAL_REGS))
41338 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41339 zero above. We only want to wind up preferring 80387 registers if
41340 we plan on doing computation with them. */
41342 && standard_80387_constant_p (x) > 0)
41344 /* Limit class to non-sse. */
41345 if (regclass == FLOAT_SSE_REGS)
41347 if (regclass == FP_TOP_SSE_REGS)
41349 if (regclass == FP_SECOND_SSE_REGS)
41350 return FP_SECOND_REG;
41351 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41358 /* Generally when we see PLUS here, it's the function invariant
41359 (plus soft-fp const_int). Which can only be computed into general
41361 if (GET_CODE (x) == PLUS)
41362 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41364 /* QImode constants are easy to load, but non-constant QImode data
41365 must go into Q_REGS. */
41366 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41368 if (reg_class_subset_p (regclass, Q_REGS))
41370 if (reg_class_subset_p (Q_REGS, regclass))
41378 /* Discourage putting floating-point values in SSE registers unless
41379 SSE math is being used, and likewise for the 387 registers. */
/* Implement TARGET_PREFERRED_OUTPUT_RELOAD_CLASS.  X is the value being
   stored, REGCLASS the class chosen so far.  Returns a subclass of
   REGCLASS limited to the register bank we do math on, or NO_REGS to
   reject this alternative.  NOTE(review): some return statements are
   elided from this sample.  */
41381 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41383 machine_mode mode = GET_MODE (x);
41385 /* Restrict the output reload class to the register bank that we are doing
41386 math on. If we would like not to return a subset of CLASS, reject this
41387 alternative: if reload cannot do this, it will still use its choice. */
/* Fix: removed the redundant "mode = GET_MODE (x);" that merely repeated
   the initializer two lines above.  */
41389 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41390 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
/* x87-capable float modes: narrow mixed FP/SSE classes to the pure
   x87 stack classes.  */
41392 if (X87_FLOAT_MODE_P (mode))
41394 if (regclass == FP_TOP_SSE_REGS)
41396 else if (regclass == FP_SECOND_SSE_REGS)
41397 return FP_SECOND_REG;
41399 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement TARGET_SECONDARY_RELOAD.  IN_P says whether this is an input
   reload of X into class RCLASS (true) or an output reload (false); SRI
   receives an optional reload insn_code and extra cost.  */
41406 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41407 machine_mode mode, secondary_reload_info *sri)
41409 /* Double-word spills from general registers to non-offsettable memory
41410 references (zero-extended addresses) require special handling. */
41413 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41414 && INTEGER_CLASS_P (rclass)
41415 && !offsettable_memref_p (x))
/* Select the matching reload_noff_* pattern by direction.  */
41418 ? CODE_FOR_reload_noff_load
41419 : CODE_FOR_reload_noff_store);
41420 /* Add the cost of moving address to a temporary. */
41421 sri->extra_cost = 1;
41426 /* QImode spills from non-QI registers require
41427 intermediate register on 32bit targets. */
41429 && (MAYBE_MASK_CLASS_P (rclass)
41430 || (!TARGET_64BIT && !in_p
41431 && INTEGER_CLASS_P (rclass)
41432 && MAYBE_NON_Q_CLASS_P (rclass))))
/* Resolve pseudos/subregs to a hard register number if possible.  */
41441 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41442 regno = true_regnum (x);
41444 /* Return Q_REGS if the operand is in memory. */
41449 /* This condition handles corner case where an expression involving
41450 pointers gets vectorized. We're trying to use the address of a
41451 stack slot as a vector initializer.
41453 (set (reg:V2DI 74 [ vect_cst_.2 ])
41454 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41456 Eventually frame gets turned into sp+offset like this:
41458 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41459 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41460 (const_int 392 [0x188]))))
41462 That later gets turned into:
41464 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41465 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41466 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41468 We'll have the following reload recorded:
41470 Reload 0: reload_in (DI) =
41471 (plus:DI (reg/f:DI 7 sp)
41472 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41473 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41474 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41475 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41476 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41477 reload_reg_rtx: (reg:V2DI 22 xmm1)
41479 Which isn't going to work since SSE instructions can't handle scalar
41480 additions. Returning GENERAL_REGS forces the addition into integer
41481 register and reload can handle subsequent reloads without problems. */
41483 if (in_p && GET_CODE (x) == PLUS
41484 && SSE_CLASS_P (rclass)
41485 && SCALAR_INT_MODE_P (mode))
41486 return GENERAL_REGS;
41491 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
/* NOTE(review): the switch skeleton and most case labels/returns are
   elided from this sample; only two of the single-register classes
   (which are trivially likely spilled) are visible.  */
41494 ix86_class_likely_spilled_p (reg_class_t rclass)
41505 case SSE_FIRST_REG:
41507 case FP_SECOND_REG:
41518 /* If we are copying between general and FP registers, we need a memory
41519 location. The same is true for SSE and MMX registers.
41521 To optimize register_move_cost performance, allow inline variant.
41523 The macro can't work reliably when one of the CLASSES is class containing
41524 registers from multiple units (SSE, MMX, integer). We avoid this by never
41525 combining those units in single alternative in the machine description.
41526 Ensure that this constraint holds to avoid unexpected surprises.
41528 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41529 enforce these sanity checks. */
41532 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41533 machine_mode mode, int strict)
/* LRA may ask about NO_REGS while classes are still being refined.  */
41535 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Mixed-unit classes violate the invariant described above.  */
41537 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41538 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41539 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41540 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41541 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41542 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41544 gcc_assert (!strict || lra_in_progress);
/* Any move between x87 and non-x87 goes through memory.  */
41548 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41551 /* Between mask and general, we have moves no larger than word size. */
41552 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41553 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41556 /* ??? This is a lie. We do have moves between mmx/general, and for
41557 mmx/sse2. But by saying we need secondary memory we discourage the
41558 register allocator from using the mmx registers unless needed. */
41559 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41562 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41564 /* SSE1 doesn't have any direct moves from other classes. */
41568 /* If the target says that inter-unit moves are more expensive
41569 than moving through memory, then don't generate them. */
41570 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41571 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41574 /* Between SSE and general, we have moves no larger than word size. */
41575 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; this is the
   entry point used by the SECONDARY_MEMORY_NEEDED target macro.  */
41583 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41584 machine_mode mode, int strict)
41586 return inline_secondary_memory_needed (class1, class2, mode, strict);
41589 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41591 On the 80386, this is the size of MODE in words,
41592 except in the FP regs, where a single reg is always enough. */
41594 static unsigned char
41595 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41597 if (MAYBE_INTEGER_CLASS_P (rclass))
/* XF/XC are padded oddly: 80-bit value in 2 (64-bit) or 3 (32-bit)
   words per scalar part.  */
41599 if (mode == XFmode)
41600 return (TARGET_64BIT ? 2 : 3)
41601 else if (mode == XCmode)
41602 return (TARGET_64BIT ? 4 : 6);
/* Generic case: round the mode size up to whole words.  */
41604 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41608 if (COMPLEX_MODE_P (mode))
41615 /* Return true if the registers in CLASS cannot represent the change from
41616 modes FROM to TO. */
41619 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41620 enum reg_class regclass)
41625 /* x87 registers can't do subreg at all, as all values are reformatted
41626 to extended precision. */
41627 if (MAYBE_FLOAT_CLASS_P (regclass))
41630 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41632 /* Vector registers do not support QI or HImode loads. If we don't
41633 disallow a change to these modes, reload will assume it's ok to
41634 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41635 the vec_dupv4hi pattern. */
41636 if (GET_MODE_SIZE (from) < 4)
41643 /* Return the cost of moving data of mode M between a
41644 register and memory. A value of 2 is the default; this cost is
41645 relative to those in `REGISTER_MOVE_COST'.
41647 This function is used extensively by register_move_cost that is used to
41648 build tables at startup. Make it inline in this case.
41649 When IN is 2, return maximum of in and out move cost.
41651 If moving between registers and memory is more expensive than
41652 between two registers, you should define this macro to express the
41655 Model also increased moving costs of QImode registers in non
/* NOTE(review): the "index" computations and several switch labels are
   elided from this sample; each unit (x87/SSE/MMX/integer) picks a
   size-indexed entry from the active cost table.  */
41659 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41663 if (FLOAT_CLASS_P (regclass))
/* IN == 2 means "worst of load and store".  */
41681 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41682 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41684 if (SSE_CLASS_P (regclass))
41687 switch (GET_MODE_SIZE (mode))
41702 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41703 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41705 if (MMX_CLASS_P (regclass))
41708 switch (GET_MODE_SIZE (mode))
41720 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41721 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: dispatch on mode size.  */
41723 switch (GET_MODE_SIZE (mode))
41726 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41729 return ix86_cost->int_store[0];
/* Partial-register stalls make movzbl cheaper than a plain byte
   load when optimizing for speed.  */
41730 if (TARGET_PARTIAL_REG_DEPENDENCY
41731 && optimize_function_for_speed_p (cfun))
41732 cost = ix86_cost->movzbl_load;
41734 cost = ix86_cost->int_load[0];
41736 return MAX (cost, ix86_cost->int_store[0]);
41742 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41744 return ix86_cost->movzbl_load;
41746 return ix86_cost->int_store[0] + 4;
41751 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41752 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41754 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41755 if (mode == TFmode)
41758 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41760 cost = ix86_cost->int_load[2];
41762 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
41763 return (cost * (((int) GET_MODE_SIZE (mode)
41764 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Implement TARGET_MEMORY_MOVE_COST: out-of-line wrapper that maps the
   boolean IN onto inline_memory_move_cost's 0/1 direction encoding.  */
41769 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41772 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41776 /* Return the cost of moving data from a register in class CLASS1 to
41777 one in class CLASS2.
41779 It is not required that the cost always equal 2 when FROM is the same as TO;
41780 on some machines it is expensive to move between registers if they are not
41781 general registers. */
41784 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41785 reg_class_t class2_i)
41787 enum reg_class class1 = (enum reg_class) class1_i;
41788 enum reg_class class2 = (enum reg_class) class2_i;
41790 /* In case we require secondary memory, compute cost of the store followed
41791 by load. In order to avoid bad register allocation choices, we need
41792 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41794 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2: take the max of load and store cost for each side.  */
41798 cost += inline_memory_move_cost (mode, class1, 2);
41799 cost += inline_memory_move_cost (mode, class2, 2);
41801 /* In case of copying from general_purpose_register we may emit multiple
41802 stores followed by single load causing memory size mismatch stall.
41803 Count this as arbitrarily high cost of 20. */
41804 if (targetm.class_max_nregs (class1, mode)
41805 > targetm.class_max_nregs (class2, mode))
41808 /* In the case of FP/MMX moves, the registers actually overlap, and we
41809 have to switch modes in order to treat them differently. */
41810 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41811 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41817 /* Moves between SSE/MMX and integer unit are expensive. */
41818 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41819 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41821 /* ??? By keeping returned value relatively high, we limit the number
41822 of moves between integer and MMX/SSE registers for all targets.
41823 Additionally, high value prevents problem with x86_modes_tieable_p(),
41824 where integer modes in MMX/SSE registers are not tieable
41825 because of missing QImode and HImode moves to, from or between
41826 MMX/SSE registers. */
41827 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: use the tuned per-unit move cost.  */
41829 if (MAYBE_FLOAT_CLASS_P (class1))
41830 return ix86_cost->fp_move;
41831 if (MAYBE_SSE_CLASS_P (class1))
41832 return ix86_cost->sse_move;
41833 if (MAYBE_MMX_CLASS_P (class1))
41834 return ix86_cost->mmx_move;
41838 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41842 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41844 /* Flags and only flags can only hold CCmode values. */
41845 if (CC_REGNO_P (regno))
41846 return GET_MODE_CLASS (mode) == MODE_CC
41847 if (GET_MODE_CLASS (mode) == MODE_CC
41848 || GET_MODE_CLASS (mode) == MODE_RANDOM
41849 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* x87 stack registers.  */
41851 if (STACK_REGNO_P (regno))
41852 return VALID_FP_MODE_P (mode);
/* AVX-512 mask registers; wider masks need AVX512BW.  */
41853 if (MASK_REGNO_P (regno))
41854 return (VALID_MASK_REG_MODE (mode)
41855 || (TARGET_AVX512BW
41856 && VALID_MASK_AVX512BW_MODE (mode)));
/* MPX bound registers.  */
41857 if (BND_REGNO_P (regno))
41858 return VALID_BND_REG_MODE (mode);
41859 if (SSE_REGNO_P (regno))
41861 /* We implement the move patterns for all vector modes into and
41862 out of SSE registers, even when no operation instructions
41865 /* For AVX-512 we allow, regardless of regno:
41867 - any of 512-bit wide vector mode
41868 - any scalar mode. */
41871 || VALID_AVX512F_REG_MODE (mode)
41872 || VALID_AVX512F_SCALAR_MODE (mode)))
41875 /* TODO check for QI/HI scalars. */
41876 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
41877 if (TARGET_AVX512VL
41880 || VALID_AVX256_REG_MODE (mode)
41881 || VALID_AVX512VL_128_REG_MODE (mode)))
41884 /* xmm16-xmm31 are only available for AVX-512. */
41885 if (EXT_REX_SSE_REGNO_P (regno))
41888 /* OImode and AVX modes are available only when AVX is enabled. */
41889 return ((TARGET_AVX
41890 && VALID_AVX256_REG_OR_OI_MODE (mode))
41891 || VALID_SSE_REG_MODE (mode)
41892 || VALID_SSE2_REG_MODE (mode)
41893 || VALID_MMX_REG_MODE (mode)
41894 || VALID_MMX_REG_MODE_3DNOW (mode));
41896 if (MMX_REGNO_P (regno))
41898 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41899 so if the register is available at all, then we can move data of
41900 the given mode into or out of it. */
41901 return (VALID_MMX_REG_MODE (mode)
41902 || VALID_MMX_REG_MODE_3DNOW (mode));
/* From here on REGNO is a general-purpose register.  */
41905 if (mode == QImode)
41907 /* Take care for QImode values - they can be in non-QI regs,
41908 but then they do cause partial register stalls. */
41909 if (ANY_QI_REGNO_P (regno))
41911 if (!TARGET_PARTIAL_REG_STALL)
41913 /* LRA checks if the hard register is OK for the given mode.
41914 QImode values can live in non-QI regs, so we allow all
41916 if (lra_in_progress)
41918 return !can_create_pseudo_p ();
41920 /* We handle both integer and floats in the general purpose registers. */
41921 else if (VALID_INT_MODE_P (mode))
41923 else if (VALID_FP_MODE_P (mode))
41925 else if (VALID_DFP_MODE_P (mode))
41927 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41928 on to use that value in smaller contexts, this can easily force a
41929 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41930 supporting DImode, allow it. */
41931 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41937 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41938 tieable integer mode. */
/* NOTE(review): the switch skeleton and case labels are elided from this
   sample; only two of the return expressions are visible.  */
41941 ix86_tieable_integer_mode_p (machine_mode mode)
41950 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41953 return TARGET_64BIT;
41960 /* Return true if MODE1 is accessible in a register that can hold MODE2
41961 without copying. That is, all register classes that can hold MODE2
41962 can also hold MODE1. */
41965 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41967 if (mode1 == mode2)
41970 if (ix86_tieable_integer_mode_p (mode1)
41971 && ix86_tieable_integer_mode_p (mode2))
41974 /* MODE2 being XFmode implies fp stack or general regs, which means we
41975 can tie any smaller floating point modes to it. Note that we do not
41976 tie this with TFmode. */
41977 if (mode2 == XFmode)
41978 return mode1 == SFmode || mode1 == DFmode;
41980 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41981 that we can tie it with SFmode. */
41982 if (mode2 == DFmode)
41983 return mode1 == SFmode;
41985 /* If MODE2 is only appropriate for an SSE register, then tie with
41986 any other mode acceptable to SSE registers. */
/* 32-byte (AVX) modes tie only with other 32-byte SSE modes.  */
41987 if (GET_MODE_SIZE (mode2) == 32
41988 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41989 return (GET_MODE_SIZE (mode1) == 32
41990 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
/* Likewise 16-byte (SSE) modes.  */
41991 if (GET_MODE_SIZE (mode2) == 16
41992 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41993 return (GET_MODE_SIZE (mode1) == 16
41994 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41996 /* If MODE2 is appropriate for an MMX register, then tie
41997 with any other mode acceptable to MMX registers. */
41998 if (GET_MODE_SIZE (mode2) == 8
41999 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42000 return (GET_MODE_SIZE (mode1) == 8
42001 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42006 /* Return the cost of moving between two registers of mode MODE. */
/* The idea: determine the largest chunk size UNITS the target can move
   in one instruction for this mode class, then charge one insn per
   chunk.  */
42009 ix86_set_reg_reg_cost (machine_mode mode)
42011 unsigned int units = UNITS_PER_WORD;
42013 switch (GET_MODE_CLASS (mode))
42019 units = GET_MODE_SIZE (CCmode);
/* Scalar float: moved whole if some FP unit supports the mode.  */
42023 if ((TARGET_SSE && mode == TFmode)
42024 || (TARGET_80387 && mode == XFmode)
42025 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42026 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42027 units = GET_MODE_SIZE (mode);
42030 case MODE_COMPLEX_FLOAT:
42031 if ((TARGET_SSE && mode == TCmode)
42032 || (TARGET_80387 && mode == XCmode)
42033 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42034 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42035 units = GET_MODE_SIZE (mode);
42038 case MODE_VECTOR_INT:
42039 case MODE_VECTOR_FLOAT:
/* Vector modes: moved whole when a wide-enough vector ISA exists.  */
42040 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42041 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42042 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42043 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42044 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42045 units = GET_MODE_SIZE (mode);
42048 /* Return the cost of moving between two registers of mode MODE,
42049 assuming that the move will be in pieces of at most UNITS bytes. */
42050 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42053 /* Compute a (partial) cost for rtx X. Return true if the complete
42054 cost has been computed, and false if subexpressions should be
42055 scanned. In either case, *TOTAL contains the cost result. */
/* Implement TARGET_RTX_COSTS.  NOTE(review): this chunk is an elided
   sample -- the big switch's skeleton (most "case" labels, "return"s and
   braces) is missing; comments below annotate only the visible logic.  */
42058 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42062 enum rtx_code code = (enum rtx_code) code_i;
42063 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42064 machine_mode mode = GET_MODE (x);
/* Pick the speed or size cost table for the active tuning.  */
42065 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* SET of a register from a register or zero: a plain move.  */
42070 if (register_operand (SET_DEST (x), VOIDmode)
42071 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42073 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Integer constants: cost grows with the immediates x86-64 can't
   encode directly, and with PIC-unfriendly symbolic operands.  */
42082 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42084 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42086 else if (flag_pic && SYMBOLIC_CONST (x)
42088 && (GET_CODE (x) == LABEL_REF
42089 || (GET_CODE (x) == SYMBOL_REF
42090 && SYMBOL_REF_LOCAL_P (x)))))
42097 if (mode == VOIDmode)
/* FP constants loadable by fld1/fldz and friends are cheap.  */
42102 switch (standard_80387_constant_p (x))
42107 default: /* Other constants */
42114 if (SSE_FLOAT_MODE_P (mode))
42117 switch (standard_sse_constant_p (x))
42121 case 1: /* 0: xor eliminates false dependency */
42124 default: /* -1: cmp contains false dependency */
42129 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42130 it'll probably end up. Add a penalty for size. */
42131 *total = (COSTS_N_INSNS (1)
42132 + (flag_pic != 0 && !TARGET_64BIT)
42133 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42137 /* The zero extensions is often completely free on x86_64, so make
42138 it as cheap as possible. */
42139 if (TARGET_64BIT && mode == DImode
42140 && GET_MODE (XEXP (x, 0)) == SImode)
42142 else if (TARGET_ZERO_EXTEND_WITH_AND)
42143 *total = cost->add;
42145 *total = cost->movzx;
42149 *total = cost->movsx;
/* Shift by a small constant may be done with lea instead.  */
42153 if (SCALAR_INT_MODE_P (mode)
42154 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42155 && CONST_INT_P (XEXP (x, 1)))
42157 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42160 *total = cost->add;
42163 if ((value == 2 || value == 3)
42164 && cost->lea <= cost->shift_const)
42166 *total = cost->lea;
42176 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42178 /* ??? Should be SSE vector operation cost. */
42179 /* At least for published AMD latencies, this really is the same
42180 as the latency for a simple fpu operation like fabs. */
42181 /* V*QImode is emulated with 1-11 insns. */
42182 if (mode == V16QImode || mode == V32QImode)
42185 if (TARGET_XOP && mode == V16QImode)
42187 /* For XOP we use vpshab, which requires a broadcast of the
42188 value to the variable shift insn. For constants this
42189 means a V16Q const in mem; even when we can perform the
42190 shift with one insn set the cost to prefer paddb. */
42191 if (CONSTANT_P (XEXP (x, 1)))
42193 *total = (cost->fabs
42194 + rtx_cost (XEXP (x, 0), code, 0, speed)
42195 + (speed ? 2 : COSTS_N_BYTES (16)));
42200 else if (TARGET_SSSE3)
42202 *total = cost->fabs * count;
42205 *total = cost->fabs;
/* Multi-word scalar shifts.  */
42207 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42209 if (CONST_INT_P (XEXP (x, 1)))
42211 if (INTVAL (XEXP (x, 1)) > 32)
42212 *total = cost->shift_const + COSTS_N_INSNS (2);
42214 *total = cost->shift_const * 2;
42218 if (GET_CODE (XEXP (x, 1)) == AND)
42219 *total = cost->shift_var * 2;
42221 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42226 if (CONST_INT_P (XEXP (x, 1)))
42227 *total = cost->shift_const;
42228 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42229 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42231 /* Return the cost after shift-and truncation. */
42232 *total = cost->shift_var;
42236 *total = cost->shift_var;
/* Fused multiply-add.  */
42244 gcc_assert (FLOAT_MODE_P (mode));
42245 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42247 /* ??? SSE scalar/vector cost should be used here. */
42248 /* ??? Bald assumption that fma has the same cost as fmul. */
42249 *total = cost->fmul;
42250 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42252 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42254 if (GET_CODE (sub) == NEG)
42255 sub = XEXP (sub, 0);
42256 *total += rtx_cost (sub, FMA, 0, speed);
42259 if (GET_CODE (sub) == NEG)
42260 sub = XEXP (sub, 0);
42261 *total += rtx_cost (sub, FMA, 2, speed);
/* MULT: float first, then emulated vector integer multiplies.  */
42266 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42268 /* ??? SSE scalar cost should be used here. */
42269 *total = cost->fmul;
42272 else if (X87_FLOAT_MODE_P (mode))
42274 *total = cost->fmul;
42277 else if (FLOAT_MODE_P (mode))
42279 /* ??? SSE vector cost should be used here. */
42280 *total = cost->fmul;
42283 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42285 /* V*QImode is emulated with 7-13 insns. */
42286 if (mode == V16QImode || mode == V32QImode)
42289 if (TARGET_XOP && mode == V16QImode)
42291 else if (TARGET_SSSE3)
42293 *total = cost->fmul * 2 + cost->fabs * extra;
42295 /* V*DImode is emulated with 5-8 insns. */
42296 else if (mode == V2DImode || mode == V4DImode)
42298 if (TARGET_XOP && mode == V2DImode)
42299 *total = cost->fmul * 2 + cost->fabs * 3;
42301 *total = cost->fmul * 3 + cost->fabs * 5;
42303 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42304 insns, including two PMULUDQ. */
42305 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42306 *total = cost->fmul * 2 + cost->fabs * 5;
42308 *total = cost->fmul;
/* Scalar integer multiply: cost depends on the set bits of a
   constant multiplier (nbits) plus widening-multiply detection.  */
42313 rtx op0 = XEXP (x, 0);
42314 rtx op1 = XEXP (x, 1);
42316 if (CONST_INT_P (XEXP (x, 1)))
42318 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42319 for (nbits = 0; value != 0; value &= value - 1)
42323 /* This is arbitrary. */
42326 /* Compute costs correctly for widening multiplication. */
42327 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42328 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42329 == GET_MODE_SIZE (mode))
42331 int is_mulwiden = 0;
42332 machine_mode inner_mode = GET_MODE (op0);
42334 if (GET_CODE (op0) == GET_CODE (op1))
42335 is_mulwiden = 1, op1 = XEXP (op1, 0);
42336 else if (CONST_INT_P (op1))
42338 if (GET_CODE (op0) == SIGN_EXTEND)
42339 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42342 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42346 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42349 *total = (cost->mult_init[MODE_INDEX (mode)]
42350 + nbits * cost->mult_bit
42351 + rtx_cost (op0, outer_code, opno, speed)
42352 + rtx_cost (op1, outer_code, opno, speed));
/* Division.  */
42361 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42362 /* ??? SSE cost should be used here. */
42363 *total = cost->fdiv;
42364 else if (X87_FLOAT_MODE_P (mode))
42365 *total = cost->fdiv;
42366 else if (FLOAT_MODE_P (mode))
42367 /* ??? SSE vector cost should be used here. */
42368 *total = cost->fdiv;
42370 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: detect lea-encodable base+index*scale+disp shapes.  */
42374 if (GET_MODE_CLASS (mode) == MODE_INT
42375 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42377 if (GET_CODE (XEXP (x, 0)) == PLUS
42378 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42379 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42380 && CONSTANT_P (XEXP (x, 1)))
42382 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42383 if (val == 2 || val == 4 || val == 8)
42385 *total = cost->lea;
42386 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42387 outer_code, opno, speed);
42388 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42389 outer_code, opno, speed);
42390 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42394 else if (GET_CODE (XEXP (x, 0)) == MULT
42395 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42397 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42398 if (val == 2 || val == 4 || val == 8)
42400 *total = cost->lea;
42401 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42402 outer_code, opno, speed);
42403 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42407 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42409 *total = cost->lea;
42410 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42411 outer_code, opno, speed);
42412 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42413 outer_code, opno, speed);
42414 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
/* Float add/sub.  */
42421 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42423 /* ??? SSE cost should be used here. */
42424 *total = cost->fadd;
42427 else if (X87_FLOAT_MODE_P (mode))
42429 *total = cost->fadd;
42432 else if (FLOAT_MODE_P (mode))
42434 /* ??? SSE vector cost should be used here. */
42435 *total = cost->fadd;
/* Multi-word logical ops: two insns, plus double cost for any
   operand that is not already DImode.  */
42443 if (GET_MODE_CLASS (mode) == MODE_INT
42444 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42446 *total = (cost->add * 2
42447 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42448 << (GET_MODE (XEXP (x, 0)) != DImode))
42449 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42450 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG.  */
42456 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42458 /* ??? SSE cost should be used here. */
42459 *total = cost->fchs;
42462 else if (X87_FLOAT_MODE_P (mode))
42464 *total = cost->fchs;
42467 else if (FLOAT_MODE_P (mode))
42469 /* ??? SSE vector cost should be used here. */
42470 *total = cost->fchs;
/* NOT.  */
42476 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42478 /* ??? Should be SSE vector operation cost. */
42479 /* At least for published AMD latencies, this really is the same
42480 as the latency for a simple fpu operation like fabs. */
42481 *total = cost->fabs;
42483 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42484 *total = cost->add * 2;
42486 *total = cost->add;
/* COMPARE of a single extracted bit against zero -> test insn.  */
42490 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42491 && XEXP (XEXP (x, 0), 1) == const1_rtx
42492 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42493 && XEXP (x, 1) == const0_rtx)
42495 /* This kind of construct is implemented using test[bwl].
42496 Treat it as if we had an AND. */
42497 *total = (cost->add
42498 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42499 + rtx_cost (const1_rtx, outer_code, opno, speed));
42505 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS.  */
42510 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42511 /* ??? SSE cost should be used here. */
42512 *total = cost->fabs;
42513 else if (X87_FLOAT_MODE_P (mode))
42514 *total = cost->fabs;
42515 else if (FLOAT_MODE_P (mode))
42516 /* ??? SSE vector cost should be used here. */
42517 *total = cost->fabs;
/* SQRT.  */
42521 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42522 /* ??? SSE cost should be used here. */
42523 *total = cost->fsqrt;
42524 else if (X87_FLOAT_MODE_P (mode))
42525 *total = cost->fsqrt;
42526 else if (FLOAT_MODE_P (mode))
42527 /* ??? SSE vector cost should be used here. */
42528 *total = cost->fsqrt;
/* UNSPEC: thread-pointer reads are special-cased.  */
42532 if (XINT (x, 1) == UNSPEC_TP)
42538 case VEC_DUPLICATE:
42539 /* ??? Assume all of these vector manipulation patterns are
42540 recognizable. In which case they all pretty much have the
42542 *total = cost->fabs;
/* VEC_MERGE: an AVX-512 mask makes the op cost the same as the
   unmasked variant.  */
42545 mask = XEXP (x, 2);
42546 /* This is masked instruction, assume the same cost,
42547 as nonmasked variant. */
42548 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42549 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42551 *total = cost->fabs;
42561 static int current_machopic_label_num;
42563 /* Given a symbol name and its associated stub, write out the
42564 definition of the stub. */
/* Darwin (Mach-O) only: this body sits under #if TARGET_MACHO (see the
   #endif just below) and is 32-bit only (asserted).  It writes the stub
   itself and, for the lazily-bound flavors, the stub-binding-helper entry
   and the lazy symbol pointer that dyld patches at bind time.  */
42567 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42569 unsigned int length;
42570 char *binder_name, *symbol_name, lazy_ptr_name[32];
42571 int label = ++current_machopic_label_num;
42573 /* For 64-bit we shouldn't get here. */
42574 gcc_assert (!TARGET_64BIT);
42576 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42577 symb = targetm.strip_name_encoding (symb);
42579 length = strlen (stub);
42580 binder_name = XALLOCAVEC (char, length + 32);
42581 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42583 length = strlen (symb);
42584 symbol_name = XALLOCAVEC (char, length + 32);
42585 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42587 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Three stub flavors, each in its own Mach-O section: AT&T-style
   self-modifying, pure-PIC, and non-PIC.  */
42589 if (MACHOPIC_ATT_STUB)
42590 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42591 else if (MACHOPIC_PURE)
42592 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42594 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42596 fprintf (file, "%s:\n", stub);
42597 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42599 if (MACHOPIC_ATT_STUB)
/* Placeholder hlt bytes; dyld rewrites this stub in place on first use.  */
42601 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42603 else if (MACHOPIC_PURE)
42606 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42607 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42608 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42609 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42610 label, lazy_ptr_name, label);
42611 fprintf (file, "\tjmp\t*%%ecx\n");
42614 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42616 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42617 it needs no stub-binding-helper. */
42618 if (MACHOPIC_ATT_STUB)
42621 fprintf (file, "%s:\n", binder_name);
42625 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42626 fprintf (file, "\tpushl\t%%ecx\n");
42629 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42631 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42633 /* N.B. Keep the correspondence of these
42634 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42635 old-pic/new-pic/non-pic stubs; altering this will break
42636 compatibility with existing dylibs. */
42639 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42640 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42643 /* 16-byte -mdynamic-no-pic stub. */
42644 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
/* The lazy pointer initially holds the binder's address; dyld overwrites
   it with the real symbol address on first call.  */
42646 fprintf (file, "%s:\n", lazy_ptr_name);
42647 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42648 fprintf (file, ASM_LONG "%s\n", binder_name);
42650 #endif /* TARGET_MACHO */
42652 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[] with the preferred allocation order:
   call-clobbered GPRs, call-saved GPRs, x87 stack regs (when they are the
   FP math unit), SSE, REX SSE, extended REX SSE, mask regs, MPX bound
   regs, x87 (when SSE does FP math), then MMX; the remainder is zeroed.  */
42655 x86_order_regs_for_local_alloc (void)
42660 /* First allocate the local general purpose registers. */
42661 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42662 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42663 reg_alloc_order [pos++] = i;
42665 /* Global general purpose registers. */
42666 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42667 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42668 reg_alloc_order [pos++] = i;
42670 /* x87 registers come first in case we are doing FP math
42672 if (!TARGET_SSE_MATH)
42673 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42674 reg_alloc_order [pos++] = i;
42676 /* SSE registers. */
42677 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42678 reg_alloc_order [pos++] = i;
42679 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42680 reg_alloc_order [pos++] = i;
42682 /* Extended REX SSE registers. */
42683 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42684 reg_alloc_order [pos++] = i;
42686 /* Mask register. */
42687 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42688 reg_alloc_order [pos++] = i;
42690 /* MPX bound registers. */
42691 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42692 reg_alloc_order [pos++] = i;
42694 /* x87 registers. */
42695 if (TARGET_SSE_MATH)
42696 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42697 reg_alloc_order [pos++] = i;
42699 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42700 reg_alloc_order [pos++] = i;
42702 /* Initialize the rest of array as we do not allocate some registers
42704 while (pos < FIRST_PSEUDO_REGISTER)
42705 reg_alloc_order [pos++] = 0;
42708 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42709 in struct attribute_spec handler. */
/* Validation: the attribute must be on a function/method type (or a
   FIELD_DECL/TYPE_DECL wrapping one), is 32-bit only, and its argument
   must be the integer constant 0 or 1.  On any violation we warn under
   -Wattributes and set *no_add_attrs so the attribute is dropped.  */
42711 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42714 bool *no_add_attrs)
42716 if (TREE_CODE (*node) != FUNCTION_TYPE
42717 && TREE_CODE (*node) != METHOD_TYPE
42718 && TREE_CODE (*node) != FIELD_DECL
42719 && TREE_CODE (*node) != TYPE_DECL)
42721 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42723 *no_add_attrs = true;
42728 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42730 *no_add_attrs = true;
42733 if (is_attribute_p ("callee_pop_aggregate_return", name))
42737 cst = TREE_VALUE (args);
42738 if (TREE_CODE (cst) != INTEGER_CST)
42740 warning (OPT_Wattributes,
42741 "%qE attribute requires an integer constant argument",
42743 *no_add_attrs = true;
42745 else if (compare_tree_int (cst, 0) != 0
42746 && compare_tree_int (cst, 1) != 0)
42748 warning (OPT_Wattributes,
42749 "argument to %qE attribute is neither zero, nor one",
42751 *no_add_attrs = true;
42760 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42761 struct attribute_spec.handler. */
/* Besides the usual applies-to-functions check, this rejects attaching
   ms_abi and sysv_abi to the same type -- the two calling conventions are
   mutually exclusive, so mixing them is a hard error, not a warning.  */
42763 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42764 bool *no_add_attrs)
42766 if (TREE_CODE (*node) != FUNCTION_TYPE
42767 && TREE_CODE (*node) != METHOD_TYPE
42768 && TREE_CODE (*node) != FIELD_DECL
42769 && TREE_CODE (*node) != TYPE_DECL)
42771 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42773 *no_add_attrs = true;
42777 /* Can combine regparm with all attributes but fastcall. */
42778 if (is_attribute_p ("ms_abi", name))
42780 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42782 error ("ms_abi and sysv_abi attributes are not compatible");
42787 else if (is_attribute_p ("sysv_abi", name))
42789 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42791 error ("ms_abi and sysv_abi attributes are not compatible");
42800 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42801 struct attribute_spec.handler. */
/* Accepts the attribute only on record/union types (possibly reached
   through a TYPE_DECL); also refuses to combine ms_struct with gcc_struct
   on the same type, since the two layout rules contradict each other.  */
42803 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42804 bool *no_add_attrs)
42807 if (DECL_P (*node))
42809 if (TREE_CODE (*node) == TYPE_DECL)
42810 type = &TREE_TYPE (*node);
42815 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42817 warning (OPT_Wattributes, "%qE attribute ignored",
42819 *no_add_attrs = true;
42822 else if ((is_attribute_p ("ms_struct", name)
42823 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42824 || ((is_attribute_p ("gcc_struct", name)
42825 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42827 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42829 *no_add_attrs = true;
/* Attribute handler for attributes that may only appear on a function
   *declaration* (FUNCTION_DECL), not on types; warns and drops the
   attribute otherwise.  Arguments as in struct attribute_spec.handler.  */
42836 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42837 bool *no_add_attrs)
42839 if (TREE_CODE (*node) != FUNCTION_DECL)
42841 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42843 *no_add_attrs = true;
/* Return true if RECORD_TYPE should be laid out with Microsoft bitfield
   rules: either the target defaults to MS layout and the type is not
   marked gcc_struct, or the type is explicitly marked ms_struct.  */
42849 ix86_ms_bitfield_layout_p (const_tree record_type)
42851 return ((TARGET_MS_BITFIELD_LAYOUT
42852 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42853 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42856 /* Returns an expression indicating where the this parameter is
42857 located on entry to the FUNCTION. */
/* 64-bit: `this' is in an integer parameter register; when the function
   returns an aggregate in memory, a hidden return pointer occupies the
   first register, so `this' moves to the second (parm_regs[aggr]).
   32-bit: regparm/fastcall/thiscall may place it in a register (CX/DX),
   otherwise it lives on the stack at 4(%esp) past the return address.  */
42860 x86_this_parameter (tree function)
42862 tree type = TREE_TYPE (function);
42863 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42868 const int *parm_regs;
42870 if (ix86_function_type_abi (type) == MS_ABI)
42871 parm_regs = x86_64_ms_abi_int_parameter_registers;
42873 parm_regs = x86_64_int_parameter_registers;
42874 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42877 nregs = ix86_function_regparm (type, function);
42879 if (nregs > 0 && !stdarg_p (type))
42882 unsigned int ccvt = ix86_get_callcvt (type);
42884 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42885 regno = aggr ? DX_REG : CX_REG;
42886 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42890 return gen_rtx_MEM (SImode,
42891 plus_constant (Pmode, stack_pointer_rtx, 4));
42900 return gen_rtx_MEM (SImode,
42901 plus_constant (Pmode,
42902 stack_pointer_rtx, 4));
42905 return gen_rtx_REG (SImode, regno);
42908 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42912 /* Determine whether x86_output_mi_thunk can succeed. */
/* The thunk emitter needs a scratch register on 32-bit targets; this
   predicate checks that one is available given the regparm count, the
   vcall_offset adjustment, and possible PIC/GOT addressing of FUNCTION.  */
42915 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42916 const_tree function)
42918 /* 64-bit can handle anything. */
42922 /* For 32-bit, everything's fine if we have one free register. */
42923 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42926 /* Need a free register for vcall_offset. */
42930 /* Need a free register for GOT references. */
42931 if (flag_pic && !targetm.binds_local_p (function))
42934 /* Otherwise ok. */
42938 /* Output the assembler code for a thunk function. THUNK_DECL is the
42939 declaration for the thunk function itself, FUNCTION is the decl for
42940 the target function. DELTA is an immediate constant offset to be
42941 added to THIS. If VCALL_OFFSET is nonzero, the word at
42942 *(*this + vcall_offset) should be added to THIS. */
/* Generates RTL directly (no tree expansion), then runs a minimal
   final pass over it at the bottom of this function.  */
42945 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42946 HOST_WIDE_INT vcall_offset, tree function)
42948 rtx this_param = x86_this_parameter (function);
42949 rtx this_reg, tmp, fnaddr;
42950 unsigned int tmp_regno;
/* Pick a scratch register the target's calling convention leaves free:
   R10 on 64-bit, otherwise AX/DX/CX depending on fastcall/thiscall.  */
42954 tmp_regno = R10_REG;
42957 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42958 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42959 tmp_regno = AX_REG;
42960 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42961 tmp_regno = DX_REG;
42963 tmp_regno = CX_REG;
42966 emit_note (NOTE_INSN_PROLOGUE_END);
42968 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42969 pull it in now and let DELTA benefit. */
42970 if (REG_P (this_param))
42971 this_reg = this_param;
42972 else if (vcall_offset)
42974 /* Put the this parameter into %eax. */
42975 this_reg = gen_rtx_REG (Pmode, AX_REG);
42976 emit_move_insn (this_reg, this_param);
42979 this_reg = NULL_RTX;
42981 /* Adjust the this parameter by a fixed constant. */
42984 rtx delta_rtx = GEN_INT (delta);
42985 rtx delta_dst = this_reg ? this_reg : this_param;
/* DELTA may not fit a 32-bit immediate on x86_64; load it into the
   scratch register first in that case.  */
42989 if (!x86_64_general_operand (delta_rtx, Pmode))
42991 tmp = gen_rtx_REG (Pmode, tmp_regno);
42992 emit_move_insn (tmp, delta_rtx);
42997 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43000 /* Adjust the this parameter by a value stored in the vtable. */
43003 rtx vcall_addr, vcall_mem, this_mem;
43005 tmp = gen_rtx_REG (Pmode, tmp_regno);
43007 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43008 if (Pmode != ptr_mode)
43009 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43010 emit_move_insn (tmp, this_mem);
43012 /* Adjust the this parameter. */
43013 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43015 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43017 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43018 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43019 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43022 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43023 if (Pmode != ptr_mode)
43024 emit_insn (gen_addsi_1_zext (this_reg,
43025 gen_rtx_REG (ptr_mode,
43029 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43032 /* If necessary, drop THIS back to its stack slot. */
43033 if (this_reg && this_reg != this_param)
43034 emit_move_insn (this_param, this_reg);
/* Form the tail-call address, going through the GOT / Mach-O indirection
   when FUNCTION does not bind locally under PIC.  */
43036 fnaddr = XEXP (DECL_RTL (function), 0);
43039 if (!flag_pic || targetm.binds_local_p (function)
43044 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43045 tmp = gen_rtx_CONST (Pmode, tmp);
43046 fnaddr = gen_const_mem (Pmode, tmp);
43051 if (!flag_pic || targetm.binds_local_p (function))
43054 else if (TARGET_MACHO)
43056 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43057 fnaddr = XEXP (fnaddr, 0);
43059 #endif /* TARGET_MACHO */
43062 tmp = gen_rtx_REG (Pmode, CX_REG);
43063 output_set_got (tmp, NULL_RTX);
43065 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43066 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43067 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43068 fnaddr = gen_const_mem (Pmode, fnaddr);
43072 /* Our sibling call patterns do not allow memories, because we have no
43073 predicate that can distinguish between frame and non-frame memory.
43074 For our purposes here, we can get away with (ab)using a jump pattern,
43075 because we're going to do no optimization. */
43076 if (MEM_P (fnaddr))
43078 if (sibcall_insn_operand (fnaddr, word_mode))
43080 fnaddr = XEXP (DECL_RTL (function), 0);
43081 tmp = gen_rtx_MEM (QImode, fnaddr);
43082 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43083 tmp = emit_call_insn (tmp);
43084 SIBLING_CALL_P (tmp) = 1;
43087 emit_jump_insn (gen_indirect_jump (fnaddr));
43091 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43093 // CM_LARGE_PIC always uses pseudo PIC register which is
43094 // uninitialized. Since FUNCTION is local and calling it
43095 // doesn't go through PLT, we use scratch register %r11 as
43096 // PIC register and initialize it here.
43097 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43098 ix86_init_large_pic_reg (tmp_regno);
43099 fnaddr = legitimize_pic_address (fnaddr,
43100 gen_rtx_REG (Pmode, tmp_regno));
43103 if (!sibcall_insn_operand (fnaddr, word_mode))
43105 tmp = gen_rtx_REG (word_mode, tmp_regno);
43106 if (GET_MODE (fnaddr) != word_mode)
43107 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43108 emit_move_insn (tmp, fnaddr);
43112 tmp = gen_rtx_MEM (QImode, fnaddr);
43113 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43114 tmp = emit_call_insn (tmp);
43115 SIBLING_CALL_P (tmp) = 1;
43119 /* Emit just enough of rest_of_compilation to get the insns emitted.
43120 Note that use_thunk calls assemble_start_function et al. */
43121 insn = get_insns ();
43122 shorten_branches (insn);
43123 final_start_function (insn, file, 1);
43124 final (insn, file, 1);
43125 final_end_function ();
/* Emit target-specific directives at the start of the assembly file:
   optional .version / __fltused / Intel-syntax directives, plus Darwin
   boilerplate.  NOTE(review): the fputs of ".code16gcc" and the call to
   darwin_file_start appear to be guarded by conditions elided from this
   view (presumably 16-bit-code and TARGET_MACHO checks) -- confirm
   against the full file.  */
43129 x86_file_start (void)
43131 default_file_start ();
43133 fputs ("\t.code16gcc\n", asm_out_file);
43135 darwin_file_start ();
43137 if (X86_FILE_START_VERSION_DIRECTIVE)
43138 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43139 if (X86_FILE_START_FLTUSED)
43140 fputs ("\t.global\t__fltused\n", asm_out_file);
43141 if (ix86_asm_dialect == ASM_INTEL)
43142 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Return the alignment (in bits) to use for FIELD, given COMPUTED, the
   alignment the middle end derived.  On plain 32-bit x86 (neither
   TARGET_64BIT nor -malign-double), double/long-long-like scalar modes
   are capped at 32-bit alignment per the i386 psABI.  */
43146 x86_field_alignment (tree field, int computed)
43149 tree type = TREE_TYPE (field);
43151 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* Look through arrays so an array of doubles gets the same cap as a
   single double.  */
43153 mode = TYPE_MODE (strip_array_types (type));
43154 if (mode == DFmode || mode == DCmode
43155 || GET_MODE_CLASS (mode) == MODE_INT
43156 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43157 return MIN (32, computed);
43161 /* Print call to TARGET to FILE. */
/* With -mnop-mcount the profiler call is replaced by a same-sized
   (5-byte) nop so it can be patched in later; the "1:" label is what
   -mrecord-mcount's __mcount_loc entries refer back to.  */
43164 x86_print_call_or_nop (FILE *file, const char *target)
43166 if (flag_nop_mcount)
43167 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43169 fprintf (file, "1:\tcall\t%s\n", target);
43172 /* Output assembler code to FILE to increment profiler label # LABELNO
43173 for profiling a function entry. */
/* Emits the mcount/fentry call in one of three addressing flavors:
   RIP-relative GOTPCREL (64-bit PIC), @GOT via %ebx (32-bit PIC), or a
   direct call; optionally records the call site in __mcount_loc for
   -mrecord-mcount.  */
43175 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43177 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43181 #ifndef NO_PROFILE_COUNTERS
43182 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43185 if (!TARGET_PECOFF && flag_pic)
43186 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43188 x86_print_call_or_nop (file, mcount_name);
43192 #ifndef NO_PROFILE_COUNTERS
43193 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43196 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43200 #ifndef NO_PROFILE_COUNTERS
43201 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43204 x86_print_call_or_nop (file, mcount_name);
/* The "1b" below refers to the "1:" label emitted before each call.  */
43207 if (flag_record_mcount)
43209 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43210 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43211 fprintf (file, "\t.previous\n");
43215 /* We don't have exact information about the insn sizes, but we may assume
43216 quite safely that we are informed about all 1 byte insns and memory
43217 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative LOWER bound on INSN's encoded size in bytes;
   used by ix86_avoid_jump_mispredicts below, where underestimating is
   safe (it only makes the padding heuristic more cautious).  */
43221 min_insn_size (rtx_insn *insn)
43225 if (!INSN_P (insn) || !active_insn_p (insn))
43228 /* Discard alignments we've emit and jump instructions. */
43229 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43230 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43233 /* Important case - calls are always 5 bytes.
43234 It is common to have many calls in the row. */
43236 && symbolic_reference_mentioned_p (PATTERN (insn))
43237 && !SIBLING_CALL_P (insn))
43239 len = get_attr_length (insn);
43243 /* For normal instructions we rely on get_attr_length being exact,
43244 with a few exceptions. */
43245 if (!JUMP_P (insn))
43247 enum attr_type type = get_attr_type (insn);
/* Inline asm length is unknown; treat it as minimal.  */
43252 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43253 || asm_noperands (PATTERN (insn)) >= 0)
43260 /* Otherwise trust get_attr_length. */
43264 l = get_attr_length_address (insn);
43265 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43274 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43276 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: maintain [start, insn] with NBYTES the byte
   estimate (via min_insn_size) and NJUMPS the jump count; whenever a
   fourth jump would land in the same 16-byte window, emit a `pad' insn
   before it.  */
43280 ix86_avoid_jump_mispredicts (void)
43282 rtx_insn *insn, *start = get_insns ();
43283 int nbytes = 0, njumps = 0;
43284 bool isjump = false;
43286 /* Look for all minimal intervals of instructions containing 4 jumps.
43287 The intervals are bounded by START and INSN. NBYTES is the total
43288 size of instructions in the interval including INSN and not including
43289 START. When the NBYTES is smaller than 16 bytes, it is possible
43290 that the end of START and INSN ends up in the same 16byte page.
43292 The smallest offset in the page INSN can start is the case where START
43293 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43294 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43296 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43297 have to, control transfer to label(s) can be performed through other
43298 means, and also we estimate minimum length of all asm stmts as 0. */
43299 for (insn = start; insn; insn = NEXT_INSN (insn))
43303 if (LABEL_P (insn))
43305 int align = label_to_alignment (insn);
43306 int max_skip = label_to_max_skip (insn);
43310 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43311 already in the current 16 byte page, because otherwise
43312 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43313 bytes to reach 16 byte boundary. */
43315 || (align <= 3 && max_skip != (1 << align) - 1))
43318 fprintf (dump_file, "Label %i with max_skip %i\n",
43319 INSN_UID (insn), max_skip);
/* An aligned label resets part of the window: shrink from the front
   until the remaining bytes fit before the alignment skip.  */
43322 while (nbytes + max_skip >= 16)
43324 start = NEXT_INSN (start);
43325 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43327 njumps--, isjump = true;
43330 nbytes -= min_insn_size (start);
43336 min_size = min_insn_size (insn);
43337 nbytes += min_size;
43339 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43340 INSN_UID (insn), min_size);
43341 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Shrink the window from the front so it holds at most 4 jumps.  */
43349 start = NEXT_INSN (start);
43350 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43352 njumps--, isjump = true;
43355 nbytes -= min_insn_size (start);
43357 gcc_assert (njumps >= 0);
43359 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43360 INSN_UID (start), INSN_UID (insn), nbytes);
43362 if (njumps == 3 && isjump && nbytes < 16)
43364 int padsize = 15 - nbytes + min_insn_size (insn);
43367 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43368 INSN_UID (insn), padsize);
43369 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43375 /* AMD Athlon works faster
43376 when RET is not destination of conditional jump or directly preceded
43377 by other jump instruction. We avoid the penalty by inserting NOP just
43378 before the RET instructions in such cases. */
/* Walks every predecessor of the exit block; when the block's final
   return is reachable straight from a label or a (conditional) jump, the
   plain ret is replaced by the long-form return pattern emitted below.  */
43380 ix86_pad_returns (void)
43385 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43387 basic_block bb = e->src;
43388 rtx_insn *ret = BB_END (bb);
43390 bool replace = false;
43392 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43393 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
43395 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43396 if (active_insn_p (prev) || LABEL_P (prev))
43398 if (prev && LABEL_P (prev))
/* A label directly before ret: risky only if some predecessor
   reaches it by an actual branch (not fallthrough).  */
43403 FOR_EACH_EDGE (e, ei, bb->preds)
43404 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43405 && !(e->flags & EDGE_FALLTHRU))
43413 prev = prev_active_insn (ret);
43415 && ((JUMP_P (prev) && any_condjump_p (prev))
43418 /* Empty functions get branch mispredict even when
43419 the jump destination is not visible to us. */
43420 if (!prev && !optimize_function_for_size_p (cfun))
43425 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43431 /* Count the minimum number of instructions in BB. Return 4 if the
43432 number of instructions >= 4. */
/* Saturating count: only real (non-debug, non-USE/CLOBBER) insns are
   counted, and the walk stops as soon as 4 is reached, since callers
   only care about "fewer than 4".  */
43435 ix86_count_insn_bb (basic_block bb)
43438 int insn_count = 0;
43440 /* Count number of instructions in this block. Return 4 if the number
43441 of instructions >= 4. */
43442 FOR_BB_INSNS (bb, insn)
43444 /* Only happen in exit blocks. */
43446 && ANY_RETURN_P (PATTERN (insn)))
43449 if (NONDEBUG_INSN_P (insn)
43450 && GET_CODE (PATTERN (insn)) != USE
43451 && GET_CODE (PATTERN (insn)) != CLOBBER)
43454 if (insn_count >= 4)
43463 /* Count the minimum number of instructions in code path in BB.
43464 Return 4 if the number of instructions >= 4. */
/* Helper for ix86_pad_short_function: estimates the shortest
   entry-to-BB instruction count, looking through at most one
   predecessor block.  */
43467 ix86_count_insn (basic_block bb)
43471 int min_prev_count;
43473 /* Only bother counting instructions along paths with no
43474 more than 2 basic blocks between entry and exit. Given
43475 that BB has an edge to exit, determine if a predecessor
43476 of BB has an edge from entry. If so, compute the number
43477 of instructions in the predecessor block. If there
43478 happen to be multiple such blocks, compute the minimum. */
43479 min_prev_count = 4;
43480 FOR_EACH_EDGE (e, ei, bb->preds)
43483 edge_iterator prev_ei;
43485 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
/* BB is itself reachable directly from entry: no prefix insns.  */
43487 min_prev_count = 0;
43490 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43492 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43494 int count = ix86_count_insn_bb (e->src);
43495 if (count < min_prev_count)
43496 min_prev_count = count;
43502 if (min_prev_count < 4)
43503 min_prev_count += ix86_count_insn_bb (bb);
43505 return min_prev_count;
43508 /* Pad short function to 4 instructions. */
/* For targets with TARGET_PAD_SHORT_FUNCTION: if any path to a return
   executes fewer than 4 insns, insert nops just before the epilogue so
   the function body reaches the 4-instruction minimum.  */
43511 ix86_pad_short_function (void)
43516 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43518 rtx_insn *ret = BB_END (e->src);
43519 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43521 int insn_count = ix86_count_insn (e->src);
43523 /* Pad short function. */
43524 if (insn_count < 4)
43526 rtx_insn *insn = ret;
43528 /* Find epilogue. */
43531 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43532 insn = PREV_INSN (insn);
43537 /* Two NOPs count as one instruction. */
43538 insn_count = 2 * (4 - insn_count);
43539 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43545 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43546 the epilogue, the Windows system unwinder will apply epilogue logic and
43547 produce incorrect offsets. This can be avoided by adding a nop between
43548 the last insn that can throw and the first insn of the epilogue. */
43551 ix86_seh_fixup_eh_fallthru (void)
43556 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43558 rtx_insn *insn, *next;
43560 /* Find the beginning of the epilogue. */
43561 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43562 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43567 /* We only care about preceding insns that can throw. */
43568 insn = prev_active_insn (insn);
43569 if (insn == NULL || !can_throw_internal (insn))
43572 /* Do not separate calls from their debug information. */
/* Skip past VAR_LOCATION / CALL_ARG_LOCATION notes so the nop lands
   after the debug notes attached to the throwing insn.  */
43573 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43575 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43576 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
/* A single nop separates the EH region from the epilogue.  */
43581 emit_insn_after (gen_nops (const1_rtx), insn);
43585 /* Implement machine specific optimizations. We implement padding of returns
43586 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header is elided from this view; this body is
   presumably ix86_reorg, the TARGET_MACHINE_DEPENDENT_REORG hook -- confirm
   against the full file.  It dispatches the SEH fixup and, when optimizing
   for speed, the short-function/return padding and jump-window passes
   defined above.  */
43590 /* We are freeing block_for_insn in the toplev to keep compatibility
43591 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43592 compute_bb_for_insn ();
43594 if (TARGET_SEH && current_function_has_exception_handlers ())
43595 ix86_seh_fixup_eh_fallthru ();
43597 if (optimize && optimize_function_for_speed_p (cfun))
43599 if (TARGET_PAD_SHORT_FUNCTION)
43600 ix86_pad_short_function ();
43601 else if (TARGET_PAD_RETURNS)
43602 ix86_pad_returns ();
43603 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43604 if (TARGET_FOUR_JUMP_LIMIT)
43605 ix86_avoid_jump_mispredicts ();
43610 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the recognized operands of INSN for a general register outside
   the legacy QImode-addressable set (AX/BX/CX/DX).  */
43613 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43616 extract_insn_cached (insn);
43617 for (i = 0; i < recog_data.n_operands; i++)
43618 if (GENERAL_REG_P (recog_data.operand[i])
43619 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43624 /* Return true when INSN mentions register that must be encoded using REX
/* Accepts either a full insn or a bare rtx; walks every sub-rtx looking
   for an R8-R15 integer register or an XMM8+ SSE register.  */
43627 x86_extended_reg_mentioned_p (rtx insn)
43629 subrtx_iterator::array_type array;
43630 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43632 const_rtx x = *iter;
43634 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43640 /* If profitable, negate (without causing overflow) integer constant
43641 of mode MODE at location LOC. Return true in this case. */
/* Used so "add $-4" can be printed as "sub $4".  Declines when *LOC is
   the sign-bit value of MODE (negation would overflow) and keeps -128
   as-is because -128 fits an imm8 while +128 does not.  */
43643 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43647 if (!CONST_INT_P (*loc))
43653 /* DImode x86_64 constants must fit in 32 bits. */
43654 gcc_assert (x86_64_immediate_operand (*loc, mode));
43665 gcc_unreachable ();
43668 /* Avoid overflows. */
43669 if (mode_signbit_p (mode, *loc))
43672 val = INTVAL (*loc);
43674 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43675 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43676 if ((val < 0 && val != -128)
43679 *loc = GEN_INT (-val);
43686 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43687 optabs would emit if we didn't have TFmode patterns. */
/* Standard unsigned->float trick: non-negative inputs use the signed
   conversion directly; otherwise convert (in >> 1) | (in & 1) -- halved
   with the low bit folded in for correct rounding -- and double the
   result (f0 + f0).  */
43690 x86_emit_floatuns (rtx operands[2])
43692 rtx_code_label *neglab, *donelab;
43693 rtx i0, i1, f0, in, out;
43694 machine_mode mode, inmode;
43696 inmode = GET_MODE (operands[1]);
43697 gcc_assert (inmode == SImode || inmode == DImode);
43700 in = force_reg (inmode, operands[1]);
43701 mode = GET_MODE (out);
43702 neglab = gen_label_rtx ();
43703 donelab = gen_label_rtx ();
43704 f0 = gen_reg_rtx (mode);
/* Branch to NEGLAB when the input's sign bit is set (reads as negative
   under the signed interpretation).  */
43706 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43708 expand_float (out, in, 0);
43710 emit_jump_insn (gen_jump (donelab));
43713 emit_label (neglab);
43715 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43717 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43719 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43721 expand_float (f0, i0, 0);
43723 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43725 emit_label (donelab);
43728 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43729 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43730 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43731 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43733 /* Get a vector mode of the same size as the original but with elements
43734 twice as wide. This is only guaranteed to apply to integral vectors. */
43736 static inline machine_mode
43737 get_mode_wider_vector (machine_mode o)
43739 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43740 machine_mode n = GET_MODE_WIDER_MODE (o);
/* Sanity-check the genmodes ordering assumption: half the element count,
   identical total size.  */
43741 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43742 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43746 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43747 fill target with val via vec_duplicate. */
/* Emits (set TARGET (vec_duplicate VAL)) and checks whether any insn
   pattern matches it; if not, retries with VAL forced into a register,
   inserting the forcing sequence before the duplicate insn.  Returns the
   final recog result as success/failure.  */
43750 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43756 /* First attempt to recognize VAL as-is. */
43757 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43758 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43759 if (recog_memoized (insn) < 0)
43762 /* If that fails, force VAL into a register. */
/* Mutating XEXP (dup, 0) rewrites the already-emitted insn in place.  */
43765 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43766 seq = get_insns ();
43769 emit_insn_before (seq, insn);
43771 ok = recog_memoized (insn) >= 0;
43777 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43778 with all elements equal to VAR. Return true if successful. */
/* Dispatches per vector mode (the switch arms are partially elided in
   this view): direct vec_duplicate where an insn exists, a vec_perm
   broadcast for QI/HI element modes, widening-and-recursing for narrow
   elements, and halve-then-VEC_CONCAT for 256/512-bit modes.  */
43781 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43782 rtx target, rtx val)
43806 return ix86_vector_duplicate_value (mode, target, val);
43811 if (TARGET_SSE || TARGET_3DNOW_A)
43815 val = gen_lowpart (SImode, val);
43816 x = gen_rtx_TRUNCATE (HImode, val);
43817 x = gen_rtx_VEC_DUPLICATE (mode, x);
43818 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43830 return ix86_vector_duplicate_value (mode, target, val);
/* Broadcast via a one-operand permutation whose mask is all-zero
   (dperm was memset to 0), i.e. "replicate element 0".  */
43834 struct expand_vec_perm_d dperm;
43838 memset (&dperm, 0, sizeof (dperm));
43839 dperm.target = target;
43840 dperm.vmode = mode;
43841 dperm.nelt = GET_MODE_NUNITS (mode);
43842 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43843 dperm.one_operand_p = true;
43845 /* Extend to SImode using a paradoxical SUBREG. */
43846 tmp1 = gen_reg_rtx (SImode);
43847 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43849 /* Insert the SImode value as low element of a V4SImode vector. */
43850 tmp2 = gen_reg_rtx (V4SImode);
43851 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43852 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43854 ok = (expand_vec_perm_1 (&dperm)
43855 || expand_vec_perm_broadcast_1 (&dperm));
43863 return ix86_vector_duplicate_value (mode, target, val);
43870 /* Replicate the value once into the next wider mode and recurse. */
43872 machine_mode smode, wsmode, wvmode;
43875 smode = GET_MODE_INNER (mode);
43876 wvmode = get_mode_wider_vector (mode);
43877 wsmode = GET_MODE_INNER (wvmode);
/* Build the doubled scalar (val | (val << bits)) and duplicate that in
   the wider-element vector mode.  */
43879 val = convert_modes (wsmode, smode, val, true);
43880 x = expand_simple_binop (wsmode, ASHIFT, val,
43881 GEN_INT (GET_MODE_BITSIZE (smode)),
43882 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43883 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43885 x = gen_reg_rtx (wvmode);
43886 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43888 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43895 return ix86_vector_duplicate_value (mode, target, val);
/* 256-bit QI/HI without a direct broadcast: build the 128-bit half
   recursively, then concatenate it with itself.  */
43898 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43899 rtx x = gen_reg_rtx (hvmode);
43901 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43904 x = gen_rtx_VEC_CONCAT (mode, x, x);
43905 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43911 if (TARGET_AVX512BW)
43912 return ix86_vector_duplicate_value (mode, target, val);
/* 512-bit QI/HI without AVX512BW: same halve-and-concat scheme.  */
43915 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43916 rtx x = gen_reg_rtx (hvmode);
43918 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43921 x = gen_rtx_VEC_CONCAT (mode, x, x);
43922 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43931 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43932 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE (review): interior source lines are elided in this excerpt; the
   comments below describe only the visible code.  */
43936 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43937 rtx target, rtx var, int one_var)
43939 machine_mode vsimode;
43942 bool use_vector_set = false;
/* First decide, per vector mode and ISA level, whether a direct
   "vector set" strategy (zero the vector, then insert VAR) is viable.  */
43947 /* For SSE4.1, we normally use vector set. But if the second
43948 element is zero and inter-unit moves are OK, we use movq
43950 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43951 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43957 use_vector_set = TARGET_SSE4_1;
43960 use_vector_set = TARGET_SSE2;
43963 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43970 use_vector_set = TARGET_AVX;
43973 /* Use ix86_expand_vector_set in 64bit mode only. */
43974 use_vector_set = TARGET_AVX && TARGET_64BIT;
43980 if (use_vector_set)
/* Zero TARGET, then overwrite element ONE_VAR with VAR.  */
43982 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43983 var = force_reg (GET_MODE_INNER (mode), var);
43984 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: VEC_CONCAT of VAR with a scalar zero.  */
44000 var = force_reg (GET_MODE_INNER (mode), var);
44001 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44002 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Broadcast VAR and merge with zero; use a fresh pseudo when TARGET
   is not itself a pseudo register.  */
44007 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44008 new_target = gen_reg_rtx (mode);
44010 new_target = target;
44011 var = force_reg (GET_MODE_INNER (mode), var);
44012 x = gen_rtx_VEC_DUPLICATE (mode, var);
44013 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44014 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
44017 /* We need to shuffle the value to the correct position, so
44018 create a new pseudo to store the intermediate result. */
44020 /* With SSE2, we can use the integer shuffle insns. */
44021 if (mode != V4SFmode && TARGET_SSE2)
44023 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44025 GEN_INT (one_var == 1 ? 0 : 1),
44026 GEN_INT (one_var == 2 ? 0 : 1),
44027 GEN_INT (one_var == 3 ? 0 : 1)));
44028 if (target != new_target)
44029 emit_move_insn (target, new_target);
44033 /* Otherwise convert the intermediate result to V4SFmode and
44034 use the SSE1 shuffle instructions. */
44035 if (mode != V4SFmode)
44037 tmp = gen_reg_rtx (V4SFmode);
44038 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44043 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44045 GEN_INT (one_var == 1 ? 0 : 1),
44046 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44047 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44049 if (mode != V4SFmode)
44050 emit_move_insn (target, gen_lowpart (V4SImode, tmp))
44051 else if (tmp != target)
44052 emit_move_insn (target, tmp);
44054 else if (target != new_target)
44055 emit_move_insn (target, new_target);
/* Narrow-element fallback: widen the element to SImode, build the
   corresponding SImode vector, then view it back in MODE.  */
44060 vsimode = V4SImode;
44066 vsimode = V2SImode;
44072 /* Zero extend the variable element to SImode and recurse. */
44073 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44075 x = gen_reg_rtx (vsimode);
44076 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44078 gcc_unreachable ();
44080 emit_move_insn (target, gen_lowpart (mode, x));
44088 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44089 consisting of the values in VALS. It is known that all elements
44090 except ONE_VAR are constants. Return true if successful. */
/* NOTE (review): interior source lines are elided in this excerpt.  */
44093 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44094 rtx target, rtx vals, int one_var)
44096 rtx var = XVECEXP (vals, 0, one_var);
44097 machine_mode wmode;
/* Build a copy of VALS with the variable slot zeroed, giving a fully
   constant vector that can come from the constant pool.  */
44100 const_vec = copy_rtx (vals);
44101 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44102 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44110 /* For the two element vectors, it's just as easy to use
44111 the general case. */
44115 /* Use ix86_expand_vector_set in 64bit mode only. */
44138 /* There's no way to set one QImode entry easily. Combine
44139 the variable value with its adjacent constant value, and
44140 promote to an HImode set. */
44141 x = XVECEXP (vals, 0, one_var ^ 1);
/* one_var ^ 1 is the byte sharing an HImode slot with the variable one;
   pack both bytes into one HImode value with shift/OR.  */
44144 var = convert_modes (HImode, QImode, var, true);
44145 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44146 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44147 x = GEN_INT (INTVAL (x) & 0xff);
44151 var = convert_modes (HImode, QImode, var, true);
44152 x = gen_int_mode (INTVAL (x) << 8, HImode);
44154 if (x != const0_rtx)
44155 var = expand_simple_binop (HImode, IOR, var, x, var,
44156 1, OPTAB_LIB_WIDEN);
/* Load the constant image in the wider mode and insert the combined
   HImode value at the halved index.  */
44158 x = gen_reg_rtx (wmode);
44159 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44160 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44162 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the constant vector, then overwrite ONE_VAR.  */
44169 emit_move_insn (target, const_vec);
44170 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44174 /* A subroutine of ix86_expand_vector_init_general. Use vector
44175 concatenate to handle the most general case: all values variable,
44176 and none identical. */
/* NOTE (review): interior source lines (switch labels and several
   arms) are elided in this excerpt.  */
44179 ix86_expand_vector_init_concat (machine_mode mode,
44180 rtx target, rtx *ops, int n)
44182 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44183 rtx first[16], second[8], third[4];
44235 gcc_unreachable ();
/* n == 2: force both halves into registers and emit one VEC_CONCAT.  */
44238 if (!register_operand (ops[1], cmode))
44239 ops[1] = force_reg (cmode, ops[1]);
44240 if (!register_operand (ops[0], cmode))
44241 ops[0] = force_reg (cmode, ops[0]);
44242 emit_insn (gen_rtx_SET (VOIDmode, target,
44243 gen_rtx_VEC_CONCAT (mode, ops[0],
44263 gcc_unreachable ();
44287 gcc_unreachable ();
44305 gcc_unreachable ();
44310 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pair up adjacent operands into CMODE vectors (FIRST[]), then keep
   concatenating pairwise through SECOND[]/THIRD[] until one vector of
   MODE remains.  */
44313 for (; i > 0; i -= 2, j--)
44315 first[j] = gen_reg_rtx (cmode);
44316 v = gen_rtvec (2, ops[i - 1], ops[i]);
44317 ix86_expand_vector_init (false, first[j],
44318 gen_rtx_PARALLEL (cmode, v));
44324 gcc_assert (hmode != VOIDmode);
44325 gcc_assert (gmode != VOIDmode);
44326 for (i = j = 0; i < n; i += 2, j++)
44328 second[j] = gen_reg_rtx (hmode);
44329 ix86_expand_vector_init_concat (hmode, second [j],
44333 for (i = j = 0; i < n; i += 2, j++)
44335 third[j] = gen_reg_rtx (gmode);
44336 ix86_expand_vector_init_concat (gmode, third[j],
44340 ix86_expand_vector_init_concat (mode, target, third, n);
44344 gcc_assert (hmode != VOIDmode);
44345 for (i = j = 0; i < n; i += 2, j++)
44347 second[j] = gen_reg_rtx (hmode);
44348 ix86_expand_vector_init_concat (hmode, second [j],
44352 ix86_expand_vector_init_concat (mode, target, second, n);
44355 ix86_expand_vector_init_concat (mode, target, first, n);
44359 gcc_unreachable ();
44363 /* A subroutine of ix86_expand_vector_init_general. Use vector
44364 interleave to handle the most general case: all values variable,
44365 and none identical. */
/* NOTE (review): interior source lines are elided in this excerpt.  */
44368 ix86_expand_vector_init_interleave (machine_mode mode,
44369 rtx target, rtx *ops, int n)
44371 machine_mode first_imode, second_imode, third_imode, inner_mode;
/* Per-mode strategy: a "load even element" insn plus a ladder of
   low-interleave insns at successively wider integer modes.  */
44374 rtx (*gen_load_even) (rtx, rtx, rtx);
44375 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44376 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44381 gen_load_even = gen_vec_setv8hi;
44382 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44383 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44384 inner_mode = HImode;
44385 first_imode = V4SImode;
44386 second_imode = V2DImode;
44387 third_imode = VOIDmode;
44390 gen_load_even = gen_vec_setv16qi;
44391 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44392 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44393 inner_mode = QImode;
44394 first_imode = V8HImode;
44395 second_imode = V4SImode;
44396 third_imode = V2DImode;
44399 gcc_unreachable ();
/* Stage 1: build N vectors, each holding one odd/even element pair.  */
44402 for (i = 0; i < n; i++)
44404 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
44405 op0 = gen_reg_rtx (SImode);
44406 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44408 /* Insert the SImode value as low element of V4SImode vector. */
44409 op1 = gen_reg_rtx (V4SImode);
44410 op0 = gen_rtx_VEC_MERGE (V4SImode,
44411 gen_rtx_VEC_DUPLICATE (V4SImode,
44413 CONST0_RTX (V4SImode),
44415 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44417 /* Cast the V4SImode vector back to a vector in orignal mode. */
44418 op0 = gen_reg_rtx (mode);
44419 emit_move_insn (op0, gen_lowpart (mode, op1));
44421 /* Load even elements into the second position. */
44422 emit_insn (gen_load_even (op0,
44423 force_reg (inner_mode,
44427 /* Cast vector to FIRST_IMODE vector. */
44428 ops[i] = gen_reg_rtx (first_imode);
44429 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44432 /* Interleave low FIRST_IMODE vectors. */
44433 for (i = j = 0; i < n; i += 2, j++)
44435 op0 = gen_reg_rtx (first_imode);
44436 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44438 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44439 ops[j] = gen_reg_rtx (second_imode);
44440 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44443 /* Interleave low SECOND_IMODE vectors. */
44444 switch (second_imode)
44447 for (i = j = 0; i < n / 2; i += 2, j++)
44449 op0 = gen_reg_rtx (second_imode);
44450 emit_insn (gen_interleave_second_low (op0, ops[i],
44453 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44455 ops[j] = gen_reg_rtx (third_imode);
44456 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* After the V4SImode round, continue interleaving at V2DImode.  */
44458 second_imode = V2DImode;
44459 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44463 op0 = gen_reg_rtx (second_imode);
44464 emit_insn (gen_interleave_second_low (op0, ops[0],
44467 /* Cast the SECOND_IMODE vector back to a vector on original
44469 emit_insn (gen_rtx_SET (VOIDmode, target,
44470 gen_lowpart (mode, op0)));
44474 gcc_unreachable ();
44478 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44479 all values variable, and none identical. */
/* NOTE (review): interior source lines (mode switch labels etc.) are
   elided in this excerpt.  */
44482 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44483 rtx target, rtx vals)
44485 rtx ops[64], op0, op1, op2, op3, op4, op5;
44486 machine_mode half_mode = VOIDmode;
44487 machine_mode quarter_mode = VOIDmode;
44494 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: gather elements and build by concatenation.  */
44510 n = GET_MODE_NUNITS (mode);
44511 for (i = 0; i < n; i++)
44512 ops[i] = XVECEXP (vals, 0, i);
44513 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI vectors: interleave each 128-bit half, then concat.  */
44517 half_mode = V16QImode;
44521 half_mode = V8HImode;
44525 n = GET_MODE_NUNITS (mode);
44526 for (i = 0; i < n; i++)
44527 ops[i] = XVECEXP (vals, 0, i);
44528 op0 = gen_reg_rtx (half_mode);
44529 op1 = gen_reg_rtx (half_mode);
44530 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44532 ix86_expand_vector_init_interleave (half_mode, op1,
44533 &ops [n >> 1], n >> 2);
44534 emit_insn (gen_rtx_SET (VOIDmode, target,
44535 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI vectors: interleave each 128-bit quarter, concat
   quarters into halves, then halves into the full vector.  */
44539 quarter_mode = V16QImode;
44540 half_mode = V32QImode;
44544 quarter_mode = V8HImode;
44545 half_mode = V16HImode;
44549 n = GET_MODE_NUNITS (mode);
44550 for (i = 0; i < n; i++)
44551 ops[i] = XVECEXP (vals, 0, i);
44552 op0 = gen_reg_rtx (quarter_mode);
44553 op1 = gen_reg_rtx (quarter_mode);
44554 op2 = gen_reg_rtx (quarter_mode);
44555 op3 = gen_reg_rtx (quarter_mode);
44556 op4 = gen_reg_rtx (half_mode);
44557 op5 = gen_reg_rtx (half_mode);
44558 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44560 ix86_expand_vector_init_interleave (quarter_mode, op1,
44561 &ops [n >> 2], n >> 3);
44562 ix86_expand_vector_init_interleave (quarter_mode, op2,
44563 &ops [n >> 1], n >> 3);
44564 ix86_expand_vector_init_interleave (quarter_mode, op3,
44565 &ops [(n >> 1) | (n >> 2)], n >> 3);
44566 emit_insn (gen_rtx_SET (VOIDmode, op4,
44567 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44568 emit_insn (gen_rtx_SET (VOIDmode, op5,
44569 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44570 emit_insn (gen_rtx_SET (VOIDmode, target,
44571 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44575 if (!TARGET_SSE4_1)
44583 /* Don't use ix86_expand_vector_init_interleave if we can't
44584 move from GPR to SSE register directly. */
44585 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44588 n = GET_MODE_NUNITS (mode);
44589 for (i = 0; i < n; i++)
44590 ops[i] = XVECEXP (vals, 0, i);
44591 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44599 gcc_unreachable ();
/* Word-at-a-time fallback: pack elements into word_mode values, then
   assemble the vector from those words.  */
44603 int i, j, n_elts, n_words, n_elt_per_word;
44604 machine_mode inner_mode;
44605 rtx words[4], shift;
44607 inner_mode = GET_MODE_INNER (mode);
44608 n_elts = GET_MODE_NUNITS (mode);
44609 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44610 n_elt_per_word = n_elts / n_words;
44611 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44613 for (i = 0; i < n_words; ++i)
44615 rtx word = NULL_RTX;
44617 for (j = 0; j < n_elt_per_word; ++j)
44619 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44620 elt = convert_modes (word_mode, inner_mode, elt, true);
44626 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44627 word, 1, OPTAB_LIB_WIDEN);
44628 word = expand_simple_binop (word_mode, IOR, word, elt,
44629 word, 1, OPTAB_LIB_WIDEN);
44637 emit_move_insn (target, gen_lowpart (mode, words[0]));
44638 else if (n_words == 2)
44640 rtx tmp = gen_reg_rtx (mode);
/* Clobber TMP first so the two partial word stores are not seen as
   uses of an uninitialized register.  */
44641 emit_clobber (tmp);
44642 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44643 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44644 emit_move_insn (target, tmp);
44646 else if (n_words == 4)
44648 rtx tmp = gen_reg_rtx (V4SImode);
44649 gcc_assert (word_mode == SImode);
44650 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44651 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44652 emit_move_insn (target, gen_lowpart (mode, tmp));
44655 gcc_unreachable ();
44659 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44660 instructions unless MMX_OK is true. */
/* Entry point: classifies VALS and dispatches to the specialized
   expanders (duplicate / one_nonzero / one_var / general).
   NOTE (review): interior source lines are elided in this excerpt.  */
44663 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44665 machine_mode mode = GET_MODE (target);
44666 machine_mode inner_mode = GET_MODE_INNER (mode);
44667 int n_elts = GET_MODE_NUNITS (mode);
44668 int n_var = 0, one_var = -1;
44669 bool all_same = true, all_const_zero = true;
/* Scan elements: count non-constants (n_var), remember the last
   non-constant index (one_var), and track all-equal / all-zero.  */
44673 for (i = 0; i < n_elts; ++i)
44675 x = XVECEXP (vals, 0, i);
44676 if (!(CONST_INT_P (x)
44677 || GET_CODE (x) == CONST_DOUBLE
44678 || GET_CODE (x) == CONST_FIXED))
44679 n_var++, one_var = i;
44680 else if (x != CONST0_RTX (inner_mode))
44681 all_const_zero = false;
44682 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44686 /* Constants are best loaded from the constant pool. */
44689 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44693 /* If all values are identical, broadcast the value. */
44695 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44696 XVECEXP (vals, 0, 0)))
44699 /* Values where only one field is non-constant are best loaded from
44700 the pool and overwritten via move later. */
44704 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44705 XVECEXP (vals, 0, one_var),
44709 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Fallback: fully general expansion.  */
44713 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET; MMX insns are
   allowed only when MMX_OK.  NOTE (review): interior source lines
   (mode switch labels etc.) are elided in this excerpt.  */
44717 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44719 machine_mode mode = GET_MODE (target);
44720 machine_mode inner_mode = GET_MODE_INNER (mode);
44721 machine_mode half_mode;
44722 bool use_vec_merge = false;
/* Tables of 256-bit lo/hi half extract and insert generators, indexed
   by mode row and half index.  */
44724 static rtx (*gen_extract[6][2]) (rtx, rtx)
44726 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44727 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44728 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44729 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44730 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44731 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44733 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44735 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44736 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44737 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44738 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44739 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44740 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the untouched element, then VEC_CONCAT it
   with VAL in the right order.  */
44750 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44751 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44753 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44755 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44756 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44762 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44766 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44767 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44769 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44771 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44772 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44779 /* For the two element vectors, we implement a VEC_CONCAT with
44780 the extraction of the other element. */
44782 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44783 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44786 op0 = val, op1 = tmp;
44788 op0 = tmp, op1 = val;
44790 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44791 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44796 use_vec_merge = TARGET_SSE4_1;
44803 use_vec_merge = true;
/* V4SF without SSE4.1: per-element shufps sequences; inline comments
   trace the element layout at each step.  */
44807 /* tmp = target = A B C D */
44808 tmp = copy_to_reg (target);
44809 /* target = A A B B */
44810 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44811 /* target = X A B B */
44812 ix86_expand_vector_set (false, target, val, 0);
44813 /* target = A X C D */
44814 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44815 const1_rtx, const0_rtx,
44816 GEN_INT (2+4), GEN_INT (3+4)));
44820 /* tmp = target = A B C D */
44821 tmp = copy_to_reg (target);
44822 /* tmp = X B C D */
44823 ix86_expand_vector_set (false, tmp, val, 0);
44824 /* target = A B X D */
44825 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44826 const0_rtx, const1_rtx,
44827 GEN_INT (0+4), GEN_INT (3+4)));
44831 /* tmp = target = A B C D */
44832 tmp = copy_to_reg (target);
44833 /* tmp = X B C D */
44834 ix86_expand_vector_set (false, tmp, val, 0);
44835 /* target = A B X D */
44836 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44837 const0_rtx, const1_rtx,
44838 GEN_INT (2+4), GEN_INT (0+4)));
44842 gcc_unreachable ();
44847 use_vec_merge = TARGET_SSE4_1;
44851 /* Element 0 handled by vec_merge below. */
44854 use_vec_merge = true;
44860 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44861 store into element 0, then shuffle them back. */
44865 order[0] = GEN_INT (elt);
44866 order[1] = const1_rtx;
44867 order[2] = const2_rtx;
44868 order[3] = GEN_INT (3);
44869 order[elt] = const0_rtx;
44871 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44872 order[1], order[2], order[3]));
44874 ix86_expand_vector_set (false, target, val, 0);
/* The same pshufd order is its own inverse, restoring the layout.  */
44876 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44877 order[1], order[2], order[3]));
44881 /* For SSE1, we have to reuse the V4SF code. */
44882 rtx t = gen_reg_rtx (V4SFmode);
44883 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44884 emit_move_insn (target, gen_lowpart (mode, t));
44889 use_vec_merge = TARGET_SSE2;
44892 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44896 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: operate on the 128-bit half containing ELT via the
   gen_extract/gen_insert tables above.  */
44903 half_mode = V16QImode;
44909 half_mode = V8HImode;
44915 half_mode = V4SImode;
44921 half_mode = V2DImode;
44927 half_mode = V4SFmode;
44933 half_mode = V2DFmode;
44939 /* Compute offset. */
44943 gcc_assert (i <= 1);
44945 /* Extract the half. */
44946 tmp = gen_reg_rtx (half_mode);
44947 emit_insn (gen_extract[j][i] (tmp, target));
44949 /* Put val in tmp at elt. */
44950 ix86_expand_vector_set (false, tmp, val, elt);
44953 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: broadcast VAL and blend it in under a one-hot mask
   selecting element ELT.  */
44957 if (TARGET_AVX512F)
44959 tmp = gen_reg_rtx (mode);
44960 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44961 gen_rtx_VEC_DUPLICATE (mode, val)));
44962 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44963 force_reg (QImode, GEN_INT (1 << elt))));
44969 if (TARGET_AVX512F)
44971 tmp = gen_reg_rtx (mode);
44972 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44973 gen_rtx_VEC_DUPLICATE (mode, val)));
44974 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44975 force_reg (QImode, GEN_INT (1 << elt))));
44981 if (TARGET_AVX512F)
44983 tmp = gen_reg_rtx (mode);
44984 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44985 gen_rtx_VEC_DUPLICATE (mode, val)));
44986 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44987 force_reg (HImode, GEN_INT (1 << elt))));
44993 if (TARGET_AVX512F)
44995 tmp = gen_reg_rtx (mode);
44996 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44997 gen_rtx_VEC_DUPLICATE (mode, val)));
44998 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44999 force_reg (HImode, GEN_INT (1 << elt))));
45005 if (TARGET_AVX512F && TARGET_AVX512BW)
45007 tmp = gen_reg_rtx (mode);
45008 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45009 gen_rtx_VEC_DUPLICATE (mode, val)));
45010 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
45011 force_reg (SImode, GEN_INT (1 << elt))));
45017 if (TARGET_AVX512F && TARGET_AVX512BW)
45019 tmp = gen_reg_rtx (mode);
45020 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45021 gen_rtx_VEC_DUPLICATE (mode, val)));
45022 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
45023 force_reg (DImode, GEN_INT (1 << elt))));
/* Generic VEC_MERGE path when use_vec_merge was selected above.  */
45035 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45036 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45037 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: bounce through a stack temporary.  */
45041 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45043 emit_move_insn (mem, target);
45045 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45046 emit_move_insn (tmp, val);
45048 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET; MMX insns are
   allowed only when MMX_OK.  NOTE (review): interior source lines
   (mode switch labels etc.) are elided in this excerpt.  */
45053 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45055 machine_mode mode = GET_MODE (vec);
45056 machine_mode inner_mode = GET_MODE_INNER (mode);
45057 bool use_vec_extr = false;
45070 use_vec_extr = true;
45074 use_vec_extr = TARGET_SSE4_1;
/* V4SF without SSE4.1: shuffle the wanted element into lane 0 first.  */
45086 tmp = gen_reg_rtx (mode);
45087 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45088 GEN_INT (elt), GEN_INT (elt),
45089 GEN_INT (elt+4), GEN_INT (elt+4)));
45093 tmp = gen_reg_rtx (mode);
45094 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45098 gcc_unreachable ();
45101 use_vec_extr = true;
45106 use_vec_extr = TARGET_SSE4_1;
/* V4SI: pshufd broadcast of the wanted element, then extract lane 0.  */
45120 tmp = gen_reg_rtx (mode);
45121 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45122 GEN_INT (elt), GEN_INT (elt),
45123 GEN_INT (elt), GEN_INT (elt)));
45127 tmp = gen_reg_rtx (mode);
45128 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45132 gcc_unreachable ();
45135 use_vec_extr = true;
45140 /* For SSE1, we have to reuse the V4SF code. */
45141 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45142 gen_lowpart (V4SFmode, vec), elt);
45148 use_vec_extr = TARGET_SSE2;
45151 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45155 use_vec_extr = TARGET_SSE4_1;
/* 256-bit modes: pull out the 128-bit half containing ELT and recurse
   with the index masked to that half.  */
45161 tmp = gen_reg_rtx (V4SFmode);
45163 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45165 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45166 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45174 tmp = gen_reg_rtx (V2DFmode);
45176 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45178 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45179 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45187 tmp = gen_reg_rtx (V16QImode);
45189 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45191 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45192 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45200 tmp = gen_reg_rtx (V8HImode);
45202 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45204 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45205 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45213 tmp = gen_reg_rtx (V4SImode);
45215 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45217 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45218 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45226 tmp = gen_reg_rtx (V2DImode);
45228 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45230 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45231 ix86_expand_vector_extract (false, target, tmp, elt & 1);
/* 512-bit modes: same half-extraction scheme, one level wider.  */
45237 if (TARGET_AVX512BW)
45239 tmp = gen_reg_rtx (V16HImode);
45241 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45243 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45244 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45250 if (TARGET_AVX512BW)
45252 tmp = gen_reg_rtx (V32QImode);
45254 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45256 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45257 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45263 tmp = gen_reg_rtx (V8SFmode);
45265 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45267 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45268 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45272 tmp = gen_reg_rtx (V4DFmode);
45274 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45276 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45277 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45281 tmp = gen_reg_rtx (V8SImode);
45283 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45285 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45286 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45290 tmp = gen_reg_rtx (V4DImode);
45292 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45294 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45295 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45299 /* ??? Could extract the appropriate HImode element and shift. */
/* Generic VEC_SELECT path when use_vec_extr was selected above.  */
45306 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45307 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45309 /* Let the rtl optimizers know about the zero extension performed. */
45310 if (inner_mode == QImode || inner_mode == HImode)
45312 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45313 target = gen_lowpart (SImode, target);
45316 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: bounce through a stack temporary.  */
45320 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45322 emit_move_insn (mem, vec);
45324 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45325 emit_move_insn (target, tmp);
45329 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45330 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45331 The upper bits of DEST are undefined, though they shouldn't cause
45332 exceptions (some bits from src or all zeros are ok). */
/* NOTE (review): interior source lines (mode case labels) are elided
   in this excerpt.  */
45335 emit_reduc_half (rtx dest, rtx src, int i)
45338 switch (GET_MODE (src))
45342 tem = gen_sse_movhlps (dest, src, src);
45344 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45345 GEN_INT (1 + 4), GEN_INT (1 + 4));
45348 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 128-bit integer modes: shift the whole vector right as V1TImode.  */
45354 d = gen_reg_rtx (V1TImode);
45355 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
/* 256-bit float modes: I selects whether to move a 128-bit lane
   (vperm2f128) or shuffle within lanes.  */
45360 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45362 tem = gen_avx_shufps256 (dest, src, src,
45363 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45367 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45369 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45377 if (GET_MODE (dest) != V4DImode)
45378 d = gen_reg_rtx (V4DImode);
45379 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45380 gen_lowpart (V4DImode, src),
45385 d = gen_reg_rtx (V2TImode);
45386 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* 512-bit modes: shuffle 128-bit groups (shuf_i32x4) for large I,
   otherwise pshufd within groups.  */
45397 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45398 gen_lowpart (V16SImode, src),
45399 gen_lowpart (V16SImode, src),
45400 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45401 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45402 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45403 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45404 GEN_INT (0xC), GEN_INT (0xD),
45405 GEN_INT (0xE), GEN_INT (0xF),
45406 GEN_INT (0x10), GEN_INT (0x11),
45407 GEN_INT (0x12), GEN_INT (0x13),
45408 GEN_INT (0x14), GEN_INT (0x15),
45409 GEN_INT (0x16), GEN_INT (0x17));
45411 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45412 gen_lowpart (V16SImode, src),
45413 GEN_INT (i == 128 ? 0x2 : 0x1),
45417 GEN_INT (i == 128 ? 0x6 : 0x5),
45421 GEN_INT (i == 128 ? 0xA : 0x9),
45425 GEN_INT (i == 128 ? 0xE : 0xD),
45431 gcc_unreachable ();
/* When the shift produced its result in scratch D, view it back in
   DEST's mode.  */
45435 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45438 /* Expand a vector reduction. FN is the binary pattern to reduce;
45439 DEST is the destination; IN is the input vector. */
/* NOTE (review): interior source lines are elided in this excerpt.  */
45442 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45444 rtx half, dst, vec = in;
45445 machine_mode mode = GET_MODE (in);
45448 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45450 && mode == V8HImode
45451 && fn == gen_uminv8hi3
45453 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Generic log2 reduction: repeatedly fold the upper half onto the
   lower half with emit_reduc_half, combining with FN, halving the
   active width each iteration down to one element.  */
45457 for (i = GET_MODE_BITSIZE (mode);
45458 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45461 half = gen_reg_rtx (mode);
45462 emit_reduc_half (half, vec, i);
/* On the final iteration the result goes directly into DEST.  */
45463 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45466 dst = gen_reg_rtx (mode);
45467 emit_insn (fn (dst, half, vec));
45472 /* Target hook for scalar_mode_supported_p. */
/* Decimal float modes defer to the generic decimal-float check; TFmode
   is handled specially (result elided in this excerpt); everything
   else falls back to the default hook.  */
45474 ix86_scalar_mode_supported_p (machine_mode mode)
45476 if (DECIMAL_FLOAT_MODE_P (mode))
45477 return default_decimal_float_supported_p ();
45478 else if (mode == TFmode)
45481 return default_scalar_mode_supported_p (mode);
45484 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled ISA level (SSE, SSE2,
   AVX, AVX-512F, MMX, 3DNow!) validates it; return values between the
   checks are elided in this excerpt.  */
45486 ix86_vector_mode_supported_p (machine_mode mode)
45488 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45490 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45492 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45494 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45496 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45498 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45503 /* Implement target hook libgcc_floating_mode_supported_p. */
/* TFmode support in libgcc is configurable per OS: unconditionally
   disabled under IX86_NO_LIBGCC_TFMODE, tied to TARGET_LONG_DOUBLE_128
   under IX86_MAYBE_NO_LIBGCC_TFMODE.  */
45505 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45515 #ifdef IX86_NO_LIBGCC_TFMODE
45517 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45518 return TARGET_LONG_DOUBLE_128;
45528 /* Target hook for c_mode_for_suffix. */
/* Maps a C constant-suffix character to a machine mode; the function
   body is elided in this excerpt.  */
45529 static machine_mode
45530 ix86_c_mode_for_suffix (char suffix)
45540 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45542 We do this in the new i386 backend to maintain source compatibility
45543 with the old cc0-based compiler. */
/* Prepends implicit "flags" and "fpsr" clobbers to every inline asm's
   clobber list; the returned list line is elided in this excerpt.  */
45546 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45548 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45550 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45555 /* Implements target vector targetm.asm.encode_section_info. */
/* After the default encoding, marks symbols placed in the large data
   section so they are addressed with far (64-bit) addressing.  */
45557 static void ATTRIBUTE_UNUSED
45558 ix86_encode_section_info (tree decl, rtx rtl, int first)
45560 default_encode_section_info (decl, rtl, first);
45562 if (ix86_in_large_data_p (decl))
45563 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45566 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the unordered-aware reversal, since plain
   reverse_condition is wrong in the presence of NaNs.  */
45569 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45571 return (mode != CCFPmode && mode != CCFPUmode
45572 ? reverse_condition (code)
45573 : reverse_condition_maybe_unordered (code));
45576 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for an x87 stack-register move.
   When the source register dies (REG_DEAD), a popping form is used so
   the x87 stack stays balanced.  NOTE (review): some lines are elided
   in this excerpt.  */
45580 output_387_reg_move (rtx insn, rtx *operands)
45582 if (REG_P (operands[0]))
45584 if (REG_P (operands[1])
45585 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45587 if (REGNO (operands[0]) == FIRST_STACK_REG)
45588 return output_387_ffreep (operands, 0);
45589 return "fstp\t%y0";
45591 if (STACK_TOP_P (operands[0]))
45592 return "fld%Z1\t%y1";
45595 else if (MEM_P (operands[0]))
45597 gcc_assert (REG_P (operands[1]));
45598 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45599 return "fstp%Z0\t%y0";
45602 /* There is no non-popping store to memory for XFmode.
45603 So if we need one, follow the store with a load. */
45604 if (GET_MODE (operands[0]) == XFmode)
45605 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45607 return "fst%Z0\t%y0";
45614 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45615 FP status register is set. */
/* Reads the FP status word with fnstsw, then tests the C2 bit either
   via sahf (when available/profitable) or via an explicit test insn,
   and emits a conditional branch to LABEL.  */
45618 ix86_emit_fp_unordered_jump (rtx label)
45620 rtx reg = gen_reg_rtx (HImode);
45623 emit_insn (gen_x86_fnstsw_1 (reg));
45625 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45627 emit_insn (gen_x86_sahf_1 (reg));
45629 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45630 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* 0x04 masks the C2 bit of the FP status word.  */
45634 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45636 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45637 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45640 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45641 gen_rtx_LABEL_REF (VOIDmode, label),
45643 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45645 emit_jump_insn (temp);
/* Branch is predicted mostly not-taken (10%).  */
45646 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45649 /* Output code to perform a log1p XFmode calculation. */
/* Expand log1p (OP1) into OP0 using x87 instructions.  For small
   |op1| (below 1 - sqrt(2)/2 ~= 0.2928932...) fyl2xp1 is accurate
   directly; otherwise fall back to fyl2x on 1 + op1.  Both paths
   multiply by ln(2) via the fldln2 constant to convert from log2.  */
45651 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45653 rtx_code_label *label1 = gen_label_rtx ();
45654 rtx_code_label *label2 = gen_label_rtx ();
45656 rtx tmp = gen_reg_rtx (XFmode);
45657 rtx tmp2 = gen_reg_rtx (XFmode);
/* Branch to the fyl2x path when |op1| >= the accuracy threshold.  */
45660 emit_insn (gen_absxf2 (tmp, op1));
45661 test = gen_rtx_GE (VOIDmode, tmp,
45662 CONST_DOUBLE_FROM_REAL_VALUE (
45663 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45665 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45667 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45668 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45669 emit_jump (label2);
/* Large-magnitude path: log1p(x) = ln(2) * log2 (1 + x).  */
45671 emit_label (label1);
45672 emit_move_insn (tmp, CONST1_RTX (XFmode));
45673 emit_insn (gen_addxf3 (tmp, op1, tmp));
45674 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45675 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45677 emit_label (label2);
45680 /* Emit code for round calculation. */
/* Expand round (OP1) into OP0 on the x87 as
   round(a) = sgn(a) * floor(fabs(a) + 0.5).
   INMODE is the FP input mode, OUTMODE may be an FP or integer mode.
   The sign is recovered from fxam's sign bit and applied by negating
   the magnitude result when set.  (The switch statements selecting
   gen_abs/gen_neg and the floor expansion have their case labels
   elided in this excerpt.)  */
45681 void ix86_emit_i387_round (rtx op0, rtx op1)
45683 machine_mode inmode = GET_MODE (op1);
45684 machine_mode outmode = GET_MODE (op0);
45685 rtx e1, e2, res, tmp, tmp1, half;
45686 rtx scratch = gen_reg_rtx (HImode);
45687 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45688 rtx_code_label *jump_label = gen_label_rtx ();
45690 rtx (*gen_abs) (rtx, rtx);
45691 rtx (*gen_neg) (rtx, rtx);
/* Pick abs generator by input FP mode (SF/DF/XF).  */
45696 gen_abs = gen_abssf2;
45699 gen_abs = gen_absdf2;
45702 gen_abs = gen_absxf2;
45705 gcc_unreachable ();
/* Pick neg generator by output mode (FP or HI/SI/DI integer).  */
45711 gen_neg = gen_negsf2;
45714 gen_neg = gen_negdf2;
45717 gen_neg = gen_negxf2;
45720 gen_neg = gen_neghi2;
45723 gen_neg = gen_negsi2;
45726 gen_neg = gen_negdi2;
45729 gcc_unreachable ();
45732 e1 = gen_reg_rtx (inmode);
45733 e2 = gen_reg_rtx (inmode);
45734 res = gen_reg_rtx (outmode);
45736 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45738 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45740 /* scratch = fxam(op1) */
45741 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45742 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45744 /* e1 = fabs(op1) */
45745 emit_insn (gen_abs (e1, op1));
45747 /* e2 = e1 + 0.5 */
45748 half = force_reg (inmode, half);
45749 emit_insn (gen_rtx_SET (VOIDmode, e2,
45750 gen_rtx_PLUS (inmode, e1, half)));
45752 /* res = floor(e2) */
45753 if (inmode != XFmode)
/* Widen to XFmode first; frndint operates on XFmode.  */
45755 tmp1 = gen_reg_rtx (XFmode);
45757 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45758 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45768 rtx tmp0 = gen_reg_rtx (XFmode);
45770 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45772 emit_insn (gen_rtx_SET (VOIDmode, res,
45773 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45774 UNSPEC_TRUNC_NOOP)));
45778 emit_insn (gen_frndintxf2_floor (res, tmp1));
/* Integer output modes use the lfloor patterns directly.  */
45781 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45784 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45787 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45790 gcc_unreachable ();
45793 /* flags = signbit(a) */
/* Bit 0x02 of the fxam result byte is the sign flag (C1).  */
45794 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45796 /* if (flags) then res = -res */
45797 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45798 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45799 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45801 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45802 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45803 JUMP_LABEL (insn) = jump_label;
45805 emit_insn (gen_neg (res, res));
45807 emit_label (jump_label);
45808 LABEL_NUSES (jump_label) = 1;
45810 emit_move_insn (op0, res);
45813 /* Output code to perform a Newton-Rhapson approximation of a single precision
45814 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* Expand RES = A / B in MODE using a hardware reciprocal estimate
   refined by one Newton-Raphson step:
     a/b ~= a * (2*rcp(b) - b*rcp(b)*rcp(b)).
   512-bit modes use the AVX-512 RCP14 estimate; other modes use the
   SSE rcp estimate (the UNSPEC codes are on the elided lines).  */
45816 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45818 rtx x0, x1, e0, e1;
45820 x0 = gen_reg_rtx (mode);
45821 e0 = gen_reg_rtx (mode);
45822 e1 = gen_reg_rtx (mode);
45823 x1 = gen_reg_rtx (mode);
45825 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45827 b = force_reg (mode, b);
45829 /* x0 = rcp(b) estimate */
45830 if (mode == V16SFmode || mode == V8DFmode)
45831 emit_insn (gen_rtx_SET (VOIDmode, x0,
45832 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45835 emit_insn (gen_rtx_SET (VOIDmode, x0,
45836 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 * x0 (two multiplies).  */
45840 emit_insn (gen_rtx_SET (VOIDmode, e0,
45841 gen_rtx_MULT (mode, x0, b)))
45844 emit_insn (gen_rtx_SET (VOIDmode, e0,
45845 gen_rtx_MULT (mode, x0, e0)));
/* e1 = 2 * x0.  */
45848 emit_insn (gen_rtx_SET (VOIDmode, e1,
45849 gen_rtx_PLUS (mode, x0, x0)));
/* x1 = refined reciprocal = e1 - e0.  */
45852 emit_insn (gen_rtx_SET (VOIDmode, x1,
45853 gen_rtx_MINUS (mode, e1, e0)));
45856 emit_insn (gen_rtx_SET (VOIDmode, res,
45857 gen_rtx_MULT (mode, a, x1)));
45860 /* Output code to perform a Newton-Rhapson approximation of a
45861 single precision floating point [reciprocal] square root. */
/* Expand RES = sqrt (A) or rsqrt (A) in MODE via the hardware rsqrt
   estimate plus one Newton-Raphson step:
     sqrt(a)  = -0.5 * a * rsqrt(a) * (a*rsqrt(a)*rsqrt(a) - 3)
     rsqrt(a) = -0.5 *     rsqrt(a) * (a*rsqrt(a)*rsqrt(a) - 3)
   (the parameter selecting which, and a few lines, are elided here).
   a == 0 is special-cased so sqrt(0) does not become NaN from the
   infinite estimate.  */
45863 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45866 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45870 x0 = gen_reg_rtx (mode);
45871 e0 = gen_reg_rtx (mode);
45872 e1 = gen_reg_rtx (mode);
45873 e2 = gen_reg_rtx (mode);
45874 e3 = gen_reg_rtx (mode);
/* Build the -3.0 and -0.5 constants used by the NR step.  */
45876 real_from_integer (&r, VOIDmode, -3, SIGNED);
45877 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45879 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45880 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45881 unspec = UNSPEC_RSQRT;
45883 if (VECTOR_MODE_P (mode))
45885 mthree = ix86_build_const_vector (mode, true, mthree);
45886 mhalf = ix86_build_const_vector (mode, true, mhalf);
45887 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45888 if (GET_MODE_SIZE (mode) == 64)
45889 unspec = UNSPEC_RSQRT14;
45892 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45893 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45895 a = force_reg (mode, a);
45897 /* x0 = rsqrt(a) estimate */
45898 emit_insn (gen_rtx_SET (VOIDmode, x0,
45899 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45902 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
45907 zero = gen_reg_rtx (mode);
45908 mask = gen_reg_rtx (mode);
45910 zero = force_reg (mode, CONST0_RTX(mode));
45912 /* Handle masked compare. */
45913 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
/* AVX-512: compare into a mask register and blend zero back in.  */
45915 mask = gen_reg_rtx (HImode);
45916 /* Imm value 0x4 corresponds to not-equal comparison. */
45917 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45918 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
/* Pre-AVX-512: build an all-ones/all-zeros FP mask and AND it in.  */
45922 emit_insn (gen_rtx_SET (VOIDmode, mask,
45923 gen_rtx_NE (mode, zero, a)));
45925 emit_insn (gen_rtx_SET (VOIDmode, x0,
45926 gen_rtx_AND (mode, x0, mask)));
/* e1 = a * x0 * x0; e2 = e1 - 3.  */
45931 emit_insn (gen_rtx_SET (VOIDmode, e0,
45932 gen_rtx_MULT (mode, x0, a)));
45934 emit_insn (gen_rtx_SET (VOIDmode, e1,
45935 gen_rtx_MULT (mode, e0, x0)));
45938 mthree = force_reg (mode, mthree);
45939 emit_insn (gen_rtx_SET (VOIDmode, e2,
45940 gen_rtx_PLUS (mode, e1, mthree)));
45942 mhalf = force_reg (mode, mhalf);
45944 /* e3 = -.5 * x0 */
45945 emit_insn (gen_rtx_SET (VOIDmode, e3,
45946 gen_rtx_MULT (mode, x0, mhalf)));
45948 /* e3 = -.5 * e0 */
45949 emit_insn (gen_rtx_SET (VOIDmode, e3,
45950 gen_rtx_MULT (mode, e0, mhalf)));
45951 /* ret = e2 * e3 */
45952 emit_insn (gen_rtx_SET (VOIDmode, res,
45953 gen_rtx_MULT (mode, e2, e3)));
45956 #ifdef TARGET_SOLARIS
45957 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Solaris TARGET_ASM_NAMED_SECTION: emit the "@unwind" marker on every
   .eh_frame section directive (needed by Binutils >= 2.15), then defer
   to the COMDAT or default ELF section emitters.  */
45960 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45963 /* With Binutils 2.15, the "@unwind" marker must be specified on
45964 every occurrence of the ".eh_frame" section, not just the first
45967 && strcmp (name, ".eh_frame") == 0)
45969 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45970 flags & SECTION_WRITE ? "aw" : "a");
45975 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45977 solaris_elf_asm_comdat_section (name, flags, decl);
45982 default_elf_asm_named_section (name, flags, decl);
45984 #endif /* TARGET_SOLARIS */
45986 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* TARGET_MANGLE_TYPE: C++ name mangling for i386 extended fundamental
   types -- "g" for __float128 and "e" for long double/__float80.
   Non-arithmetic types are not handled here.  NOTE(review): the switch
   cases and return statements are elided in this excerpt; only the
   comments naming the manglings survive.  */
45988 static const char *
45989 ix86_mangle_type (const_tree type)
45991 type = TYPE_MAIN_VARIANT (type);
45993 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45994 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45997 switch (TYPE_MODE (type))
46000 /* __float128 is "g". */
46003 /* "long double" or __float80 is "e". */
46010 /* For 32-bit code we can save PIC register setup by using
46011 __stack_chk_fail_local hidden function instead of calling
46012 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
46013 register, so it is better to call __stack_chk_fail directly. */
/* TARGET_STACK_PROTECT_FAIL: 64-bit code calls __stack_chk_fail
   directly; 32-bit code uses the hidden __stack_chk_fail_local to
   avoid PIC register setup (see comment above).  */
46015 static tree ATTRIBUTE_UNUSED
46016 ix86_stack_protect_fail (void)
46018 return TARGET_64BIT
46019 ? default_external_stack_protect_fail ()
46020 : default_hidden_stack_protect_fail ();
46023 /* Select a format to encode pointers in exception handling data. CODE
46024 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46025 true if the symbol may be affected by dynamic relocations.
46027 ??? All x86 object file formats are capable of representing this.
46028 After all, the relocation needed is the same as for the call insn.
46029 Whether or not a particular assembler allows us to enter such, I
46030 guess we'll have to see. */
/* Choose the DWARF EH pointer encoding.  PIC code (the elided branch
   condition) uses pc-relative sdata4/sdata8, indirected for GLOBAL
   symbols; non-PIC small/medium models can use udata4; otherwise fall
   back to an absolute pointer.  */
46032 asm_preferred_eh_data_format (int code, int global)
46036 int type = DW_EH_PE_sdata8;
46038 || ix86_cmodel == CM_SMALL_PIC
46039 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46040 type = DW_EH_PE_sdata4;
46041 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46043 if (ix86_cmodel == CM_SMALL
46044 || (ix86_cmodel == CM_MEDIUM && code))
46045 return DW_EH_PE_udata4;
46046 return DW_EH_PE_absptr;
46049 /* Expand copysign from SIGN to the positive value ABS_VALUE
46050 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE with the sign bit of SIGN copied in:
   result = abs_value | (sign & signbit-mask).  If MASK is NULL a
   sign-bit mask for SIGN's mode is built here (scalar modes extract
   lane 0 of the vector mask).  ABS_VALUE must already be non-negative.  */
46053 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46055 machine_mode mode = GET_MODE (sign);
46056 rtx sgn = gen_reg_rtx (mode);
46057 if (mask == NULL_RTX)
46059 machine_mode vmode;
46061 if (mode == SFmode)
46063 else if (mode == DFmode)
46068 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46069 if (!VECTOR_MODE_P (mode))
46071 /* We need to generate a scalar mode mask in this case. */
46072 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46073 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46074 mask = gen_reg_rtx (mode);
46075 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied MASK is inverted (it masks the sign bit out).  */
46079 mask = gen_rtx_NOT (mode, mask);
46080 emit_insn (gen_rtx_SET (VOIDmode, sgn,
46081 gen_rtx_AND (mode, mask, sign)));
46082 emit_insn (gen_rtx_SET (VOIDmode, result,
46083 gen_rtx_IOR (mode, abs_value, sgn)));
46086 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46087 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Return a new pseudo holding fabs (OP0), computed by ANDing with an
   inverted sign-bit mask.  If SMASK is non-null the mask used is
   stored there for later copysign use (usage visible in callers).  */
46090 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46092 machine_mode vmode, mode = GET_MODE (op0);
46095 xa = gen_reg_rtx (mode);
46096 if (mode == SFmode)
46098 else if (mode == DFmode)
46102 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46103 if (!VECTOR_MODE_P (mode))
46105 /* We need to generate a scalar mode mask in this case. */
46106 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46107 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46108 mask = gen_reg_rtx (mode);
46109 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & ~signbit.  */
46111 emit_insn (gen_rtx_SET (VOIDmode, xa,
46112 gen_rtx_AND (mode, op0, mask)));
46120 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46121 swapping the operands if SWAP_OPERANDS is true. The expanded
46122 code is a forward jump to a newly created label in case the
46123 comparison is true. The generated label rtx is returned. */
/* Emit a compare of OP0 against OP1 with CODE (operands swapped when
   SWAP_OPERANDS) and a forward jump to a fresh label taken when the
   comparison holds.  Returns that label for the caller to emit.  */
46124 static rtx_code_label *
46125 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46126 bool swap_operands)
46128 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46129 rtx_code_label *label;
46133 std::swap (op0, op1);
46135 label = gen_label_rtx ();
/* flags = compare (op0, op1); then branch on CODE against 0.  */
46136 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46137 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46138 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46139 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46140 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46141 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46142 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46143 JUMP_LABEL (tmp) = label;
46148 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46149 using comparison code CODE. Operands are swapped for the comparison if
46150 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Emit an SSE mask-producing compare (cmpsd/cmpss-style setcc) of OP0
   and OP1 under CODE, swapping operands when requested, and return the
   all-ones/all-zeros mask register.  */
46152 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46153 bool swap_operands)
46155 rtx (*insn)(rtx, rtx, rtx, rtx);
46156 machine_mode mode = GET_MODE (op0);
46157 rtx mask = gen_reg_rtx (mode);
46160 std::swap (op0, op1);
/* Pick the DF or SF variant of the setcc pattern by operand mode.  */
46162 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46164 emit_insn (insn (mask, op0, op1,
46165 gen_rtx_fmt_ee (code, mode, op0, op1)));
46169 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46170 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* Return a register holding 2^52 (DFmode) or 2^23 (SFmode) -- the
   magnitude above which every representable value is already an
   integer, used as the "no rounding needed" threshold below.  */
46172 ix86_gen_TWO52 (machine_mode mode)
46174 REAL_VALUE_TYPE TWO52r;
46177 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46178 TWO52 = const_double_from_real_value (TWO52r, mode);
46179 TWO52 = force_reg (mode, TWO52);
46184 /* Expand SSE sequence for computing lround from OP1 storing
/* Expand lround: OP0 = (integer) (OP1 + copysign (0.5-, OP1)), where
   0.5- is nextafter (0.5, 0.0) so that values exactly halfway are not
   pushed over by double rounding.  */
46187 ix86_expand_lround (rtx op0, rtx op1)
46189 /* C code for the stuff we're doing below:
46190 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46193 machine_mode mode = GET_MODE (op1);
46194 const struct real_format *fmt;
46195 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46198 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2^(-p-1), the largest value below 0.5.  */
46199 fmt = REAL_MODE_FORMAT (mode);
46200 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46201 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46203 /* adj = copysign (0.5, op1) */
46204 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46205 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46207 /* adj = op1 + adj */
46208 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46210 /* op0 = (imode)adj */
46211 expand_fix (op0, adj, 0);
46214 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* Expand lfloor (DO_FLOOR) or lceil (!DO_FLOOR): truncate OP1 to an
   integer, convert back, and adjust by one when the truncation went
   the wrong way for the requested direction.  */
46217 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46219 /* C code for the stuff we're doing below (for do_floor):
46221 xi -= (double)xi > op1 ? 1 : 0;
46224 machine_mode fmode = GET_MODE (op1);
46225 machine_mode imode = GET_MODE (op0);
46226 rtx ireg, freg, tmp;
46227 rtx_code_label *label;
46229 /* reg = (long)op1 */
46230 ireg = gen_reg_rtx (imode);
46231 expand_fix (ireg, op1, 0);
46233 /* freg = (double)reg */
46234 freg = gen_reg_rtx (fmode);
46235 expand_float (freg, ireg, 0);
46237 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Skip the adjustment (jump over it) when no correction is needed;
   UNLE with swapped operands handles the ceil direction.  */
46238 label = ix86_expand_sse_compare_and_jump (UNLE,
46239 freg, op1, !do_floor);
46240 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46241 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46242 emit_move_insn (ireg, tmp);
46244 emit_label (label);
46245 LABEL_NUSES (label) = 1;
46247 emit_move_insn (op0, ireg);
46250 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46251 result in OPERAND0. */
/* Expand rint: add and subtract 2^52 (2^23 for SF) so the FPU's
   current rounding mode does the rounding, then restore the original
   sign.  Values with |x| >= 2^52 are already integral and bypass the
   computation via the jump.  */
46253 ix86_expand_rint (rtx operand0, rtx operand1)
46255 /* C code for the stuff we're doing below:
46256 xa = fabs (operand1);
46257 if (!isless (xa, 2**52))
46259 xa = xa + 2**52 - 2**52;
46260 return copysign (xa, operand1);
46262 machine_mode mode = GET_MODE (operand0);
46263 rtx res, xa, TWO52, mask;
46264 rtx_code_label *label;
46266 res = gen_reg_rtx (mode);
46267 emit_move_insn (res, operand1);
46269 /* xa = abs (operand1) */
46270 xa = ix86_expand_sse_fabs (res, &mask);
46272 /* if (!isless (xa, TWO52)) goto label; */
46273 TWO52 = ix86_gen_TWO52 (mode);
46274 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46276 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46277 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restoring the sign keeps -0.0 -> -0.0 correct.  */
46279 ix86_sse_copysign_to_positive (res, xa, res, mask);
46281 emit_label (label);
46282 LABEL_NUSES (label) = 1;
46284 emit_move_insn (operand0, res);
46287 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Expand floor (DO_FLOOR) / ceil (!DO_FLOOR) without relying on DImode
   fix-truncation (32-bit safe): round via the +/-TWO52 trick, restore
   the sign, then subtract 1.0 (floor) or -1.0 (ceil) when the rounded
   value overshot in the wrong direction.  */
46290 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46292 /* C code for the stuff we expand below.
46293 double xa = fabs (x), x2;
46294 if (!isless (xa, TWO52))
46296 xa = xa + TWO52 - TWO52;
46297 x2 = copysign (xa, x);
46306 machine_mode mode = GET_MODE (operand0);
46307 rtx xa, TWO52, tmp, one, res, mask;
46308 rtx_code_label *label;
46310 TWO52 = ix86_gen_TWO52 (mode);
46312 /* Temporary for holding the result, initialized to the input
46313 operand to ease control flow. */
46314 res = gen_reg_rtx (mode);
46315 emit_move_insn (res, operand1);
46317 /* xa = abs (operand1) */
46318 xa = ix86_expand_sse_fabs (res, &mask);
46320 /* if (!isless (xa, TWO52)) goto label; */
46321 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46323 /* xa = xa + TWO52 - TWO52; */
46324 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46325 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46327 /* xa = copysign (xa, operand1) */
46328 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46330 /* generate 1.0 or -1.0 */
46331 one = force_reg (mode,
46332 const_double_from_real_value (do_floor
46333 ? dconst1 : dconstm1, mode));
46335 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46336 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46337 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46338 gen_rtx_AND (mode, one, tmp)));
46339 /* We always need to subtract here to preserve signed zero. */
46340 tmp = expand_simple_binop (mode, MINUS,
46341 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46342 emit_move_insn (res, tmp);
46344 emit_label (label);
46345 LABEL_NUSES (label) = 1;
46347 emit_move_insn (operand0, res);
46350 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Expand floor (DO_FLOOR) / ceil (!DO_FLOOR) via truncating
   fix-and-float, then adjust by 1 when truncation moved the value the
   wrong way.  Uses DImode conversion for DFmode, so this variant
   needs cvttsd2siq-capable targets (cf. ix86_expand_floorceildf_32).  */
46353 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46355 /* C code for the stuff we expand below.
46356 double xa = fabs (x), x2;
46357 if (!isless (xa, TWO52))
46359 x2 = (double)(long)x;
46366 if (HONOR_SIGNED_ZEROS (mode))
46367 return copysign (x2, x);
46370 machine_mode mode = GET_MODE (operand0);
46371 rtx xa, xi, TWO52, tmp, one, res, mask;
46372 rtx_code_label *label;
46374 TWO52 = ix86_gen_TWO52 (mode);
46376 /* Temporary for holding the result, initialized to the input
46377 operand to ease control flow. */
46378 res = gen_reg_rtx (mode);
46379 emit_move_insn (res, operand1);
46381 /* xa = abs (operand1) */
46382 xa = ix86_expand_sse_fabs (res, &mask);
46384 /* if (!isless (xa, TWO52)) goto label; */
46385 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46387 /* xa = (double)(long)x */
46388 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46389 expand_fix (xi, res, 0);
46390 expand_float (xa, xi, 0);
46393 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46395 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46396 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46397 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46398 gen_rtx_AND (mode, one, tmp)));
46399 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46400 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46401 emit_move_insn (res, tmp);
/* Truncation loses the sign of -0.0; copy it back when it matters.  */
46403 if (HONOR_SIGNED_ZEROS (mode))
46404 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46406 emit_label (label);
46407 LABEL_NUSES (label) = 1;
46409 emit_move_insn (operand0, res);
46412 /* Expand SSE sequence for computing round from OPERAND1 storing
46413 into OPERAND0. Sequence that works without relying on DImode truncation
46414 via cvttsd2siq that is only available on 64bit targets. */
/* Expand round without DImode truncation (works on 32-bit targets):
   round-to-even via +/-TWO52, then correct by comparing the rounding
   delta dxa against +/-0.5 so halfway cases round away from zero, and
   finally restore the sign.  */
46416 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46418 /* C code for the stuff we expand below.
46419 double xa = fabs (x), xa2, x2;
46420 if (!isless (xa, TWO52))
46422 Using the absolute value and copying back sign makes
46423 -0.0 -> -0.0 correct.
46424 xa2 = xa + TWO52 - TWO52;
46429 else if (dxa > 0.5)
46431 x2 = copysign (xa2, x);
46434 machine_mode mode = GET_MODE (operand0);
46435 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46436 rtx_code_label *label;
46438 TWO52 = ix86_gen_TWO52 (mode);
46440 /* Temporary for holding the result, initialized to the input
46441 operand to ease control flow. */
46442 res = gen_reg_rtx (mode);
46443 emit_move_insn (res, operand1);
46445 /* xa = abs (operand1) */
46446 xa = ix86_expand_sse_fabs (res, &mask);
46448 /* if (!isless (xa, TWO52)) goto label; */
46449 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46451 /* xa2 = xa + TWO52 - TWO52; */
46452 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46453 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46455 /* dxa = xa2 - xa; */
46456 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46458 /* generate 0.5, 1.0 and -0.5 */
46459 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46460 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46461 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46465 tmp = gen_reg_rtx (mode);
46466 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46467 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46468 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46469 gen_rtx_AND (mode, one, tmp)));
46470 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46471 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46472 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46473 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46474 gen_rtx_AND (mode, one, tmp)));
46475 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46477 /* res = copysign (xa2, operand1) */
46478 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46480 emit_label (label);
46481 LABEL_NUSES (label) = 1;
46483 emit_move_insn (operand0, res);
46486 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* Expand trunc via fix-truncate to integer and back; values at or
   above TWO52 are already integral and skip the conversion.  The sign
   of -0.0 is restored afterwards when signed zeros are honored.  */
46489 ix86_expand_trunc (rtx operand0, rtx operand1)
46491 /* C code for SSE variant we expand below.
46492 double xa = fabs (x), x2;
46493 if (!isless (xa, TWO52))
46495 x2 = (double)(long)x;
46496 if (HONOR_SIGNED_ZEROS (mode))
46497 return copysign (x2, x);
46500 machine_mode mode = GET_MODE (operand0);
46501 rtx xa, xi, TWO52, res, mask;
46502 rtx_code_label *label;
46504 TWO52 = ix86_gen_TWO52 (mode);
46506 /* Temporary for holding the result, initialized to the input
46507 operand to ease control flow. */
46508 res = gen_reg_rtx (mode);
46509 emit_move_insn (res, operand1);
46511 /* xa = abs (operand1) */
46512 xa = ix86_expand_sse_fabs (res, &mask);
46514 /* if (!isless (xa, TWO52)) goto label; */
46515 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46517 /* x = (double)(long)x */
46518 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46519 expand_fix (xi, res, 0);
46520 expand_float (res, xi, 0);
46522 if (HONOR_SIGNED_ZEROS (mode))
46523 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46525 emit_label (label);
46526 LABEL_NUSES (label) = 1;
46528 emit_move_insn (operand0, res);
46531 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* Expand trunc for DFmode without DImode truncation (32-bit safe):
   round via +/-TWO52, subtract 1 when rounding went up past the
   original magnitude, then restore the sign.  */
46534 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46536 machine_mode mode = GET_MODE (operand0);
46537 rtx xa, mask, TWO52, one, res, smask, tmp;
46538 rtx_code_label *label;
46540 /* C code for SSE variant we expand below.
46541 double xa = fabs (x), x2;
46542 if (!isless (xa, TWO52))
46544 xa2 = xa + TWO52 - TWO52;
46548 x2 = copysign (xa2, x);
46552 TWO52 = ix86_gen_TWO52 (mode);
46554 /* Temporary for holding the result, initialized to the input
46555 operand to ease control flow. */
46556 res = gen_reg_rtx (mode);
46557 emit_move_insn (res, operand1);
46559 /* xa = abs (operand1) */
46560 xa = ix86_expand_sse_fabs (res, &smask);
46562 /* if (!isless (xa, TWO52)) goto label; */
46563 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46565 /* res = xa + TWO52 - TWO52; */
46566 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46567 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46568 emit_move_insn (res, tmp);
46571 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46573 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46574 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46575 emit_insn (gen_rtx_SET (VOIDmode, mask,
46576 gen_rtx_AND (mode, mask, one)));
46577 tmp = expand_simple_binop (mode, MINUS,
46578 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46579 emit_move_insn (res, tmp);
46581 /* res = copysign (res, operand1) */
46582 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46584 emit_label (label);
46585 LABEL_NUSES (label) = 1;
46587 emit_move_insn (operand0, res);
46590 /* Expand SSE sequence for computing round from OPERAND1 storing
/* Expand round via fix-truncation: add nextafter (0.5, 0.0) to |x|,
   truncate to integer and back, then restore the sign.  Using the
   predecessor of 0.5 avoids double-rounding errors on exact-halfway
   inputs.  Requires DImode truncation for DFmode (64-bit targets).  */
46593 ix86_expand_round (rtx operand0, rtx operand1)
46595 /* C code for the stuff we're doing below:
46596 double xa = fabs (x);
46597 if (!isless (xa, TWO52))
46599 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46600 return copysign (xa, x);
46602 machine_mode mode = GET_MODE (operand0);
46603 rtx res, TWO52, xa, xi, half, mask;
46604 rtx_code_label *label;
46605 const struct real_format *fmt;
46606 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46608 /* Temporary for holding the result, initialized to the input
46609 operand to ease control flow. */
46610 res = gen_reg_rtx (mode);
46611 emit_move_insn (res, operand1);
46613 TWO52 = ix86_gen_TWO52 (mode);
46614 xa = ix86_expand_sse_fabs (res, &mask);
46615 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46617 /* load nextafter (0.5, 0.0) */
46618 fmt = REAL_MODE_FORMAT (mode);
46619 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46620 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46622 /* xa = xa + 0.5 */
46623 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46624 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46626 /* xa = (double)(int64_t)xa */
46627 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46628 expand_fix (xi, xa, 0);
46629 expand_float (xa, xi, 0);
46631 /* res = copysign (xa, operand1) */
46632 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46634 emit_label (label);
46635 LABEL_NUSES (label) = 1;
46637 emit_move_insn (operand0, res);
46640 /* Expand SSE sequence for computing round
46641 from OP1 storing into OP0 using sse4 round insn. */
/* Expand round using the SSE4.1 roundsd/roundss instruction:
   round (a) = trunc (a + copysign (nextafter (0.5, 0.0), a)).
   (The mode switch's case labels are elided in this excerpt.)  */
46643 ix86_expand_round_sse4 (rtx op0, rtx op1)
46645 machine_mode mode = GET_MODE (op0);
46646 rtx e1, e2, res, half;
46647 const struct real_format *fmt;
46648 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46649 rtx (*gen_copysign) (rtx, rtx, rtx);
46650 rtx (*gen_round) (rtx, rtx, rtx);
/* SFmode generators.  */
46655 gen_copysign = gen_copysignsf3;
46656 gen_round = gen_sse4_1_roundsf2;
/* DFmode generators.  */
46659 gen_copysign = gen_copysigndf3;
46660 gen_round = gen_sse4_1_rounddf2;
46663 gcc_unreachable ();
46666 /* round (a) = trunc (a + copysign (0.5, a)) */
46668 /* load nextafter (0.5, 0.0) */
46669 fmt = REAL_MODE_FORMAT (mode);
46670 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46671 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46672 half = const_double_from_real_value (pred_half, mode);
46674 /* e1 = copysign (0.5, op1) */
46675 e1 = gen_reg_rtx (mode);
46676 emit_insn (gen_copysign (e1, half, op1));
46678 /* e2 = op1 + e1 */
46679 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46681 /* res = trunc (e2) */
46682 res = gen_reg_rtx (mode);
46683 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46685 emit_move_insn (op0, res);
46689 /* Table of valid machine attributes. */
/* Machine attribute table (TARGET_ATTRIBUTE_TABLE).  Each entry:
   { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
     affects_type_identity }.  Several entries' trailing fields fall on
   lines elided from this excerpt.  */
46690 static const struct attribute_spec ix86_attribute_table[] =
46692 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46693 affects_type_identity } */
46694 /* Stdcall attribute says callee is responsible for popping arguments
46695 if they are not variable. */
46696 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46698 /* Fastcall attribute says callee is responsible for popping arguments
46699 if they are not variable. */
46700 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46702 /* Thiscall attribute says callee is responsible for popping arguments
46703 if they are not variable. */
46704 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46706 /* Cdecl attribute says the callee is a normal C declaration */
46707 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46709 /* Regparm attribute specifies how many integer arguments are to be
46710 passed in registers. */
46711 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46713 /* Sseregparm attribute says we are using x86_64 calling conventions
46714 for FP arguments. */
46715 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46717 /* The transactional memory builtins are implicitly regparm or fastcall
46718 depending on the ABI. Override the generic do-nothing attribute that
46719 these builtins were declared with. */
46720 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46722 /* force_align_arg_pointer says this function realigns the stack at entry. */
46723 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46724 false, true, true, ix86_handle_cconv_attribute, false },
46725 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46726 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46727 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46728 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
/* Struct-layout attributes (MS vs GCC bit-field rules).  */
46731 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46733 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46735 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46736 SUBTARGET_ATTRIBUTE_TABLE,
46738 /* ms_abi and sysv_abi calling convention function attributes. */
46739 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46740 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46741 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46743 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46744 ix86_handle_callee_pop_aggregate_return, true },
/* End-of-table sentinel.  */
46746 { NULL, 0, 0, false, false, false, NULL, false }
46749 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST: map a vectorizer cost
   kind to the active tuning's cost-table entry.  (The case labels for
   the first six entries are on elided lines; the order of returns
   follows the scalar/vector stmt, load, store sequence.)  */
46751 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46756 switch (type_of_cost)
46759 return ix86_cost->scalar_stmt_cost;
46762 return ix86_cost->scalar_load_cost;
46765 return ix86_cost->scalar_store_cost;
46768 return ix86_cost->vec_stmt_cost;
46771 return ix86_cost->vec_align_load_cost;
46774 return ix86_cost->vec_store_cost;
46776 case vec_to_scalar:
46777 return ix86_cost->vec_to_scalar_cost;
46779 case scalar_to_vec:
46780 return ix86_cost->scalar_to_vec_cost;
46782 case unaligned_load:
46783 case unaligned_store:
46784 return ix86_cost->vec_unalign_load_cost;
46786 case cond_branch_taken:
46787 return ix86_cost->cond_taken_branch_cost;
46789 case cond_branch_not_taken:
46790 return ix86_cost->cond_not_taken_branch_cost;
46793 case vec_promote_demote:
46794 return ix86_cost->vec_stmt_cost;
46796 case vec_construct:
/* Building a vector from scalars: roughly one insn per two elements.  */
46797 elements = TYPE_VECTOR_SUBPARTS (vectype);
46798 return elements / 2 + 1;
46801 gcc_unreachable ();
46805 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46806 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46807 insn every time. */
46809 static GTY(()) rtx_insn *vselect_insn;
46811 /* Initialize vselect_insn. */
46814 init_vselect_insn (void)
46819 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46820 for (i = 0; i < MAX_VECT_LEN; ++i)
46821 XVECEXP (x, 0, i) = const0_rtx;
46822 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46824 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46826 vselect_insn = emit_insn (x);
46830 /* Construct (set target (vec_select op0 (parallel perm))) and
46831 return true if that's a valid instruction in the active ISA. */
46834 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46835 unsigned nelt, bool testing_p)
46838 rtx x, save_vconcat;
46841 if (vselect_insn == NULL_RTX)
46842 init_vselect_insn ();
46844 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46845 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46846 for (i = 0; i < nelt; ++i)
46847 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46848 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46849 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46850 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46851 SET_DEST (PATTERN (vselect_insn)) = target;
46852 icode = recog_memoized (vselect_insn);
46854 if (icode >= 0 && !testing_p)
46855 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46857 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46858 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46859 INSN_CODE (vselect_insn) = -1;
46864 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46867 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46868 const unsigned char *perm, unsigned nelt,
46871 machine_mode v2mode;
46875 if (vselect_insn == NULL_RTX)
46876 init_vselect_insn ();
46878 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46879 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46880 PUT_MODE (x, v2mode);
46883 ok = expand_vselect (target, x, perm, nelt, testing_p);
46884 XEXP (x, 0) = const0_rtx;
46885 XEXP (x, 1) = const0_rtx;
46889 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46890 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46893 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46895 machine_mode vmode = d->vmode;
46896 unsigned i, mask, nelt = d->nelt;
46897 rtx target, op0, op1, x;
46898 rtx rperm[32], vperm;
46900 if (d->one_operand_p)
46902 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46903 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46905 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46907 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46909 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46914 /* This is a blend, not a permute. Elements must stay in their
46915 respective lanes. */
46916 for (i = 0; i < nelt; ++i)
46918 unsigned e = d->perm[i];
46919 if (!(e == i || e == i + nelt))
46926 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46927 decision should be extracted elsewhere, so that we only try that
46928 sequence once all budget==3 options have been tried. */
46929 target = d->target;
46948 for (i = 0; i < nelt; ++i)
46949 mask |= (d->perm[i] >= nelt) << i;
46953 for (i = 0; i < 2; ++i)
46954 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46959 for (i = 0; i < 4; ++i)
46960 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46965 /* See if bytes move in pairs so we can use pblendw with
46966 an immediate argument, rather than pblendvb with a vector
46968 for (i = 0; i < 16; i += 2)
46969 if (d->perm[i] + 1 != d->perm[i + 1])
46972 for (i = 0; i < nelt; ++i)
46973 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46976 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46977 vperm = force_reg (vmode, vperm);
46979 if (GET_MODE_SIZE (vmode) == 16)
46980 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46982 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46983 if (target != d->target)
46984 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46988 for (i = 0; i < 8; ++i)
46989 mask |= (d->perm[i * 2] >= 16) << i;
46994 target = gen_reg_rtx (vmode);
46995 op0 = gen_lowpart (vmode, op0);
46996 op1 = gen_lowpart (vmode, op1);
47000 /* See if bytes move in pairs. If not, vpblendvb must be used. */
47001 for (i = 0; i < 32; i += 2)
47002 if (d->perm[i] + 1 != d->perm[i + 1])
47004 /* See if bytes move in quadruplets. If yes, vpblendd
47005 with immediate can be used. */
47006 for (i = 0; i < 32; i += 4)
47007 if (d->perm[i] + 2 != d->perm[i + 2])
47011 /* See if bytes move the same in both lanes. If yes,
47012 vpblendw with immediate can be used. */
47013 for (i = 0; i < 16; i += 2)
47014 if (d->perm[i] + 16 != d->perm[i + 16])
47017 /* Use vpblendw. */
47018 for (i = 0; i < 16; ++i)
47019 mask |= (d->perm[i * 2] >= 32) << i;
47024 /* Use vpblendd. */
47025 for (i = 0; i < 8; ++i)
47026 mask |= (d->perm[i * 4] >= 32) << i;
47031 /* See if words move in pairs. If yes, vpblendd can be used. */
47032 for (i = 0; i < 16; i += 2)
47033 if (d->perm[i] + 1 != d->perm[i + 1])
47037 /* See if words move the same in both lanes. If not,
47038 vpblendvb must be used. */
47039 for (i = 0; i < 8; i++)
47040 if (d->perm[i] + 8 != d->perm[i + 8])
47042 /* Use vpblendvb. */
47043 for (i = 0; i < 32; ++i)
47044 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47048 target = gen_reg_rtx (vmode);
47049 op0 = gen_lowpart (vmode, op0);
47050 op1 = gen_lowpart (vmode, op1);
47051 goto finish_pblendvb;
47054 /* Use vpblendw. */
47055 for (i = 0; i < 16; ++i)
47056 mask |= (d->perm[i] >= 16) << i;
47060 /* Use vpblendd. */
47061 for (i = 0; i < 8; ++i)
47062 mask |= (d->perm[i * 2] >= 16) << i;
47067 /* Use vpblendd. */
47068 for (i = 0; i < 4; ++i)
47069 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47074 gcc_unreachable ();
47077 /* This matches five different patterns with the different modes. */
47078 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
47079 x = gen_rtx_SET (VOIDmode, target, x);
47081 if (target != d->target)
47082 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47087 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47088 in terms of the variable form of vpermilps.
47090 Note that we will have already failed the immediate input vpermilps,
47091 which requires that the high and low part shuffle be identical; the
47092 variable form doesn't require that. */
47095 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47097 rtx rperm[8], vperm;
47100 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47103 /* We can only permute within the 128-bit lane. */
47104 for (i = 0; i < 8; ++i)
47106 unsigned e = d->perm[i];
47107 if (i < 4 ? e >= 4 : e < 4)
47114 for (i = 0; i < 8; ++i)
47116 unsigned e = d->perm[i];
47118 /* Within each 128-bit lane, the elements of op0 are numbered
47119 from 0 and the elements of op1 are numbered from 4. */
47125 rperm[i] = GEN_INT (e);
47128 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47129 vperm = force_reg (V8SImode, vperm);
47130 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47135 /* Return true if permutation D can be performed as VMODE permutation
47139 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47141 unsigned int i, j, chunk;
47143 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47144 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47145 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47148 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47151 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47152 for (i = 0; i < d->nelt; i += chunk)
47153 if (d->perm[i] & (chunk - 1))
47156 for (j = 1; j < chunk; ++j)
47157 if (d->perm[i] + j != d->perm[i + j])
47163 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47164 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47167 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47169 unsigned i, nelt, eltsz, mask;
47170 unsigned char perm[64];
47171 machine_mode vmode = V16QImode;
47172 rtx rperm[64], vperm, target, op0, op1;
47176 if (!d->one_operand_p)
47178 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47181 && valid_perm_using_mode_p (V2TImode, d))
47186 /* Use vperm2i128 insn. The pattern uses
47187 V4DImode instead of V2TImode. */
47188 target = d->target;
47189 if (d->vmode != V4DImode)
47190 target = gen_reg_rtx (V4DImode);
47191 op0 = gen_lowpart (V4DImode, d->op0);
47192 op1 = gen_lowpart (V4DImode, d->op1);
47194 = GEN_INT ((d->perm[0] / (nelt / 2))
47195 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47196 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47197 if (target != d->target)
47198 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47206 if (GET_MODE_SIZE (d->vmode) == 16)
47211 else if (GET_MODE_SIZE (d->vmode) == 32)
47216 /* V4DImode should be already handled through
47217 expand_vselect by vpermq instruction. */
47218 gcc_assert (d->vmode != V4DImode);
47221 if (d->vmode == V8SImode
47222 || d->vmode == V16HImode
47223 || d->vmode == V32QImode)
47225 /* First see if vpermq can be used for
47226 V8SImode/V16HImode/V32QImode. */
47227 if (valid_perm_using_mode_p (V4DImode, d))
47229 for (i = 0; i < 4; i++)
47230 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47233 target = gen_reg_rtx (V4DImode);
47234 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47237 emit_move_insn (d->target,
47238 gen_lowpart (d->vmode, target));
47244 /* Next see if vpermd can be used. */
47245 if (valid_perm_using_mode_p (V8SImode, d))
47248 /* Or if vpermps can be used. */
47249 else if (d->vmode == V8SFmode)
47252 if (vmode == V32QImode)
47254 /* vpshufb only works intra lanes, it is not
47255 possible to shuffle bytes in between the lanes. */
47256 for (i = 0; i < nelt; ++i)
47257 if ((d->perm[i] ^ i) & (nelt / 2))
47261 else if (GET_MODE_SIZE (d->vmode) == 64)
47263 if (!TARGET_AVX512BW)
47266 /* If vpermq didn't work, vpshufb won't work either. */
47267 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47271 if (d->vmode == V16SImode
47272 || d->vmode == V32HImode
47273 || d->vmode == V64QImode)
47275 /* First see if vpermq can be used for
47276 V16SImode/V32HImode/V64QImode. */
47277 if (valid_perm_using_mode_p (V8DImode, d))
47279 for (i = 0; i < 8; i++)
47280 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47283 target = gen_reg_rtx (V8DImode);
47284 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47287 emit_move_insn (d->target,
47288 gen_lowpart (d->vmode, target));
47294 /* Next see if vpermd can be used. */
47295 if (valid_perm_using_mode_p (V16SImode, d))
47298 /* Or if vpermps can be used. */
47299 else if (d->vmode == V16SFmode)
47301 if (vmode == V64QImode)
47303 /* vpshufb only works intra lanes, it is not
47304 possible to shuffle bytes in between the lanes. */
47305 for (i = 0; i < nelt; ++i)
47306 if ((d->perm[i] ^ i) & (nelt / 4))
47317 if (vmode == V8SImode)
47318 for (i = 0; i < 8; ++i)
47319 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47320 else if (vmode == V16SImode)
47321 for (i = 0; i < 16; ++i)
47322 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47325 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47326 if (!d->one_operand_p)
47327 mask = 2 * nelt - 1;
47328 else if (vmode == V16QImode)
47330 else if (vmode == V64QImode)
47331 mask = nelt / 4 - 1;
47333 mask = nelt / 2 - 1;
47335 for (i = 0; i < nelt; ++i)
47337 unsigned j, e = d->perm[i] & mask;
47338 for (j = 0; j < eltsz; ++j)
47339 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47343 vperm = gen_rtx_CONST_VECTOR (vmode,
47344 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47345 vperm = force_reg (vmode, vperm);
47347 target = d->target;
47348 if (d->vmode != vmode)
47349 target = gen_reg_rtx (vmode);
47350 op0 = gen_lowpart (vmode, d->op0);
47351 if (d->one_operand_p)
47353 if (vmode == V16QImode)
47354 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47355 else if (vmode == V32QImode)
47356 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47357 else if (vmode == V64QImode)
47358 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47359 else if (vmode == V8SFmode)
47360 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47361 else if (vmode == V8SImode)
47362 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47363 else if (vmode == V16SFmode)
47364 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47365 else if (vmode == V16SImode)
47366 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47368 gcc_unreachable ();
47372 op1 = gen_lowpart (vmode, d->op1);
47373 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47375 if (target != d->target)
47376 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47381 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47382 in a single instruction. */
47385 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47387 unsigned i, nelt = d->nelt;
47388 unsigned char perm2[MAX_VECT_LEN];
47390 /* Check plain VEC_SELECT first, because AVX has instructions that could
47391 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47392 input where SEL+CONCAT may not. */
47393 if (d->one_operand_p)
47395 int mask = nelt - 1;
47396 bool identity_perm = true;
47397 bool broadcast_perm = true;
47399 for (i = 0; i < nelt; i++)
47401 perm2[i] = d->perm[i] & mask;
47403 identity_perm = false;
47405 broadcast_perm = false;
47411 emit_move_insn (d->target, d->op0);
47414 else if (broadcast_perm && TARGET_AVX2)
47416 /* Use vpbroadcast{b,w,d}. */
47417 rtx (*gen) (rtx, rtx) = NULL;
47421 if (TARGET_AVX512BW)
47422 gen = gen_avx512bw_vec_dupv64qi_1;
47425 gen = gen_avx2_pbroadcastv32qi_1;
47428 if (TARGET_AVX512BW)
47429 gen = gen_avx512bw_vec_dupv32hi_1;
47432 gen = gen_avx2_pbroadcastv16hi_1;
47435 if (TARGET_AVX512F)
47436 gen = gen_avx512f_vec_dupv16si_1;
47439 gen = gen_avx2_pbroadcastv8si_1;
47442 gen = gen_avx2_pbroadcastv16qi;
47445 gen = gen_avx2_pbroadcastv8hi;
47448 if (TARGET_AVX512F)
47449 gen = gen_avx512f_vec_dupv16sf_1;
47452 gen = gen_avx2_vec_dupv8sf_1;
47455 if (TARGET_AVX512F)
47456 gen = gen_avx512f_vec_dupv8df_1;
47459 if (TARGET_AVX512F)
47460 gen = gen_avx512f_vec_dupv8di_1;
47462 /* For other modes prefer other shuffles this function creates. */
47468 emit_insn (gen (d->target, d->op0));
47473 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47476 /* There are plenty of patterns in sse.md that are written for
47477 SEL+CONCAT and are not replicated for a single op. Perhaps
47478 that should be changed, to avoid the nastiness here. */
47480 /* Recognize interleave style patterns, which means incrementing
47481 every other permutation operand. */
47482 for (i = 0; i < nelt; i += 2)
47484 perm2[i] = d->perm[i] & mask;
47485 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47487 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47491 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47494 for (i = 0; i < nelt; i += 4)
47496 perm2[i + 0] = d->perm[i + 0] & mask;
47497 perm2[i + 1] = d->perm[i + 1] & mask;
47498 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47499 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47502 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47508 /* Finally, try the fully general two operand permute. */
47509 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47513 /* Recognize interleave style patterns with reversed operands. */
47514 if (!d->one_operand_p)
47516 for (i = 0; i < nelt; ++i)
47518 unsigned e = d->perm[i];
47526 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47531 /* Try the SSE4.1 blend variable merge instructions. */
47532 if (expand_vec_perm_blend (d))
47535 /* Try one of the AVX vpermil variable permutations. */
47536 if (expand_vec_perm_vpermil (d))
47539 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47540 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47541 if (expand_vec_perm_pshufb (d))
47544 /* Try the AVX2 vpalignr instruction. */
47545 if (expand_vec_perm_palignr (d, true))
47548 /* Try the AVX512F vpermi2 instructions. */
47549 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47555 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47556 in terms of a pair of pshuflw + pshufhw instructions. */
47559 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47561 unsigned char perm2[MAX_VECT_LEN];
47565 if (d->vmode != V8HImode || !d->one_operand_p)
47568 /* The two permutations only operate in 64-bit lanes. */
47569 for (i = 0; i < 4; ++i)
47570 if (d->perm[i] >= 4)
47572 for (i = 4; i < 8; ++i)
47573 if (d->perm[i] < 4)
47579 /* Emit the pshuflw. */
47580 memcpy (perm2, d->perm, 4);
47581 for (i = 4; i < 8; ++i)
47583 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47586 /* Emit the pshufhw. */
47587 memcpy (perm2 + 4, d->perm + 4, 4);
47588 for (i = 0; i < 4; ++i)
47590 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47596 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47597 the permutation using the SSSE3 palignr instruction. This succeeds
47598 when all of the elements in PERM fit within one vector and we merely
47599 need to shift them down so that a single vector permutation has a
47600 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47601 the vpalignr instruction itself can perform the requested permutation. */
47604 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47606 unsigned i, nelt = d->nelt;
47607 unsigned min, max, minswap, maxswap;
47608 bool in_order, ok, swap = false;
47610 struct expand_vec_perm_d dcopy;
47612 /* Even with AVX, palignr only operates on 128-bit vectors,
47613 in AVX2 palignr operates on both 128-bit lanes. */
47614 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47615 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47620 minswap = 2 * nelt;
47622 for (i = 0; i < nelt; ++i)
47624 unsigned e = d->perm[i];
47625 unsigned eswap = d->perm[i] ^ nelt;
47626 if (GET_MODE_SIZE (d->vmode) == 32)
47628 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47629 eswap = e ^ (nelt / 2);
47635 if (eswap < minswap)
47637 if (eswap > maxswap)
47641 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47643 if (d->one_operand_p
47645 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47646 ? nelt / 2 : nelt))
47653 /* Given that we have SSSE3, we know we'll be able to implement the
47654 single operand permutation after the palignr with pshufb for
47655 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47657 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47663 dcopy.op0 = d->op1;
47664 dcopy.op1 = d->op0;
47665 for (i = 0; i < nelt; ++i)
47666 dcopy.perm[i] ^= nelt;
47670 for (i = 0; i < nelt; ++i)
47672 unsigned e = dcopy.perm[i];
47673 if (GET_MODE_SIZE (d->vmode) == 32
47675 && (e & (nelt / 2 - 1)) < min)
47676 e = e - min - (nelt / 2);
47683 dcopy.one_operand_p = true;
47685 if (single_insn_only_p && !in_order)
47688 /* For AVX2, test whether we can permute the result in one instruction. */
47693 dcopy.op1 = dcopy.op0;
47694 return expand_vec_perm_1 (&dcopy);
47697 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47698 if (GET_MODE_SIZE (d->vmode) == 16)
47700 target = gen_reg_rtx (TImode);
47701 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47702 gen_lowpart (TImode, dcopy.op0), shift));
47706 target = gen_reg_rtx (V2TImode);
47707 emit_insn (gen_avx2_palignrv2ti (target,
47708 gen_lowpart (V2TImode, dcopy.op1),
47709 gen_lowpart (V2TImode, dcopy.op0),
47713 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47715 /* Test for the degenerate case where the alignment by itself
47716 produces the desired permutation. */
47719 emit_move_insn (d->target, dcopy.op0);
47723 ok = expand_vec_perm_1 (&dcopy);
47724 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47729 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47730 the permutation using the SSE4_1 pblendv instruction. Potentially
47731 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
47734 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47736 unsigned i, which, nelt = d->nelt;
47737 struct expand_vec_perm_d dcopy, dcopy1;
47738 machine_mode vmode = d->vmode;
47741 /* Use the same checks as in expand_vec_perm_blend. */
47742 if (d->one_operand_p)
47744 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47746 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47748 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47753 /* Figure out where permutation elements stay not in their
47754 respective lanes. */
47755 for (i = 0, which = 0; i < nelt; ++i)
47757 unsigned e = d->perm[i];
47759 which |= (e < nelt ? 1 : 2);
47761 /* We can pblend the part where elements stay not in their
47762 respective lanes only when these elements are all in one
47763 half of a permutation.
47764 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
47765 lanes, but both 8 and 9 >= 8
47766 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
47767 respective lanes and 8 >= 8, but 2 not. */
47768 if (which != 1 && which != 2)
47770 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47773 /* First we apply one operand permutation to the part where
47774 elements stay not in their respective lanes. */
47777 dcopy.op0 = dcopy.op1 = d->op1;
47779 dcopy.op0 = dcopy.op1 = d->op0;
47781 dcopy.target = gen_reg_rtx (vmode);
47782 dcopy.one_operand_p = true;
47784 for (i = 0; i < nelt; ++i)
47785 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47787 ok = expand_vec_perm_1 (&dcopy);
47788 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47795 /* Next we put permuted elements into their positions. */
47798 dcopy1.op1 = dcopy.target;
47800 dcopy1.op0 = dcopy.target;
47802 for (i = 0; i < nelt; ++i)
47803 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47805 ok = expand_vec_perm_blend (&dcopy1);
47811 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47813 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47814 a two vector permutation into a single vector permutation by using
47815 an interleave operation to merge the vectors. */
47818 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47820 struct expand_vec_perm_d dremap, dfinal;
47821 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47822 unsigned HOST_WIDE_INT contents;
47823 unsigned char remap[2 * MAX_VECT_LEN];
47825 bool ok, same_halves = false;
47827 if (GET_MODE_SIZE (d->vmode) == 16)
47829 if (d->one_operand_p)
47832 else if (GET_MODE_SIZE (d->vmode) == 32)
47836 /* For 32-byte modes allow even d->one_operand_p.
47837 The lack of cross-lane shuffling in some instructions
47838 might prevent a single insn shuffle. */
47840 dfinal.testing_p = true;
47841 /* If expand_vec_perm_interleave3 can expand this into
47842 a 3 insn sequence, give up and let it be expanded as
47843 3 insn sequence. While that is one insn longer,
47844 it doesn't need a memory operand and in the common
47845 case that both interleave low and high permutations
47846 with the same operands are adjacent needs 4 insns
47847 for both after CSE. */
47848 if (expand_vec_perm_interleave3 (&dfinal))
47854 /* Examine from whence the elements come. */
47856 for (i = 0; i < nelt; ++i)
47857 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47859 memset (remap, 0xff, sizeof (remap));
47862 if (GET_MODE_SIZE (d->vmode) == 16)
47864 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47866 /* Split the two input vectors into 4 halves. */
47867 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47872 /* If the elements from the low halves use interleave low, and similarly
47873 for interleave high. If the elements are from mis-matched halves, we
47874 can use shufps for V4SF/V4SI or do a DImode shuffle. */
47875 if ((contents & (h1 | h3)) == contents)
47878 for (i = 0; i < nelt2; ++i)
47881 remap[i + nelt] = i * 2 + 1;
47882 dremap.perm[i * 2] = i;
47883 dremap.perm[i * 2 + 1] = i + nelt;
47885 if (!TARGET_SSE2 && d->vmode == V4SImode)
47886 dremap.vmode = V4SFmode;
47888 else if ((contents & (h2 | h4)) == contents)
47891 for (i = 0; i < nelt2; ++i)
47893 remap[i + nelt2] = i * 2;
47894 remap[i + nelt + nelt2] = i * 2 + 1;
47895 dremap.perm[i * 2] = i + nelt2;
47896 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47898 if (!TARGET_SSE2 && d->vmode == V4SImode)
47899 dremap.vmode = V4SFmode;
47901 else if ((contents & (h1 | h4)) == contents)
47904 for (i = 0; i < nelt2; ++i)
47907 remap[i + nelt + nelt2] = i + nelt2;
47908 dremap.perm[i] = i;
47909 dremap.perm[i + nelt2] = i + nelt + nelt2;
47914 dremap.vmode = V2DImode;
47916 dremap.perm[0] = 0;
47917 dremap.perm[1] = 3;
47920 else if ((contents & (h2 | h3)) == contents)
47923 for (i = 0; i < nelt2; ++i)
47925 remap[i + nelt2] = i;
47926 remap[i + nelt] = i + nelt2;
47927 dremap.perm[i] = i + nelt2;
47928 dremap.perm[i + nelt2] = i + nelt;
47933 dremap.vmode = V2DImode;
47935 dremap.perm[0] = 1;
47936 dremap.perm[1] = 2;
47944 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47945 unsigned HOST_WIDE_INT q[8];
47946 unsigned int nonzero_halves[4];
47948 /* Split the two input vectors into 8 quarters. */
47949 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47950 for (i = 1; i < 8; ++i)
47951 q[i] = q[0] << (nelt4 * i);
47952 for (i = 0; i < 4; ++i)
47953 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47955 nonzero_halves[nzcnt] = i;
47961 gcc_assert (d->one_operand_p);
47962 nonzero_halves[1] = nonzero_halves[0];
47963 same_halves = true;
47965 else if (d->one_operand_p)
47967 gcc_assert (nonzero_halves[0] == 0);
47968 gcc_assert (nonzero_halves[1] == 1);
47973 if (d->perm[0] / nelt2 == nonzero_halves[1])
47975 /* Attempt to increase the likelihood that dfinal
47976 shuffle will be intra-lane. */
47977 char tmph = nonzero_halves[0];
47978 nonzero_halves[0] = nonzero_halves[1];
47979 nonzero_halves[1] = tmph;
47982 /* vperm2f128 or vperm2i128. */
47983 for (i = 0; i < nelt2; ++i)
47985 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47986 remap[i + nonzero_halves[0] * nelt2] = i;
47987 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47988 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47991 if (d->vmode != V8SFmode
47992 && d->vmode != V4DFmode
47993 && d->vmode != V8SImode)
47995 dremap.vmode = V8SImode;
47997 for (i = 0; i < 4; ++i)
47999 dremap.perm[i] = i + nonzero_halves[0] * 4;
48000 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48004 else if (d->one_operand_p)
48006 else if (TARGET_AVX2
48007 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48010 for (i = 0; i < nelt4; ++i)
48013 remap[i + nelt] = i * 2 + 1;
48014 remap[i + nelt2] = i * 2 + nelt2;
48015 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48016 dremap.perm[i * 2] = i;
48017 dremap.perm[i * 2 + 1] = i + nelt;
48018 dremap.perm[i * 2 + nelt2] = i + nelt2;
48019 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48022 else if (TARGET_AVX2
48023 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48026 for (i = 0; i < nelt4; ++i)
48028 remap[i + nelt4] = i * 2;
48029 remap[i + nelt + nelt4] = i * 2 + 1;
48030 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48031 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48032 dremap.perm[i * 2] = i + nelt4;
48033 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48034 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48035 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48042 /* Use the remapping array set up above to move the elements from their
48043 swizzled locations into their final destinations. */
48045 for (i = 0; i < nelt; ++i)
48047 unsigned e = remap[d->perm[i]];
48048 gcc_assert (e < nelt);
48049 /* If same_halves is true, both halves of the remapped vector are the
48050 same. Avoid cross-lane accesses if possible. */
48051 if (same_halves && i >= nelt2)
48053 gcc_assert (e < nelt2);
48054 dfinal.perm[i] = e + nelt2;
48057 dfinal.perm[i] = e;
48061 dremap.target = gen_reg_rtx (dremap.vmode);
48062 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48064 dfinal.op1 = dfinal.op0;
48065 dfinal.one_operand_p = true;
48067 /* Test if the final remap can be done with a single insn. For V4SFmode or
48068 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48070 ok = expand_vec_perm_1 (&dfinal);
48071 seq = get_insns ();
48080 if (dremap.vmode != dfinal.vmode)
48082 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48083 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48086 ok = expand_vec_perm_1 (&dremap);
48093 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48094 a single vector cross-lane permutation into vpermq followed
48095 by any of the single insn permutations. */
48098 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48100 struct expand_vec_perm_d dremap, dfinal;
48101 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48102 unsigned contents[2];
48106 && (d->vmode == V32QImode || d->vmode == V16HImode)
48107 && d->one_operand_p))
48112 for (i = 0; i < nelt2; ++i)
48114 contents[0] |= 1u << (d->perm[i] / nelt4);
48115 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48118 for (i = 0; i < 2; ++i)
48120 unsigned int cnt = 0;
48121 for (j = 0; j < 4; ++j)
48122 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48130 dremap.vmode = V4DImode;
48132 dremap.target = gen_reg_rtx (V4DImode);
48133 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48134 dremap.op1 = dremap.op0;
48135 dremap.one_operand_p = true;
48136 for (i = 0; i < 2; ++i)
48138 unsigned int cnt = 0;
48139 for (j = 0; j < 4; ++j)
48140 if ((contents[i] & (1u << j)) != 0)
48141 dremap.perm[2 * i + cnt++] = j;
48142 for (; cnt < 2; ++cnt)
48143 dremap.perm[2 * i + cnt] = 0;
48147 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48148 dfinal.op1 = dfinal.op0;
48149 dfinal.one_operand_p = true;
48150 for (i = 0, j = 0; i < nelt; ++i)
48154 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48155 if ((d->perm[i] / nelt4) == dremap.perm[j])
48157 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48158 dfinal.perm[i] |= nelt4;
48160 gcc_unreachable ();
48163 ok = expand_vec_perm_1 (&dremap);
48166 ok = expand_vec_perm_1 (&dfinal);
48172 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48173 a vector permutation using two instructions, vperm2f128 resp.
48174 vperm2i128 followed by any single in-lane permutation. */
48177 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48179 struct expand_vec_perm_d dfirst, dsecond;
48180 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  /* Only 32-byte vectors; integer modes additionally need AVX2 for
     vperm2i128.  */
48184 || GET_MODE_SIZE (d->vmode) != 32
48185 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
  /* Probe candidate second shuffles with testing_p set, so nothing is
     emitted until a workable combination is found.  */
48189 dsecond.one_operand_p = false;
48190 dsecond.testing_p = true;
48192 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48193 immediate. For perm < 16 the second permutation uses
48194 d->op0 as first operand, for perm >= 16 it uses d->op1
48195 as first operand. The second operand is the result of
48197 for (perm = 0; perm < 32; perm++)
48199 /* Ignore permutations which do not move anything cross-lane. */
48202 /* The second shuffle for e.g. V4DFmode has
48203 0123 and ABCD operands.
48204 Ignore AB23, as 23 is already in the second lane
48205 of the first operand. */
48206 if ((perm & 0xc) == (1 << 2)) continue;
48207 /* And 01CD, as 01 is in the first lane of the first
48209 if ((perm & 3) == 0) continue;
48210 /* And 4567, as then the vperm2[fi]128 doesn't change
48211 anything on the original 4567 second operand. */
48212 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48216 /* The second shuffle for e.g. V4DFmode has
48217 4567 and ABCD operands.
48218 Ignore AB67, as 67 is already in the second lane
48219 of the first operand. */
48220 if ((perm & 0xc) == (3 << 2)) continue;
48221 /* And 45CD, as 45 is in the first lane of the first
48223 if ((perm & 3) == 2) continue;
48224 /* And 0123, as then the vperm2[fi]128 doesn't change
48225 anything on the original 0123 first operand. */
48226 if ((perm & 0xf) == (1 << 2)) continue;
  /* Build dsecond.perm: elements whose source lane matches the lane
     produced by this vperm2[fi]128 candidate come from operand 2
     (the lane-shuffle result, hence the "nelt +" offset); the rest
     come straight from the unshuffled operand.  */
48229 for (i = 0; i < nelt; i++)
48231 j = d->perm[i] / nelt2;
48232 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48233 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48234 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48235 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48243 ok = expand_vec_perm_1 (&dsecond);
48254 /* Found a usable second shuffle. dfirst will be
48255 vperm2f128 on d->op0 and d->op1. */
48256 dsecond.testing_p = false;
48258 dfirst.target = gen_reg_rtx (d->vmode);
48259 for (i = 0; i < nelt; i++)
48260 dfirst.perm[i] = (i & (nelt2 - 1))
48261 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48263 canonicalize_perm (&dfirst);
48264 ok = expand_vec_perm_1 (&dfirst);
48267 /* And dsecond is some single insn shuffle, taking
48268 d->op0 and result of vperm2f128 (if perm < 16) or
48269 d->op1 and result of vperm2f128 (otherwise). */
48271 dsecond.op0 = dsecond.op1;
48272 dsecond.op1 = dfirst.target;
48274 ok = expand_vec_perm_1 (&dsecond);
48280 /* For one operand, the only useful vperm2f128 permutation is 0x01
48282 if (d->one_operand_p)
48289 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48290 a two vector permutation using 2 intra-lane interleave insns
48291 and cross-lane shuffle for 32-byte vectors. */
48294 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48297 rtx (*gen) (rtx, rtx, rtx);
48299 if (d->one_operand_p)
48301 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48303 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
  /* The permutation must be an interleave of consecutive elements:
     perm[0] selects the low (0) or high (nelt / 2) half, and each
     even/odd pair must take matching elements from op0 and op1.  */
48309 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48311 for (i = 0; i < nelt; i += 2)
48312 if (d->perm[i] != d->perm[0] + i / 2
48313 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
  /* Dispatch on mode to the matching high/low interleave expander.  */
48323 gen = gen_vec_interleave_highv32qi;
48325 gen = gen_vec_interleave_lowv32qi;
48329 gen = gen_vec_interleave_highv16hi;
48331 gen = gen_vec_interleave_lowv16hi;
48335 gen = gen_vec_interleave_highv8si;
48337 gen = gen_vec_interleave_lowv8si;
48341 gen = gen_vec_interleave_highv4di;
48343 gen = gen_vec_interleave_lowv4di;
48347 gen = gen_vec_interleave_highv8sf;
48349 gen = gen_vec_interleave_lowv8sf;
48353 gen = gen_vec_interleave_highv4df;
48355 gen = gen_vec_interleave_lowv4df;
48358 gcc_unreachable ();
48361 emit_insn (gen (d->target, d->op0, d->op1));
48365 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48366 a single vector permutation using a single intra-lane vector
48367 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48368 the non-swapped and swapped vectors together. */
48371 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48373 struct expand_vec_perm_d dfirst, dsecond;
48374 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48377 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48381 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48382 || !d->one_operand_p)
  /* 0xff marks a not-yet-assigned slot of the intra-lane shuffle.  */
48386 for (i = 0; i < nelt; i++)
48387 dfirst.perm[i] = 0xff;
  /* Place each requested element into the lane the blend will read it
     from; bail out (conflict) if a slot would need two different
     sources.  */
48388 for (i = 0, msk = 0; i < nelt; i++)
48390 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48391 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48393 dfirst.perm[j] = d->perm[i];
  /* Fill the remaining slots with the identity so dfirst is a full
     permutation.  */
48397 for (i = 0; i < nelt; i++)
48398 if (dfirst.perm[i] == 0xff)
48399 dfirst.perm[i] = i;
48402 dfirst.target = gen_reg_rtx (dfirst.vmode);
48405 ok = expand_vec_perm_1 (&dfirst);
48406 seq = get_insns ();
  /* dsecond swaps the two 128-bit lanes of dfirst's result:
     element i comes from i ^ nelt2.  */
48418 dsecond.op0 = dfirst.target;
48419 dsecond.op1 = dfirst.target;
48420 dsecond.one_operand_p = true;
48421 dsecond.target = gen_reg_rtx (dsecond.vmode);
48422 for (i = 0; i < nelt; i++)
48423 dsecond.perm[i] = i ^ nelt2;
48425 ok = expand_vec_perm_1 (&dsecond);
  /* Blend the unswapped and swapped vectors under mask MSK.  */
48428 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48429 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48433 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48434 permutation using two vperm2f128, followed by a vshufpd insn blending
48435 the two vectors together. */
48438 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48440 struct expand_vec_perm_d dfirst, dsecond, dthird;
48443 if (!TARGET_AVX || (d->vmode != V4DFmode))
  /* dfirst gathers the aligned double-pairs holding perm[0]/perm[2],
     dsecond those holding perm[1]/perm[3]; dthird (vshufpd) then picks
     the low/high double within each pair.  */
48453 dfirst.perm[0] = (d->perm[0] & ~1);
48454 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48455 dfirst.perm[2] = (d->perm[2] & ~1);
48456 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48457 dsecond.perm[0] = (d->perm[1] & ~1);
48458 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48459 dsecond.perm[2] = (d->perm[3] & ~1);
48460 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  /* perm % 2 selects even (0) or odd (1) double; +4/+2/+6 address the
     second operand / upper lane of the final two-operand shuffle.  */
48461 dthird.perm[0] = (d->perm[0] % 2);
48462 dthird.perm[1] = (d->perm[1] % 2) + 4;
48463 dthird.perm[2] = (d->perm[2] % 2) + 2;
48464 dthird.perm[3] = (d->perm[3] % 2) + 6;
48466 dfirst.target = gen_reg_rtx (dfirst.vmode);
48467 dsecond.target = gen_reg_rtx (dsecond.vmode);
48468 dthird.op0 = dfirst.target;
48469 dthird.op1 = dsecond.target;
48470 dthird.one_operand_p = false;
48472 canonicalize_perm (&dfirst);
48473 canonicalize_perm (&dsecond);
  /* All three sub-permutations must be single-insn expandable.  */
48475 ok = expand_vec_perm_1 (&dfirst)
48476 && expand_vec_perm_1 (&dsecond)
48477 && expand_vec_perm_1 (&dthird);
48484 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48485 permutation with two pshufb insns and an ior. We should have already
48486 failed all two instruction sequences. */
48489 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48491 rtx rperm[2][16], vperm, l, h, op, m128;
48492 unsigned int i, nelt, eltsz;
48494 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48496 gcc_assert (!d->one_operand_p);
48502 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48504 /* Generate two permutation masks. If the required element is within
48505 the given vector it is shuffled into the proper lane. If the required
48506 element is in the other vector, force a zero into the lane by setting
48507 bit 7 in the permutation mask. */
  /* pshufb zeroes a destination byte when bit 7 of the mask byte is
     set; -128 is exactly that.  */
48508 m128 = GEN_INT (-128);
48509 for (i = 0; i < nelt; ++i)
48511 unsigned j, e = d->perm[i];
48512 unsigned which = (e >= nelt);
48516 for (j = 0; j < eltsz; ++j)
48518 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48519 rperm[1-which][i*eltsz + j] = m128;
  /* L = pshufb of op0 with mask 0 (op1-sourced bytes zeroed).  */
48523 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48524 vperm = force_reg (V16QImode, vperm);
48526 l = gen_reg_rtx (V16QImode);
48527 op = gen_lowpart (V16QImode, d->op0);
48528 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
  /* H = pshufb of op1 with mask 1 (op0-sourced bytes zeroed).  */
48530 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48531 vperm = force_reg (V16QImode, vperm);
48533 h = gen_reg_rtx (V16QImode);
48534 op = gen_lowpart (V16QImode, d->op1);
48535 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
  /* OR the two halves together; go through a fresh V16QI register when
     the target's mode differs.  */
48538 if (d->vmode != V16QImode)
48539 op = gen_reg_rtx (V16QImode);
48540 emit_insn (gen_iorv16qi3 (op, l, h));
48541 if (op != d->target)
48542 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48547 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
48548 with two vpshufb insns, vpermq and vpor. We should have already failed
48549 all two or three instruction sequences. */
48552 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48554 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48555 unsigned int i, nelt, eltsz;
48558 || !d->one_operand_p
48559 || (d->vmode != V32QImode && d->vmode != V16HImode))
48566 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48568 /* Generate two permutation masks. If the required element is within
48569 the same lane, it is shuffled in. If the required element from the
48570 other lane, force a zero by setting bit 7 in the permutation mask.
48571 In the other mask the mask has non-negative elements if element
48572 is requested from the other lane, but also moved to the other lane,
48573 so that the result of vpshufb can have the two V2TImode halves
  /* -128 (bit 7 set) zeroes the byte in the vpshufb result.  */
48575 m128 = GEN_INT (-128);
48576 for (i = 0; i < nelt; ++i)
  /* WHICH is nonzero (== half-vector byte size) when the element has
     to cross the 128-bit lane boundary.  */
48578 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48579 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48581 for (j = 0; j < eltsz; ++j)
48583 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48584 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
  /* H = cross-lane bytes shuffled within their source lane.  */
48588 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48589 vperm = force_reg (V32QImode, vperm);
48591 h = gen_reg_rtx (V32QImode);
48592 op = gen_lowpart (V32QImode, d->op0);
48593 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48595 /* Swap the 128-byte lanes of h into hp. */
48596 hp = gen_reg_rtx (V4DImode);
48597 op = gen_lowpart (V4DImode, h);
48598 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
  /* L = same-lane bytes, already in final position.  */
48601 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48602 vperm = force_reg (V32QImode, vperm);
48604 l = gen_reg_rtx (V32QImode);
48605 op = gen_lowpart (V32QImode, d->op0);
48606 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
  /* Combine with vpor; copy through when target mode differs.  */
48609 if (d->vmode != V32QImode)
48610 op = gen_reg_rtx (V32QImode);
48611 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48612 if (op != d->target)
48613 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48618 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48619 and extract-odd permutations of two V32QImode and V16QImode operand
48620 with two vpshufb insns, vpor and vpermq. We should have already
48621 failed all two or three instruction sequences. */
48624 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48626 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48627 unsigned int i, nelt, eltsz;
48630 || d->one_operand_p
48631 || (d->vmode != V32QImode && d->vmode != V16HImode))
  /* Verify PERM really is an extract-even/odd pattern.  */
48634 for (i = 0; i < d->nelt; ++i)
48635 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48642 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48644 /* Generate two permutation masks. In the first permutation mask
48645 the first quarter will contain indexes for the first half
48646 of the op0, the second quarter will contain bit 7 set, third quarter
48647 will contain indexes for the second half of the op0 and the
48648 last quarter bit 7 set. In the second permutation mask
48649 the first quarter will contain bit 7 set, the second quarter
48650 indexes for the first half of the op1, the third quarter bit 7 set
48651 and last quarter indexes for the second half of the op1.
48652 I.e. the first mask e.g. for V32QImode extract even will be:
48653 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48654 (all values masked with 0xf except for -128) and second mask
48655 for extract even will be
48656 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48657 m128 = GEN_INT (-128);
48658 for (i = 0; i < nelt; ++i)
48660 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48661 unsigned which = d->perm[i] >= nelt;
  /* XORing the byte index with 24 relocates the middle two quarters
     (it flips bits 3 and 4 of the mask position).  */
48662 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48664 for (j = 0; j < eltsz; ++j)
48666 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48667 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
  /* L = vpshufb of op0 with mask 0, H = vpshufb of op1 with mask 1.  */
48671 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48672 vperm = force_reg (V32QImode, vperm);
48674 l = gen_reg_rtx (V32QImode);
48675 op = gen_lowpart (V32QImode, d->op0);
48676 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48678 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48679 vperm = force_reg (V32QImode, vperm);
48681 h = gen_reg_rtx (V32QImode);
48682 op = gen_lowpart (V32QImode, d->op1);
48683 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48685 ior = gen_reg_rtx (V32QImode);
48686 emit_insn (gen_iorv32qi3 (ior, l, h));
48688 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48689 op = gen_reg_rtx (V4DImode);
48690 ior = gen_lowpart (V4DImode, ior);
48691 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48692 const1_rtx, GEN_INT (3)));
48693 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48698 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48699 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48700 with two "and" and "pack" or two "shift" and "pack" insns. We should
48701 have already failed all two instruction sequences. */
48704 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48706 rtx op, dop0, dop1, t, rperm[16];
48707 unsigned i, odd, c, s, nelt = d->nelt;
48708 bool end_perm = false;
48709 machine_mode half_mode;
48710 rtx (*gen_and) (rtx, rtx, rtx);
48711 rtx (*gen_pack) (rtx, rtx, rtx);
48712 rtx (*gen_shift) (rtx, rtx, rtx);
48714 if (d->one_operand_p)
  /* Select half-width mode plus the and/pack/shift generators for each
     supported vector mode.  */
48720 /* Required for "pack". */
48721 if (!TARGET_SSE4_1)
48725 half_mode = V4SImode;
48726 gen_and = gen_andv4si3;
48727 gen_pack = gen_sse4_1_packusdw;
48728 gen_shift = gen_lshrv4si3;
48731 /* No check as all instructions are SSE2. */
48734 half_mode = V8HImode;
48735 gen_and = gen_andv8hi3;
48736 gen_pack = gen_sse2_packuswb;
48737 gen_shift = gen_lshrv8hi3;
48744 half_mode = V8SImode;
48745 gen_and = gen_andv8si3;
48746 gen_pack = gen_avx2_packusdw;
48747 gen_shift = gen_lshrv8si3;
48755 half_mode = V16HImode;
48756 gen_and = gen_andv16hi3;
48757 gen_pack = gen_avx2_packuswb;
48758 gen_shift = gen_lshrv16hi3;
48762 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48763 general shuffles. */
48767 /* Check that permutation is even or odd. */
48772 for (i = 1; i < nelt; ++i)
48773 if (d->perm[i] != 2 * i + odd)
48779 dop0 = gen_reg_rtx (half_mode);
48780 dop1 = gen_reg_rtx (half_mode);
  /* Even extraction: mask each wide element with constant C, then
     pack narrows the two operands into the target.  */
48783 for (i = 0; i < nelt / 2; i++)
48784 rperm[i] = GEN_INT (c);
48785 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48786 t = force_reg (half_mode, t);
48787 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48788 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
  /* Odd extraction: logical right shift instead of mask.  */
48792 emit_insn (gen_shift (dop0,
48793 gen_lowpart (half_mode, d->op0),
48795 emit_insn (gen_shift (dop1,
48796 gen_lowpart (half_mode, d->op1),
48799 /* In AVX2 for 256 bit case we need to permute pack result. */
48800 if (TARGET_AVX2 && end_perm)
48802 op = gen_reg_rtx (d->vmode);
48803 t = gen_reg_rtx (V4DImode);
48804 emit_insn (gen_pack (op, dop0, dop1));
48805 emit_insn (gen_avx2_permv4di_1 (t,
48806 gen_lowpart (V4DImode, op),
48811 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48814 emit_insn (gen_pack (d->target, dop0, dop1));
48819 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48820 and extract-odd permutations. */
48823 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48825 rtx t1, t2, t3, t4, t5;
  /* V4DF: split lanes with vperm2f128, finish with unpck[lh]pd.  */
48832 t1 = gen_reg_rtx (V4DFmode);
48833 t2 = gen_reg_rtx (V4DFmode);
48835 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48836 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48837 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48839 /* Now an unpck[lh]pd will produce the result required. */
48841 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48843 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
  /* shufps immediate 0xdd picks odd, 0x88 picks even elements.  */
48849 int mask = odd ? 0xdd : 0x88;
48853 t1 = gen_reg_rtx (V8SFmode);
48854 t2 = gen_reg_rtx (V8SFmode);
48855 t3 = gen_reg_rtx (V8SFmode);
48857 /* Shuffle within the 128-bit lanes to produce:
48858 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48859 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48862 /* Shuffle the lanes around to produce:
48863 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48864 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48867 /* Shuffle within the 128-bit lanes to produce:
48868 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48869 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48871 /* Shuffle within the 128-bit lanes to produce:
48872 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48873 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48875 /* Shuffle the lanes around to produce:
48876 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48877 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48886 /* These are always directly implementable by expand_vec_perm_1. */
48887 gcc_unreachable ();
48891 return expand_vec_perm_even_odd_pack (d);
48892 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48893 return expand_vec_perm_pshufb2 (d);
48898 /* We need 2*log2(N)-1 operations to achieve odd/even
48899 with interleave. */
48900 t1 = gen_reg_rtx (V8HImode);
48901 t2 = gen_reg_rtx (V8HImode);
48902 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48903 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48904 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48905 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48907 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48909 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48915 return expand_vec_perm_even_odd_pack (d);
48919 return expand_vec_perm_even_odd_pack (d);
  /* V4DI: retry the request as V4DF and copy the result back.  */
48924 struct expand_vec_perm_d d_copy = *d;
48925 d_copy.vmode = V4DFmode;
48927 d_copy.target = gen_lowpart (V4DFmode, d->target);
48929 d_copy.target = gen_reg_rtx (V4DFmode);
48930 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48931 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48932 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48935 emit_move_insn (d->target,
48936 gen_lowpart (V4DImode, d_copy.target));
48945 t1 = gen_reg_rtx (V4DImode);
48946 t2 = gen_reg_rtx (V4DImode);
48948 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48949 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48950 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48952 /* Now an vpunpck[lh]qdq will produce the result required. */
48954 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48956 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
  /* V8SI: retry the request as V8SF and copy the result back.  */
48963 struct expand_vec_perm_d d_copy = *d;
48964 d_copy.vmode = V8SFmode;
48966 d_copy.target = gen_lowpart (V8SFmode, d->target);
48968 d_copy.target = gen_reg_rtx (V8SFmode);
48969 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48970 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48971 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48974 emit_move_insn (d->target,
48975 gen_lowpart (V8SImode, d_copy.target));
48984 t1 = gen_reg_rtx (V8SImode);
48985 t2 = gen_reg_rtx (V8SImode);
48986 t3 = gen_reg_rtx (V4DImode);
48987 t4 = gen_reg_rtx (V4DImode);
48988 t5 = gen_reg_rtx (V4DImode);
48990 /* Shuffle the lanes around into
48991 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48992 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48993 gen_lowpart (V4DImode, d->op1),
48995 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48996 gen_lowpart (V4DImode, d->op1),
48999 /* Swap the 2nd and 3rd position in each lane into
49000 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49001 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49002 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49003 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49004 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49006 /* Now an vpunpck[lh]qdq will produce
49007 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
49009 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49010 gen_lowpart (V4DImode, t2));
49012 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49013 gen_lowpart (V4DImode, t2));
49015 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49019 gcc_unreachable ();
49025 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49026 extract-even and extract-odd permutations. */
49029 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49031 unsigned i, odd, nelt = d->nelt;
49034 if (odd != 0 && odd != 1)
  /* The pattern must be exactly { odd, 2+odd, 4+odd, ... }.  */
49037 for (i = 1; i < nelt; ++i)
49038 if (d->perm[i] != 2 * i + odd)
49041 return expand_vec_perm_even_odd_1 (d, odd);
49044 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49045 permutations. We assume that expand_vec_perm_1 has already failed. */
49048 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49050 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49051 machine_mode vmode = d->vmode;
49052 unsigned char perm2[4];
49053 rtx op0 = d->op0, dest;
49060 /* These are special-cased in sse.md so that we can optionally
49061 use the vbroadcast instruction. They expand to two insns
49062 if the input happens to be in a register. */
49063 gcc_unreachable ();
49069 /* These are always implementable using standard shuffle patterns. */
49070 gcc_unreachable ();
49074 /* These can be implemented via interleave. We save one insn by
49075 stopping once we have promoted to V4SImode and then use pshufd. */
  /* Pick low/high interleave according to which half ELT lives in,
     widening the element each round until V4SImode is reached.  */
49081 rtx (*gen) (rtx, rtx, rtx)
49082 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49083 : gen_vec_interleave_lowv8hi;
49087 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49088 : gen_vec_interleave_highv8hi;
49093 dest = gen_reg_rtx (vmode);
49094 emit_insn (gen (dest, op0, op0));
49095 vmode = get_mode_wider_vector (vmode);
49096 op0 = gen_lowpart (vmode, dest);
49098 while (vmode != V4SImode);
  /* Finally replicate the surviving V4SI element with pshufd.  */
49100 memset (perm2, elt, 4);
49101 dest = gen_reg_rtx (V4SImode);
49102 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49105 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49113 /* For AVX2 broadcasts of the first element vpbroadcast* or
49114 vpermq should be used by expand_vec_perm_1. */
49115 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49119 gcc_unreachable ();
49123 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49124 broadcast permutations. */
49127 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49129 unsigned i, elt, nelt = d->nelt;
49131 if (!d->one_operand_p)
  /* A broadcast replicates one source element: every perm[i] must
     equal the same index ELT.  */
49135 for (i = 1; i < nelt; ++i)
49136 if (d->perm[i] != elt)
49139 return expand_vec_perm_broadcast_1 (d);
49142 /* Implement arbitrary permutations of two V64QImode operands
49143 will 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49145 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49147 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49153 struct expand_vec_perm_d ds[2];
49154 rtx rperm[128], vperm, target0, target1;
49155 unsigned int i, nelt;
49156 machine_mode vmode;
  /* Two word-granular (V32HI) sub-permutations approximate the byte
     permutation; vpshufb then fixes up the low/high byte selection.  */
49161 for (i = 0; i < 2; i++)
49164 ds[i].vmode = V32HImode;
49166 ds[i].target = gen_reg_rtx (V32HImode);
49167 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49168 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49171 /* Prepare permutations such that the first one takes care of
49172 putting the even bytes into the right positions or one higher
49173 positions (ds[0]) and the second one takes care of
49174 putting the odd bytes into the right positions or one below
49177 for (i = 0; i < nelt; i++)
49179 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
  /* rperm[0..63] is the pshufb mask for ds[0]'s result, rperm[64..127]
     for ds[1]'s; constm1 leaves unrelated bytes to the other mask.  */
49182 rperm[i] = constm1_rtx;
49183 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49187 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49188 rperm[i + 64] = constm1_rtx;
49192 bool ok = expand_vec_perm_1 (&ds[0]);
49194 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49196 ok = expand_vec_perm_1 (&ds[1]);
49198 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49200 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49201 vperm = force_reg (vmode, vperm);
49202 target0 = gen_reg_rtx (V64QImode);
49203 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49205 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49206 vperm = force_reg (vmode, vperm);
49207 target1 = gen_reg_rtx (V64QImode);
49208 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
  /* Merge the even-byte and odd-byte results.  */
49210 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49214 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
49215 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49216 all the shorter instruction sequences. */
49219 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49221 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49222 unsigned int i, nelt, eltsz;
49226 || d->one_operand_p
49227 || (d->vmode != V32QImode && d->vmode != V16HImode))
49234 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49236 /* Generate 4 permutation masks. If the required element is within
49237 the same lane, it is shuffled in. If the required element from the
49238 other lane, force a zero by setting bit 7 in the permutation mask.
49239 In the other mask the mask has non-negative elements if element
49240 is requested from the other lane, but also moved to the other lane,
49241 so that the result of vpshufb can have the two V2TImode halves
  /* Start with every mask byte set to -128 ("zero this byte").  */
49243 m128 = GEN_INT (-128);
49244 for (i = 0; i < 32; ++i)
49246 rperm[0][i] = m128;
49247 rperm[1][i] = m128;
49248 rperm[2][i] = m128;
49249 rperm[3][i] = m128;
  /* Masks 0/1 serve op0 (same-lane / cross-lane), masks 2/3 serve op1;
     track which masks are actually populated.  */
49255 for (i = 0; i < nelt; ++i)
49257 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49258 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49259 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49261 for (j = 0; j < eltsz; ++j)
49262 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49263 used[which] = true;
  /* Emit the cross-lane vpshufb's (odd-numbered masks) when used.  */
49266 for (i = 0; i < 2; ++i)
49268 if (!used[2 * i + 1])
49273 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49274 gen_rtvec_v (32, rperm[2 * i + 1]));
49275 vperm = force_reg (V32QImode, vperm);
49276 h[i] = gen_reg_rtx (V32QImode);
49277 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49278 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49281 /* Swap the 128-byte lanes of h[X]. */
49282 for (i = 0; i < 2; ++i)
49284 if (h[i] == NULL_RTX)
49286 op = gen_reg_rtx (V4DImode);
49287 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49288 const2_rtx, GEN_INT (3), const0_rtx,
49290 h[i] = gen_lowpart (V32QImode, op);
  /* Emit the same-lane vpshufb's (even-numbered masks).  */
49293 for (i = 0; i < 2; ++i)
49300 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49301 vperm = force_reg (V32QImode, vperm);
49302 l[i] = gen_reg_rtx (V32QImode);
49303 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49304 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
  /* Combine per-operand same-lane and cross-lane parts with vpor.  */
49307 for (i = 0; i < 2; ++i)
49311 op = gen_reg_rtx (V32QImode);
49312 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49319 gcc_assert (l[0] && l[1]);
  /* Final vpor merges the op0 and op1 contributions.  */
49321 if (d->vmode != V32QImode)
49322 op = gen_reg_rtx (V32QImode);
49323 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49324 if (op != d->target)
49325 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49329 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49330 With all of the interface bits taken care of, perform the expansion
49331 in D and return true on success. */
49334 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
  /* Strategies are tried in order of increasing instruction count, so
     the first one that succeeds is also the cheapest known.  */
49336 /* Try a single instruction expansion. */
49337 if (expand_vec_perm_1 (d))
49340 /* Try sequences of two instructions. */
49342 if (expand_vec_perm_pshuflw_pshufhw (d))
49345 if (expand_vec_perm_palignr (d, false))
49348 if (expand_vec_perm_interleave2 (d))
49351 if (expand_vec_perm_broadcast (d))
49354 if (expand_vec_perm_vpermq_perm_1 (d))
49357 if (expand_vec_perm_vperm2f128 (d))
49360 if (expand_vec_perm_pblendv (d))
49363 /* Try sequences of three instructions. */
49365 if (expand_vec_perm_even_odd_pack (d))
49368 if (expand_vec_perm_2vperm2f128_vshuf (d))
49371 if (expand_vec_perm_pshufb2 (d))
49374 if (expand_vec_perm_interleave3 (d))
49377 if (expand_vec_perm_vperm2f128_vblend (d))
49380 /* Try sequences of four instructions. */
49382 if (expand_vec_perm_vpshufb2_vpermq (d))
49385 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49388 if (expand_vec_perm_vpermi2_vpshub2 (d))
49391 /* ??? Look for narrow permutations whose element orderings would
49392 allow the promotion to a wider mode. */
49394 /* ??? Look for sequences of interleave or a wider permute that place
49395 the data into the correct lanes for a half-vector shuffle like
49396 pshuf[lh]w or vpermilps. */
49398 /* ??? Look for sequences of interleave that produce the desired results.
49399 The combinatorics of punpck[lh] get pretty ugly... */
49401 if (expand_vec_perm_even_odd (d))
49404 /* Even longer sequences. */
49405 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49411 /* If a permutation only uses one operand, make it clear. Returns true
49412 if the permutation references both operands. */
49415 canonicalize_perm (struct expand_vec_perm_d *d)
49417 int i, which, nelt = d->nelt;
  /* WHICH accumulates bit 1 when op0 elements are referenced and
     bit 2 when op1 elements are.  */
49419 for (i = which = 0; i < nelt; ++i)
49420 which |= (d->perm[i] < nelt ? 1 : 2);
49422 d->one_operand_p = true;
49429 if (!rtx_equal_p (d->op0, d->op1))
49431 d->one_operand_p = false;
49434 /* The elements of PERM do not suggest that only the first operand
49435 is used, but both operands are identical. Allow easier matching
49436 of the permutation by folding the permutation into the single
  /* Fold all indices into the 0..nelt-1 range of the single operand.  */
49441 for (i = 0; i < nelt; ++i)
49442 d->perm[i] &= nelt - 1;
49451 return (which == 3);
/* Expand a constant vector permutation.  OPERANDS[0] is the target,
   OPERANDS[1] and OPERANDS[2] the input vectors, and the selector is
   a CONST_VECTOR (presumably OPERANDS[3] — elided in this excerpt;
   TODO confirm).  Returns true on successful expansion.  */
49455 ix86_expand_vec_perm_const (rtx operands[4])
49457 struct expand_vec_perm_d d;
49458 unsigned char perm[MAX_VECT_LEN];
49463 d.target = operands[0];
49464 d.op0 = operands[1];
49465 d.op1 = operands[2];
49468 d.vmode = GET_MODE (d.target);
49469 gcc_assert (VECTOR_MODE_P (d.vmode));
49470 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49471 d.testing_p = false;
49473 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49474 gcc_assert (XVECLEN (sel, 0) == nelt);
49475 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
  /* Selector indices are reduced modulo 2*nelt; PERM keeps a pristine
     copy for the possible retry below.  */
49477 for (i = 0; i < nelt; ++i)
49479 rtx e = XVECEXP (sel, 0, i);
49480 int ei = INTVAL (e) & (2 * nelt - 1);
49485 two_args = canonicalize_perm (&d);
49487 if (ix86_expand_vec_perm_const_1 (&d))
49490 /* If the selector says both arguments are needed, but the operands are the
49491 same, the above tried to expand with one_operand_p and flattened selector.
49492 If that didn't work, retry without one_operand_p; we succeeded with that
49494 if (two_args && d.one_operand_p)
49496 d.one_operand_p = false;
49497 memcpy (d.perm, perm, sizeof (perm));
49498 return ix86_expand_vec_perm_const_1 (&d);
49504 /* Implement targetm.vectorize.vec_perm_const_ok. */
49507 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49508 const unsigned char *sel)
49510 struct expand_vec_perm_d d;
49511 unsigned int i, nelt, which;
49515 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49516 d.testing_p = true;
49518 /* Given sufficient ISA support we can just return true here
49519 for selected vector modes. */
49526 if (TARGET_AVX512F)
49527 /* All implementable with a single vpermi2 insn. */
49531 if (TARGET_AVX512BW)
49532 /* All implementable with a single vpermi2 insn. */
49536 if (TARGET_AVX512BW)
49537 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49544 if (TARGET_AVX512VL)
49545 /* All implementable with a single vpermi2 insn. */
49550 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49555 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49562 /* All implementable with a single vpperm insn. */
49565 /* All implementable with 2 pshufb + 1 ior. */
49571 /* All implementable with shufpd or unpck[lh]pd. */
49577 /* Extract the values from the vector CST into the permutation
49579 memcpy (d.perm, sel, nelt);
49580 for (i = which = 0; i < nelt; ++i)
49582 unsigned char e = d.perm[i];
49583 gcc_assert (e < 2 * nelt);
49584 which |= (e < nelt ? 1 : 2);
49587 /* For all elements from second vector, fold the elements to first. */
49589 for (i = 0; i < nelt; ++i)
49592 /* Check whether the mask can be applied to the vector type. */
49593 d.one_operand_p = (which != 3);
49595 /* Implementable with shufps or pshufd. */
49596 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49599 /* Otherwise we have to go through the motions and see if we can
49600 figure out how to generate the requested permutation. */
  /* Raw virtual registers are only placeholders: with testing_p set the
     expanders probe feasibility rather than emit real code.  */
49601 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49602 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49603 if (!d.one_operand_p)
49604 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49607 ret = ix86_expand_vec_perm_const_1 (&d);
/* Extract the even (ODD == 0) or odd (ODD == 1) elements of OP0/OP1
   into TARG, viewed as one double-length sequence.  */
49614 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49616 struct expand_vec_perm_d d;
49622 d.vmode = GET_MODE (targ);
49623 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49624 d.one_operand_p = false;
49625 d.testing_p = false;
/* Selector picks every second element, offset by ODD.  */
49627 for (i = 0; i < nelt; ++i)
49628 d.perm[i] = i * 2 + odd;
49630 /* We'll either be able to implement the permutation directly... */
49631 if (expand_vec_perm_1 (&d))
49634 /* ... or we use the special-case patterns. */
49635 expand_vec_perm_even_odd_1 (&d, odd);
/* Interleave (unpack) elements of OP0 and OP1 into TARG.  HIGH_P
   selects the high halves of the inputs instead of the low halves.  */
49639 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49641 struct expand_vec_perm_d d;
49642 unsigned i, nelt, base;
49648 d.vmode = GET_MODE (targ);
49649 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49650 d.one_operand_p = false;
49651 d.testing_p = false;
/* BASE indexes the first element of the chosen input half.  */
49653 base = high_p ? nelt / 2 : 0;
49654 for (i = 0; i < nelt / 2; ++i)
49656 d.perm[i * 2] = i + base;
49657 d.perm[i * 2 + 1] = i + base + nelt;
49660 /* Note that for AVX this isn't one instruction. */
49661 ok = ix86_expand_vec_perm_const_1 (&d);
49666 /* Expand a vector operation CODE for a V*QImode in terms of the
49667 same operation on V*HImode. */
/* Strategy: widen both QImode operands to HImode halves, perform
   CODE on each half, then permute the byte results back together.
   NOTE(review): some original lines are elided in this excerpt.  */
49670 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49672 machine_mode qimode = GET_MODE (dest);
49673 machine_mode himode;
49674 rtx (*gen_il) (rtx, rtx, rtx);
49675 rtx (*gen_ih) (rtx, rtx, rtx);
49676 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49677 struct expand_vec_perm_d d;
49678 bool ok, full_interleave;
49679 bool uns_p = false;
/* Pick the low/high interleave generators for the vector width.  */
49686 gen_il = gen_vec_interleave_lowv16qi;
49687 gen_ih = gen_vec_interleave_highv16qi;
49690 himode = V16HImode;
49691 gen_il = gen_avx2_interleave_lowv32qi;
49692 gen_ih = gen_avx2_interleave_highv32qi;
49695 himode = V32HImode;
49696 gen_il = gen_avx512bw_interleave_lowv64qi;
49697 gen_ih = gen_avx512bw_interleave_highv64qi;
49700 gcc_unreachable ();
49703 op2_l = op2_h = op2;
49707 /* Unpack data such that we've got a source byte in each low byte of
49708 each word. We don't care what goes into the high byte of each word.
49709 Rather than trying to get zero in there, most convenient is to let
49710 it be a copy of the low byte. */
49711 op2_l = gen_reg_rtx (qimode);
49712 op2_h = gen_reg_rtx (qimode);
49713 emit_insn (gen_il (op2_l, op2, op2));
49714 emit_insn (gen_ih (op2_h, op2, op2));
49717 op1_l = gen_reg_rtx (qimode);
49718 op1_h = gen_reg_rtx (qimode);
49719 emit_insn (gen_il (op1_l, op1, op1));
49720 emit_insn (gen_ih (op1_h, op1, op1));
49721 full_interleave = qimode == V16QImode;
/* Alternative path: sign/zero-extend via sse_unpack.  */
49729 op1_l = gen_reg_rtx (himode);
49730 op1_h = gen_reg_rtx (himode);
49731 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49732 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49733 full_interleave = true;
49736 gcc_unreachable ();
49739 /* Perform the operation. */
49740 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49742 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49744 gcc_assert (res_l && res_h);
49746 /* Merge the data back into the right place. */
49748 d.op0 = gen_lowpart (qimode, res_l);
49749 d.op1 = gen_lowpart (qimode, res_h);
49751 d.nelt = GET_MODE_NUNITS (qimode);
49752 d.one_operand_p = false;
49753 d.testing_p = false;
49755 if (full_interleave)
49757 /* For SSE2, we used an full interleave, so the desired
49758 results are in the even elements. */
49759 for (i = 0; i < 64; ++i)
49764 /* For AVX, the interleave used above was not cross-lane. So the
49765 extraction is evens but with the second and third quarter swapped.
49766 Happily, that is even one insn shorter than even extraction. */
49767 for (i = 0; i < 64; ++i)
49768 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49771 ok = ix86_expand_vec_perm_const_1 (&d);
/* Note the whole-operation result for later RTL optimizers.  */
49774 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49775 gen_rtx_fmt_ee (code, qimode, op1, op2));
49778 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49779 if op is CONST_VECTOR with all odd elements equal to their
49780 preceding element. */
49783 const_vector_equal_evenodd_p (rtx op)
49785 machine_mode mode = GET_MODE (op);
49786 int i, nunits = GET_MODE_NUNITS (mode);
49787 if (GET_CODE (op) != CONST_VECTOR
49788 || nunits != CONST_VECTOR_NUNITS (op))
/* Compare each even element with its odd successor.  */
49790 for (i = 0; i < nunits; i += 2)
49791 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (or, with ODD_P, odd)
   SImode elements of OP1 and OP2 into DImode elements of DEST.
   UNS_P selects unsigned multiplication.
   NOTE(review): some original lines are elided in this excerpt.  */
49797 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49798 bool uns_p, bool odd_p)
49800 machine_mode mode = GET_MODE (op1);
49801 machine_mode wmode = GET_MODE (dest);
49803 rtx orig_op1 = op1, orig_op2 = op2;
49805 if (!nonimmediate_operand (op1, mode))
49806 op1 = force_reg (mode, op1);
49807 if (!nonimmediate_operand (op2, mode))
49808 op2 = force_reg (mode, op2);
49810 /* We only play even/odd games with vectors of SImode. */
49811 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49813 /* If we're looking for the odd results, shift those members down to
49814 the even slots. For some cpus this is faster than a PSHUFD. */
49817 /* For XOP use vpmacsdqh, but only for smult, as it is only
49819 if (TARGET_XOP && mode == V4SImode && !uns_p)
49821 x = force_reg (wmode, CONST0_RTX (wmode));
49822 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Shift right by one element's bits, skipping operands whose odd
   elements already equal their even neighbours.  */
49826 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49827 if (!const_vector_equal_evenodd_p (orig_op1))
49828 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49829 x, NULL, 1, OPTAB_DIRECT);
49830 if (!const_vector_equal_evenodd_p (orig_op2))
49831 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49832 x, NULL, 1, OPTAB_DIRECT);
49833 op1 = gen_lowpart (mode, op1);
49834 op2 = gen_lowpart (mode, op2);
/* Dispatch on vector width and signedness to the matching pattern.  */
49837 if (mode == V16SImode)
49840 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49842 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49844 else if (mode == V8SImode)
49847 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49849 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49852 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49853 else if (TARGET_SSE4_1)
49854 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49857 rtx s1, s2, t0, t1, t2;
49859 /* The easiest way to implement this without PMULDQ is to go through
49860 the motions as if we are performing a full 64-bit multiply. With
49861 the exception that we need to do less shuffling of the elements. */
49863 /* Compute the sign-extension, aka highparts, of the two operands. */
49864 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49865 op1, pc_rtx, pc_rtx);
49866 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49867 op2, pc_rtx, pc_rtx);
49869 /* Multiply LO(A) * HI(B), and vice-versa. */
49870 t1 = gen_reg_rtx (wmode);
49871 t2 = gen_reg_rtx (wmode);
49872 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49873 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49875 /* Multiply LO(A) * LO(B). */
49876 t0 = gen_reg_rtx (wmode);
49877 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49879 /* Combine and shift the highparts into place. */
49880 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49881 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49884 /* Combine high and low parts. */
49885 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (or, with HIGH_P, high) half
   of OP1 and OP2 into DEST.  UNS_P selects unsigned multiplication.
   NOTE(review): several original lines are elided in this excerpt;
   the mode dispatch structure is only partially visible.  */
49892 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49893 bool uns_p, bool high_p)
49895 machine_mode wmode = GET_MODE (dest);
49896 machine_mode mode = GET_MODE (op1);
49897 rtx t1, t2, t3, t4, mask;
49902 t1 = gen_reg_rtx (mode);
49903 t2 = gen_reg_rtx (mode);
49904 if (TARGET_XOP && !uns_p)
49906 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49907 shuffle the elements once so that all elements are in the right
49908 place for immediate use: { A C B D }. */
49909 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49910 const1_rtx, GEN_INT (3)));
49911 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49912 const1_rtx, GEN_INT (3)));
49916 /* Put the elements into place for the multiply. */
49917 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49918 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49921 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49925 /* Shuffle the elements between the lanes. After this we
49926 have { A B E F | C D G H } for each operand. */
49927 t1 = gen_reg_rtx (V4DImode);
49928 t2 = gen_reg_rtx (V4DImode);
49929 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49930 const0_rtx, const2_rtx,
49931 const1_rtx, GEN_INT (3)));
49932 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49933 const0_rtx, const2_rtx,
49934 const1_rtx, GEN_INT (3)));
49936 /* Shuffle the elements within the lanes. After this we
49937 have { A A B B | C C D D } or { E E F F | G G H H }. */
49938 t3 = gen_reg_rtx (V8SImode)
49939 t4 = gen_reg_rtx (V8SImode);
/* PSHUFD immediate duplicating the high or low pair in each lane.  */
49940 mask = GEN_INT (high_p
49941 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49942 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49943 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49944 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49946 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* Fallback: compute low and high products separately, then
   interleave the chosen halves.  */
49951 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49952 uns_p, OPTAB_DIRECT);
49953 t2 = expand_binop (mode,
49954 uns_p ? umul_highpart_optab : smul_highpart_optab,
49955 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49956 gcc_assert (t1 && t2);
49958 t3 = gen_reg_rtx (mode);
49959 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49960 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Or widen both operands and multiply in the wider mode.  */
49968 t1 = gen_reg_rtx (wmode);
49969 t2 = gen_reg_rtx (wmode);
49970 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49971 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49973 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49977 gcc_unreachable ();
/* Expand a V4SImode multiply OP0 = OP1 * OP2 using only SSE2, via
   two even/odd widening multiplies merged with an interleave.  */
49982 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49984 rtx res_1, res_2, res_3, res_4;
49986 res_1 = gen_reg_rtx (V4SImode);
49987 res_2 = gen_reg_rtx (V4SImode);
49988 res_3 = gen_reg_rtx (V2DImode);
49989 res_4 = gen_reg_rtx (V2DImode);
/* Unsigned even/odd products; low 32 bits are what we keep.  */
49990 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49991 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49993 /* Move the results in element 2 down to element 1; we don't care
49994 what goes in elements 2 and 3. Then we can merge the parts
49995 back together with an interleave.
49997 Note that two other sequences were tried:
49998 (1) Use interleaves at the start instead of psrldq, which allows
49999 us to use a single shufps to merge things back at the end.
50000 (2) Use shufps here to combine the two vectors, then pshufd to
50001 put the elements in the correct order.
50002 In both cases the cost of the reformatting stall was too high
50003 and the overall sequence slower. */
50005 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50006 const0_rtx, const2_rtx,
50007 const0_rtx, const0_rtx));
50008 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50009 const0_rtx, const2_rtx,
50010 const0_rtx, const0_rtx));
50011 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* Record the multiply as a REG_EQUAL note for later optimizers.  */
50013 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand a V*DImode multiply OP0 = OP1 * OP2.  Uses a direct insn
   when AVX512DQ(+VL) provides one, a XOP sequence for V2DImode,
   and otherwise a schoolbook 32x32 decomposition.
   NOTE(review): some original lines are elided in this excerpt.  */
50017 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50019 machine_mode mode = GET_MODE (op0);
50020 rtx t1, t2, t3, t4, t5, t6;
50022 if (TARGET_AVX512DQ && mode == V8DImode)
50023 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50024 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50025 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50026 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50027 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2))
50028 else if (TARGET_XOP && mode == V2DImode)
50030 /* op1: A,B,C,D, op2: E,F,G,H */
50031 op1 = gen_lowpart (V4SImode, op1);
50032 op2 = gen_lowpart (V4SImode, op2);
50034 t1 = gen_reg_rtx (V4SImode);
50035 t2 = gen_reg_rtx (V4SImode);
50036 t3 = gen_reg_rtx (V2DImode);
50037 t4 = gen_reg_rtx (V2DImode);
50040 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50046 /* t2: (B*E),(A*F),(D*G),(C*H) */
50047 emit_insn (gen_mulv4si3 (t2, t1, op2));
50049 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50050 emit_insn (gen_xop_phadddq (t3, t2));
50052 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50053 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50055 /* Multiply lower parts and add all */
50056 t5 = gen_reg_rtx (V2DImode);
50057 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50058 gen_lowpart (V4SImode, op1),
50059 gen_lowpart (V4SImode, op2)));
50060 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic path: pick the even-widening multiply for this width.  */
50065 machine_mode nmode;
50066 rtx (*umul) (rtx, rtx, rtx);
50068 if (mode == V2DImode)
50070 umul = gen_vec_widen_umult_even_v4si;
50073 else if (mode == V4DImode)
50075 umul = gen_vec_widen_umult_even_v8si;
50078 else if (mode == V8DImode)
50080 umul = gen_vec_widen_umult_even_v16si;
50084 gcc_unreachable ();
50087 /* Multiply low parts. */
50088 t1 = gen_reg_rtx (mode);
50089 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50091 /* Shift input vectors right 32 bits so we can multiply high parts. */
50093 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50094 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50096 /* Multiply high parts by low parts. */
50097 t4 = gen_reg_rtx (mode);
50098 t5 = gen_reg_rtx (mode);
50099 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50100 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50102 /* Combine and shift the highparts back. */
50103 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50104 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50106 /* Combine high and low parts. */
50107 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
/* Note the full multiply for later RTL optimizers.  */
50110 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50111 gen_rtx_MULT (mode, op1, op2));
50114 /* Return 1 if control transfer instruction INSN
50115 should be encoded with bnd prefix.
50116 If insn is NULL then return 1 when control
50117 transfer instructions should be prefixed with
50118 bnd by default for current function. */
50121 ix86_bnd_prefixed_insn_p (rtx insn)
50123 /* For call insns check special flag. */
50124 if (insn && CALL_P (insn))
50126 rtx call = get_call_rtx_from (insn);
50128 return CALL_EXPR_WITH_BOUNDS_P (call);
50131 /* All other insns are prefixed only if function is instrumented. */
50132 return chkp_function_instrumented_p (current_function_decl);
50135 /* Calculate integer abs() using only SSE2 instructions. */
/* TARGET = |INPUT| elementwise; the strategy depends on the element
   width (shift/xor/sub for 32-bit, smax for 16-bit, umin for 8-bit).  */
50138 ix86_expand_sse2_abs (rtx target, rtx input)
50140 machine_mode mode = GET_MODE (target);
50145 /* For 32-bit signed integer X, the best way to calculate the absolute
50146 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
50148 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50149 GEN_INT (GET_MODE_BITSIZE
50150 (GET_MODE_INNER (mode)) - 1),
50151 NULL, 0, OPTAB_DIRECT);
50152 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50153 NULL, 0, OPTAB_DIRECT);
50154 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50155 target, 0, OPTAB_DIRECT);
50158 /* For 16-bit signed integer X, the best way to calculate the absolute
50159 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50161 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50163 x = expand_simple_binop (mode, SMAX, tmp0, input,
50164 target, 0, OPTAB_DIRECT);
50167 /* For 8-bit signed integer X, the best way to calculate the absolute
50168 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50169 as SSE2 provides the PMINUB insn. */
50171 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50173 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50174 target, 0, OPTAB_DIRECT);
50178 gcc_unreachable ();
/* expand_simple_binop may have used a fresh pseudo; copy to TARGET.  */
50182 emit_move_insn (target, x);
50185 /* Expand an insert into a vector register through pinsr insn.
50186 Return true if successful. */
/* OPERANDS: [0] destination, [1] bit size of the field, [2] bit
   position, [3] source value.  */
50189 ix86_expand_pinsr (rtx *operands)
50191 rtx dst = operands[0];
50192 rtx src = operands[3];
50194 unsigned int size = INTVAL (operands[1]);
50195 unsigned int pos = INTVAL (operands[2]);
/* Fold a SUBREG destination into an adjusted bit position.  */
50197 if (GET_CODE (dst) == SUBREG)
50199 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50200 dst = SUBREG_REG (dst);
50203 if (GET_CODE (src) == SUBREG)
50204 src = SUBREG_REG (src);
50206 switch (GET_MODE (dst))
50213 machine_mode srcmode, dstmode;
50214 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50216 srcmode = mode_for_size (size, MODE_INT, 0);
/* Pick the pinsr variant matching the element size; byte, dword
   and qword variants require SSE4.1 (qword also 64-bit mode).  */
50221 if (!TARGET_SSE4_1)
50223 dstmode = V16QImode;
50224 pinsr = gen_sse4_1_pinsrb;
50230 dstmode = V8HImode;
50231 pinsr = gen_sse2_pinsrw;
50235 if (!TARGET_SSE4_1)
50237 dstmode = V4SImode;
50238 pinsr = gen_sse4_1_pinsrd;
50242 gcc_assert (TARGET_64BIT);
50243 if (!TARGET_SSE4_1)
50245 dstmode = V2DImode;
50246 pinsr = gen_sse4_1_pinsrq;
50254 if (GET_MODE (dst) != dstmode)
50255 d = gen_reg_rtx (dstmode);
50256 src = gen_lowpart (srcmode, src);
/* The pinsr immediate is an element-index bitmask.  */
50260 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50261 GEN_INT (1 << pos)));
50263 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50272 /* This function returns the calling abi specific va_list type node.
50273 It returns the FNDECL specific va_list type. */
50276 ix86_fn_abi_va_list (tree fndecl)
50279 return va_list_type_node;
50280 gcc_assert (fndecl != NULL_TREE);
/* MS-ABI functions use the Microsoft va_list; everything else the
   System V one.  */
50282 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50283 return ms_va_list_type_node;
50285 return sysv_va_list_type_node;
50288 /* Returns the canonical va_list type specified by TYPE. If there
50289 is no valid TYPE provided, it return NULL_TREE. */
/* Tries, in order, the default va_list, the SysV va_list and the MS
   va_list, unwrapping array decay on both sides before comparing
   main variants.  */
50292 ix86_canonical_va_list_type (tree type)
50296 /* Resolve references and pointers to va_list type. */
50297 if (TREE_CODE (type) == MEM_REF)
50298 type = TREE_TYPE (type);
50299 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50300 type = TREE_TYPE (type);
50301 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50302 type = TREE_TYPE (type);
50304 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50306 wtype = va_list_type_node;
50307 gcc_assert (wtype != NULL_TREE);
50309 if (TREE_CODE (wtype) == ARRAY_TYPE)
50311 /* If va_list is an array type, the argument may have decayed
50312 to a pointer type, e.g. by being passed to another function.
50313 In that case, unwrap both types so that we can compare the
50314 underlying records. */
50315 if (TREE_CODE (htype) == ARRAY_TYPE
50316 || POINTER_TYPE_P (htype))
50318 wtype = TREE_TYPE (wtype);
50319 htype = TREE_TYPE (htype);
50322 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50323 return va_list_type_node;
/* Second candidate: the System V va_list.  */
50324 wtype = sysv_va_list_type_node;
50325 gcc_assert (wtype != NULL_TREE);
50327 if (TREE_CODE (wtype) == ARRAY_TYPE)
50329 /* If va_list is an array type, the argument may have decayed
50330 to a pointer type, e.g. by being passed to another function.
50331 In that case, unwrap both types so that we can compare the
50332 underlying records. */
50333 if (TREE_CODE (htype) == ARRAY_TYPE
50334 || POINTER_TYPE_P (htype))
50336 wtype = TREE_TYPE (wtype);
50337 htype = TREE_TYPE (htype);
50340 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50341 return sysv_va_list_type_node;
/* Third candidate: the Microsoft va_list.  */
50342 wtype = ms_va_list_type_node;
50343 gcc_assert (wtype != NULL_TREE);
50345 if (TREE_CODE (wtype) == ARRAY_TYPE)
50347 /* If va_list is an array type, the argument may have decayed
50348 to a pointer type, e.g. by being passed to another function.
50349 In that case, unwrap both types so that we can compare the
50350 underlying records. */
50351 if (TREE_CODE (htype) == ARRAY_TYPE
50352 || POINTER_TYPE_P (htype))
50354 wtype = TREE_TYPE (wtype);
50355 htype = TREE_TYPE (htype);
50358 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50359 return ms_va_list_type_node;
/* Fall back to the target-independent canonicalization.  */
50362 return std_canonical_va_list_type (type);
50365 /* Iterate through the target-specific builtin types for va_list.
50366 IDX denotes the iterator, *PTREE is set to the result type of
50367 the va_list builtin, and *PNAME to its internal type.
50368 Returns zero if there is no element for this index, otherwise
50369 IDX should be increased upon the next call.
50370 Note, do not iterate a base builtin's name like __builtin_va_list.
50371 Used from c_common_nodes_and_builtins. */
50374 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Two entries are exposed: the MS and the SysV va_list builtins.  */
50384 *ptree = ms_va_list_type_node;
50385 *pname = "__builtin_ms_va_list";
50389 *ptree = sysv_va_list_type_node;
50390 *pname = "__builtin_sysv_va_list";
/* Scheduler hook overrides and tuning constants for the dispatch
   window scheduler defined below.  NOTE(review): these limits appear
   to model a specific CPU's dispatch window -- confirm against the
   complete original file.  */
50398 #undef TARGET_SCHED_DISPATCH
50399 #define TARGET_SCHED_DISPATCH has_dispatch
50400 #undef TARGET_SCHED_DISPATCH_DO
50401 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50402 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50403 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50404 #undef TARGET_SCHED_REORDER
50405 #define TARGET_SCHED_REORDER ix86_sched_reorder
50406 #undef TARGET_SCHED_ADJUST_PRIORITY
50407 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50408 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50409 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50410 ix86_dependencies_evaluation_hook
50412 /* The size of the dispatch window is the total number of bytes of
50413 object code allowed in a window. */
50414 #define DISPATCH_WINDOW_SIZE 16
50416 /* Number of dispatch windows considered for scheduling. */
50417 #define MAX_DISPATCH_WINDOWS 3
50419 /* Maximum number of instructions in a window. */
50422 /* Maximum number of immediate operands in a window. */
50425 /* Maximum number of immediate bits allowed in a window. */
50426 #define MAX_IMM_SIZE 128
50428 /* Maximum number of 32 bit immediates allowed in a window. */
50429 #define MAX_IMM_32 4
50431 /* Maximum number of 64 bit immediates allowed in a window. */
50432 #define MAX_IMM_64 2
50434 /* Maximum total of loads or prefetches allowed in a window. */
50437 /* Maximum total of stores allowed in a window. */
50438 #define MAX_STORE 1
50444 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50445 enum dispatch_group {
50460 /* Number of allowable groups in a dispatch window. It is an array
50461 indexed by dispatch_group enum. 100 is used as a big number,
50462 because the number of these kind of operations does not have any
50463 effect in dispatch window, but we need them for other reasons in
50465 static unsigned int num_allowable_groups[disp_last] = {
50466 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
/* Human-readable names for dispatch groups, used for dumps.  */
50469 char group_name[disp_last + 1][16] = {
50470 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50471 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50472 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50475 /* Instruction path. */
50478 path_single, /* Single micro op. */
50479 path_double, /* Double micro op. */
50480 path_multi, /* Instructions with more than 2 micro op.. */
50484 /* sched_insn_info defines a window to the instructions scheduled in
50485 the basic block. It contains a pointer to the insn_info table and
50486 the instruction scheduled.
50488 Windows are allocated for each basic block and are linked
50490 typedef struct sched_insn_info_s {
50492 enum dispatch_group group;
50493 enum insn_path path;
50498 /* Linked list of dispatch windows. This is a two way list of
50499 dispatch windows of a basic block. It contains information about
50500 the number of uops in the window and the total number of
50501 instructions and of bytes in the object code for this dispatch
50503 typedef struct dispatch_windows_s {
50504 int num_insn; /* Number of insn in the window. */
50505 int num_uops; /* Number of uops in the window. */
50506 int window_size; /* Number of bytes in the window. */
50507 int window_num; /* Window number between 0 or 1. */
50508 int num_imm; /* Number of immediates in an insn. */
50509 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50510 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50511 int imm_size; /* Total immediates in the window. */
50512 int num_loads; /* Total memory loads in the window. */
50513 int num_stores; /* Total memory stores in the window. */
50514 int violation; /* Violation exists in window. */
50515 sched_insn_info *window; /* Pointer to the window. */
50516 struct dispatch_windows_s *next;
50517 struct dispatch_windows_s *prev;
50518 } dispatch_windows;
50520 /* Immediate values used in an insn. */
50521 typedef struct imm_info_s
/* The two active dispatch windows (see allocate_next_window).  */
50528 static dispatch_windows *dispatch_window_list;
50529 static dispatch_windows *dispatch_window_list1;
50531 /* Get dispatch group of insn. */
/* Classify INSN by its memory behaviour (load/store/both), based on
   the insn's "memory" attribute.  */
50533 static enum dispatch_group
50534 get_mem_group (rtx_insn *insn)
50536 enum attr_memory memory;
/* Unrecognized insns have no attributes; treat as no group.  */
50538 if (INSN_CODE (insn) < 0)
50539 return disp_no_group;
50540 memory = get_attr_memory (insn);
50541 if (memory == MEMORY_STORE)
50544 if (memory == MEMORY_LOAD)
50547 if (memory == MEMORY_BOTH)
50548 return disp_load_store;
50550 return disp_no_group;
50553 /* Return true if insn is a compare instruction. */
50556 is_cmp (rtx_insn *insn)
50558 enum attr_type type;
50560 type = get_attr_type (insn);
/* Covers test/icmp/fcmp attribute types plus raw COMPARE patterns.  */
50561 return (type == TYPE_TEST
50562 || type == TYPE_ICMP
50563 || type == TYPE_FCMP
50564 || GET_CODE (PATTERN (insn)) == COMPARE);
50567 /* Return true if a dispatch violation encountered. */
/* Checks the most recent window (the tail of the two-window list).  */
50570 dispatch_violation (void)
50572 if (dispatch_window_list->next)
50573 return dispatch_window_list->next->violation;
50574 return dispatch_window_list->violation;
50577 /* Return true if insn is a branch instruction. */
50580 is_branch (rtx insn)
50582 return (CALL_P (insn) || JUMP_P (insn));
50585 /* Return true if insn is a prefetch instruction. */
50588 is_prefetch (rtx insn)
50590 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50593 /* This function initializes a dispatch window and the list container holding a
50594 pointer to the window. */
/* WINDOW_NUM selects which of the two static windows to reset.  */
50597 init_window (int window_num)
50600 dispatch_windows *new_list;
50602 if (window_num == 0)
50603 new_list = dispatch_window_list;
50605 new_list = dispatch_window_list1;
/* Zero all counters and unlink the window.  */
50607 new_list->num_insn = 0;
50608 new_list->num_uops = 0;
50609 new_list->window_size = 0;
50610 new_list->next = NULL;
50611 new_list->prev = NULL;
50612 new_list->window_num = window_num;
50613 new_list->num_imm = 0;
50614 new_list->num_imm_32 = 0;
50615 new_list->num_imm_64 = 0;
50616 new_list->imm_size = 0;
50617 new_list->num_loads = 0;
50618 new_list->num_stores = 0;
50619 new_list->violation = false;
/* Clear every per-insn slot in the window.  */
50621 for (i = 0; i < MAX_INSN; i++)
50623 new_list->window[i].insn = NULL;
50624 new_list->window[i].group = disp_no_group;
50625 new_list->window[i].path = no_path;
50626 new_list->window[i].byte_len = 0;
50627 new_list->window[i].imm_bytes = 0;
50632 /* This function allocates and initializes a dispatch window and the
50633 list container holding a pointer to the window. */
50635 static dispatch_windows *
50636 allocate_window (void)
50638 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
/* One extra slot beyond MAX_INSN is allocated for the window.  */
50639 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50644 /* This routine initializes the dispatch scheduling information. It
50645 initiates building dispatch scheduler tables and constructs the
50646 first dispatch window. */
50649 init_dispatch_sched (void)
50651 /* Allocate a dispatch list and a window. */
50652 dispatch_window_list = allocate_window ();
50653 dispatch_window_list1 = allocate_window ();
50658 /* This function returns true if a branch is detected. End of a basic block
50659 does not have to be a branch, but here we assume only branches end a
50663 is_end_basic_block (enum dispatch_group group)
50665 return group == disp_branch;
50668 /* This function is called when the end of a window processing is reached. */
50671 process_end_window (void)
50673 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
/* When a second window exists, the combined size of both windows must
   not exceed 48 bytes (see allocate_next_window).  */
50676 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50677 gcc_assert (dispatch_window_list->window_size
50678 + dispatch_window_list1->window_size <= 48);
50674 if (dispatch_window_list->next)
50684 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50685 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50686 for 48 bytes of instructions. Note that these windows are not dispatch
50687 windows that their sizes are DISPATCH_WINDOW_SIZE. */
50689 static dispatch_windows *
50690 allocate_next_window (int window_num)
50692 if (window_num == 0)
50694 if (dispatch_window_list->next)
50697 return dispatch_window_list;
/* Link window 1 after window 0.  */
50700 dispatch_window_list->next = dispatch_window_list1;
50701 dispatch_window_list1->prev = dispatch_window_list;
50703 return dispatch_window_list1;
50706 /* Compute number of immediate operands of an instruction. */
/* Walks every sub-rtx of IN_RTX and tallies immediates into
   IMM_VALUES (total / 32-bit / 64-bit counts).  */
50709 find_constant (rtx in_rtx, imm_info *imm_values)
50711 if (INSN_P (in_rtx))
50712 in_rtx = PATTERN (in_rtx);
50713 subrtx_iterator::array_type array;
50714 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50715 if (const_rtx x = *iter)
50716 switch (GET_CODE (x))
/* An integer constant counts as 32-bit when it fits SImode.  */
50721 (imm_values->imm)++;
50722 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50723 (imm_values->imm32)++;
50725 (imm_values->imm64)++;
50729 (imm_values->imm)++;
50730 (imm_values->imm64)++;
/* Normal label references count as 32-bit immediates.  */
50734 if (LABEL_KIND (x) == LABEL_NORMAL)
50736 (imm_values->imm)++;
50737 (imm_values->imm32)++;
50746 /* Return total size of immediate operands of an instruction along with number
50747 of corresponding immediate-operands. It initializes its parameters to zero
50748 before calling FIND_CONSTANT.
50749 INSN is the input instruction. IMM is the total of immediates.
50750 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50754 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50756 imm_info imm_values = {0, 0, 0};
50758 find_constant (insn, &imm_values);
50759 *imm = imm_values.imm;
50760 *imm32 = imm_values.imm32;
50761 *imm64 = imm_values.imm64;
/* Size in bytes: 4 per 32-bit immediate, 8 per 64-bit immediate.  */
50762 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50765 /* This function indicates if an operand of an instruction is an
/* Nonzero iff INSN carries any immediate operand (the returned value
   is the total immediate byte size from get_num_immediates).  */
50769 has_immediate (rtx insn)
50771 int num_imm_operand;
50772 int num_imm32_operand;
50773 int num_imm64_operand;
50776 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50777 &num_imm64_operand);
50781 /* Return single or double path for instructions. */
50783 static enum insn_path
50784 get_insn_path (rtx_insn *insn)
50786 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
/* Decode attribute 0 -> single uop, 1 -> double uop.  */
50788 if ((int)path == 0)
50789 return path_single;
50791 if ((int)path == 1)
50792 return path_double;
50797 /* Return insn dispatch group. */
50799 static enum dispatch_group
50800 get_insn_group (rtx_insn *insn)
/* Memory classification takes precedence; then branch, immediate and
   prefetch checks.  */
50802 enum dispatch_group group = get_mem_group (insn);
50806 if (is_branch (insn))
50807 return disp_branch;
50812 if (has_immediate (insn))
50815 if (is_prefetch (insn))
50816 return disp_prefetch;
50818 return disp_no_group;
50821 /* Count number of GROUP restricted instructions in a dispatch
50822 window WINDOW_LIST. */
/* NOTE(review): several return statements were elided from this
   listing; the visible conditions decide whether adding INSN would
   violate the window's immediate-size / load-store limits.  */
50825 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50827 enum dispatch_group group = get_insn_group (insn);
50829 int num_imm_operand;
50830 int num_imm32_operand;
50831 int num_imm64_operand;
50833 if (group == disp_no_group)
50836 if (group == disp_imm)
50838 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50839 &num_imm64_operand);
/* Immediate-group limits: total immediate bytes, total immediate
   count, and the mixed 32/64-bit packing rules for the window.  */
50840 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50841 || num_imm_operand + window_list->num_imm > MAX_IMM
50842 || (num_imm32_operand > 0
50843 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50844 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50845 || (num_imm64_operand > 0
50846 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50847 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50848 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50849 && num_imm64_operand > 0
50850 && ((window_list->num_imm_64 > 0
50851 && window_list->num_insn >= 2)
50852 || window_list->num_insn >= 3)))
/* Load/store-group limits: per-window maximum loads and stores.  */
50858 if ((group == disp_load_store
50859 && (window_list->num_loads >= MAX_LOAD
50860 || window_list->num_stores >= MAX_STORE))
50861 || ((group == disp_load
50862 || group == disp_prefetch)
50863 && window_list->num_loads >= MAX_LOAD)
50864 || (group == disp_store
50865 && window_list->num_stores >= MAX_STORE))
50871 /* This function returns true if insn satisfies dispatch rules on the
50872 last window scheduled. */
50875 fits_dispatch_window (rtx_insn *insn)
50877 dispatch_windows *window_list = dispatch_window_list;
50878 dispatch_windows *window_list_next = dispatch_window_list->next;
50879 unsigned int num_restrict;
50880 enum dispatch_group group = get_insn_group (insn);
50881 enum insn_path path = get_insn_path (insn);
50884 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50885 instructions should be given the lowest priority in the
50886 scheduling process in Haifa scheduler to make sure they will be
50887 scheduled in the same dispatch window as the reference to them. */
50888 if (group == disp_jcc || group == disp_cmp)
50891 /* Check nonrestricted. */
50892 if (group == disp_no_group || group == disp_branch)
50895 /* Get last dispatch window. */
50896 if (window_list_next)
50897 window_list = window_list_next;
50899 if (window_list->window_num == 1)
/* Combined size of windows 0 and 1; 48 bytes is the hard cap for the
   pair (see add_to_dispatch_window).  */
50901 sum = window_list->prev->window_size + window_list->window_size;
50904 || (min_insn_size (insn) + sum) >= 48)
50905 /* Window 1 is full. Go for next window. */
50909 num_restrict = count_num_restricted (insn, window_list);
50911 if (num_restrict > num_allowable_groups[group])
50914 /* See if it fits in the first window. */
50915 if (window_list->window_num == 0)
50917 /* The first window should have only single and double path
   uops (closing text elided in this listing).  */
50919 if (path == path_double
50920 && (window_list->num_uops + 2) > MAX_INSN)
50922 else if (path != path_single)
50928 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50929 dispatch window WINDOW_LIST. */
50932 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50934 int byte_len = min_insn_size (insn);
50935 int num_insn = window_list->num_insn;
50937 sched_insn_info *window = window_list->window;
50938 enum dispatch_group group = get_insn_group (insn);
50939 enum insn_path path = get_insn_path (insn);
50940 int num_imm_operand;
50941 int num_imm32_operand;
50942 int num_imm64_operand;
/* Record a dispatch violation once per window; disp_cmp insns are
   exempt (they are deliberately scheduled late).  */
50944 if (!window_list->violation && group != disp_cmp
50945 && !fits_dispatch_window (insn))
50946 window_list->violation = true;
50948 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50949 &num_imm64_operand);
50951 /* Initialize window with new instruction. */
50952 window[num_insn].insn = insn;
50953 window[num_insn].byte_len = byte_len;
50954 window[num_insn].group = group;
50955 window[num_insn].path = path;
50956 window[num_insn].imm_bytes = imm_size;
/* Update the window-wide running totals.  */
50958 window_list->window_size += byte_len;
50959 window_list->num_insn = num_insn + 1;
50960 window_list->num_uops = window_list->num_uops + num_uops;
50961 window_list->imm_size += imm_size;
50962 window_list->num_imm += num_imm_operand;
50963 window_list->num_imm_32 += num_imm32_operand;
50964 window_list->num_imm_64 += num_imm64_operand;
/* Load/store bookkeeping; a load_store insn counts as both.  */
50966 if (group == disp_store)
50967 window_list->num_stores += 1;
50968 else if (group == disp_load
50969 || group == disp_prefetch)
50970 window_list->num_loads += 1;
50971 else if (group == disp_load_store)
50973 window_list->num_stores += 1;
50974 window_list->num_loads += 1;
50978 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50979 If the total bytes of instructions or the number of instructions in
50980 the window exceed allowable, it allocates a new window. */
50983 add_to_dispatch_window (rtx_insn *insn)
50986 dispatch_windows *window_list;
50987 dispatch_windows *next_list;
50988 dispatch_windows *window0_list;
50989 enum insn_path path;
50990 enum dispatch_group insn_group;
/* Unrecognized insns cannot be sized/classified; bail out.  */
50998 if (INSN_CODE (insn) < 0)
51001 byte_len = min_insn_size (insn);
51002 window_list = dispatch_window_list;
51003 next_list = window_list->next;
51004 path = get_insn_path (insn);
51005 insn_group = get_insn_group (insn);
51007 /* Get the last dispatch window. */
51009 window_list = dispatch_window_list->next;
/* Map path to uop count; non-single/double paths (lines elided) use
   the raw enum value as the count.  */
51011 if (path == path_single)
51013 else if (path == path_double)
51016 insn_num_uops = (int) path;
51018 /* If current window is full, get a new window.
51019 Window number zero is full, if MAX_INSN uops are scheduled in it.
51020 Window number one is full, if window zero's bytes plus window
51021 one's bytes is 32, or if the bytes of the new instruction added
51022 to the total makes it greater than 48, or it has already MAX_INSN
51023 instructions in it. */
51024 num_insn = window_list->num_insn;
51025 num_uops = window_list->num_uops;
51026 window_num = window_list->window_num;
51027 insn_fits = fits_dispatch_window (insn);
51029 if (num_insn >= MAX_INSN
51030 || num_uops + insn_num_uops > MAX_INSN
/* Flip between window 0 and window 1.  */
51033 window_num = ~window_num & 1;
51034 window_list = allocate_next_window (window_num);
51037 if (window_num == 0)
51039 add_insn_window (insn, window_list, insn_num_uops);
/* A full window ending in a branch terminates the dispatch group.  */
51040 if (window_list->num_insn >= MAX_INSN
51041 && insn_group == disp_branch)
51043 process_end_window ();
51047 else if (window_num == 1)
51049 window0_list = window_list->prev;
51050 sum = window0_list->window_size + window_list->window_size;
51052 || (byte_len + sum) >= 48)
51054 process_end_window ();
51055 window_list = dispatch_window_list;
51058 add_insn_window (insn, window_list, insn_num_uops);
/* window_num can only be 0 or 1.  */
51061 gcc_unreachable ();
51063 if (is_end_basic_block (insn_group))
51065 /* End of basic block is reached do end-basic-block process. */
51066 process_end_window ();
51071 /* Print the dispatch window, WINDOW_NUM, to FILE.
   WINDOW_NUM 0 selects dispatch_window_list, anything else selects
   dispatch_window_list1.  Debug-only helper.  */
51073 DEBUG_FUNCTION static void
51074 debug_dispatch_window_file (FILE *file, int window_num)
51076 dispatch_windows *list;
51079 if (window_num == 0)
51080 list = dispatch_window_list;
51082 list = dispatch_window_list1;
51084 fprintf (file, "Window #%d:\n", list->window_num);
51085 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51086 list->num_insn, list->num_uops, list->window_size);
51087 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51088 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51090 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51092 fprintf (file, " insn info:\n");
/* Dump each occupied slot of the window array.  */
51094 for (i = 0; i < MAX_INSN; i++)
51096 if (!list->window[i].insn)
51098 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51099 i, group_name[list->window[i].group],
51100 i, (void *)list->window[i].insn,
51101 i, list->window[i].path,
51102 i, list->window[i].byte_len,
51103 i, list->window[i].imm_bytes);
51107 /* Print to stdout a dispatch window.  Convenience wrapper around
   debug_dispatch_window_file for use from a debugger.  */
51109 DEBUG_FUNCTION void
51110 debug_dispatch_window (int window_num)
51112 debug_dispatch_window_file (stdout, window_num);
51115 /* Print INSN dispatch information to FILE. */
51117 DEBUG_FUNCTION static void
51118 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51121 enum insn_path path;
51122 enum dispatch_group group;
51124 int num_imm_operand;
51125 int num_imm32_operand;
51126 int num_imm64_operand;
/* Nothing useful to print for unrecognized insns.  */
51128 if (INSN_CODE (insn) < 0)
51131 byte_len = min_insn_size (insn);
51132 path = get_insn_path (insn);
51133 group = get_insn_group (insn);
51134 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51135 &num_imm64_operand);
51137 fprintf (file, " insn info:\n");
51138 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51139 group_name[group], path, byte_len);
51140 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51141 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51144 /* Print to STDERR the status of the ready list with respect to
51145 dispatch windows.
   NOTE(review): despite the comment, the visible code writes to
   stdout -- confirm which stream is intended.  */
51147 DEBUG_FUNCTION void
51148 debug_ready_dispatch (void)
51151 int no_ready = number_in_ready ();
51153 fprintf (stdout, "Number of ready: %d\n", no_ready);
51155 for (i = 0; i < no_ready; i++)
51156 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51159 /* This routine is the driver of the dispatch scheduler.
   MODE selects the action: DISPATCH_INIT resets the scheduler state,
   ADD_TO_DISPATCH_WINDOW records INSN in the current window.  */
51162 do_dispatch (rtx_insn *insn, int mode)
51164 if (mode == DISPATCH_INIT)
51165 init_dispatch_sched ();
51166 else if (mode == ADD_TO_DISPATCH_WINDOW)
51167 add_to_dispatch_window (insn);
51170 /* Return TRUE if Dispatch Scheduling is supported.
   Only active on AMD bdver1-4 with -mdispatch-scheduler; ACTION
   selects the query (switch braces/labels partially elided here).  */
51173 has_dispatch (rtx_insn *insn, int action)
51175 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51176 && flag_dispatch_scheduler)
51182 case IS_DISPATCH_ON:
51187 return is_cmp (insn);
51189 case DISPATCH_VIOLATION:
51190 return dispatch_violation ();
51192 case FITS_DISPATCH_WINDOW:
51193 return fits_dispatch_window (insn);
51199 /* Implementation of reassociation_width target hook used by
51200 reassoc phase to identify parallelism level in reassociated
51201 tree. Statements tree_code is passed in OPC. Arguments type
51204 Currently parallel reassociation is enabled for Atom
51205 processors only and we set reassociation width to be 2
51206 because Atom may issue up to 2 instructions per cycle.
51208 Return value should be fixed if parallel reassociation is
51209 enabled for other processors. */
51212 ix86_reassociation_width (unsigned int, machine_mode mode)
/* Vector modes are handled separately from scalar integer/float
   modes; the returned width constants are elided in this listing.  */
51215 if (VECTOR_MODE_P (mode))
51217 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51224 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51226 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51232 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51233 place emms and femms instructions. */
/* Return the preferred vector mode for scalar MODE, widest first:
   512-bit with AVX-512, 256-bit with AVX (unless -mprefer-avx128),
   otherwise 128-bit SSE.  Switch/case labels are elided here.  */
51235 static machine_mode
51236 ix86_preferred_simd_mode (machine_mode mode)
51244 return TARGET_AVX512BW ? V64QImode :
51245 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51247 return TARGET_AVX512BW ? V32HImode :
51248 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51250 return TARGET_AVX512F ? V16SImode :
51251 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51253 return TARGET_AVX512F ? V8DImode :
51254 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51257 if (TARGET_AVX512F)
51259 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
/* DFmode vectorization is gated on TARGET_VECTORIZE_DOUBLE.  */
51265 if (!TARGET_VECTORIZE_DOUBLE)
51267 else if (TARGET_AVX512F)
51269 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51271 else if (TARGET_SSE2)
51280 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51281 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51282 256bit and 128bit vectors.
   Returns a bitmask of vector sizes in bytes; 0 means "default only". */
51284 static unsigned int
51285 ix86_autovectorize_vector_sizes (void)
51287 return TARGET_AVX512F ? 64 | 32 | 16 :
51288 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51293 /* Return class of registers which could be used for pseudo of MODE
51294 and of class RCLASS for spilling instead of memory. Return NO_REGS
51295 if it is not possible or non-profitable. */
51297 ix86_spill_class (reg_class_t rclass, machine_mode mode)
/* Allow spilling 32-bit (and 64-bit on x86-64) integer pseudos into
   SSE registers when -mgeneral-regs-sse-spill is on and MMX is off.  */
51299 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51300 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51301 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51302 return ALL_SSE_REGS;
51306 /* Implement targetm.vectorize.init_cost.
   Allocate a 3-slot accumulator (prologue/body/epilogue) zeroed out;
   ownership passes to the vectorizer, freed by destroy_cost_data.  */
51309 ix86_init_cost (struct loop *)
51311 unsigned *cost = XNEWVEC (unsigned, 3);
51312 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51316 /* Implement targetm.vectorize.add_stmt_cost.
   Accumulate COUNT statements of kind KIND into the DATA cost array
   at location WHERE and return the cost added.  */
51319 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51320 struct _stmt_vec_info *stmt_info, int misalign,
51321 enum vect_cost_model_location where)
51323 unsigned *cost = (unsigned *) data;
51324 unsigned retval = 0;
51326 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51327 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51329 /* Statements in an inner loop relative to the loop being
51330 vectorized are weighted more heavily. The value here is
51331 arbitrary and could potentially be improved with analysis. */
51332 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51333 count *= 50; /* FIXME. */
51335 retval = (unsigned) (count * stmt_cost);
51337 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51338 for Silvermont as it has out of order integer pipeline and can execute
51339 2 scalar instruction per tick, but has in order SIMD pipeline. */
51340 if (TARGET_SILVERMONT || TARGET_INTEL)
51341 if (stmt_info && stmt_info->stmt)
51343 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51344 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
/* 1.7x penalty approximated with integer arithmetic.  */
51345 retval = (retval * 17) / 10;
51348 cost[where] += retval;
51353 /* Implement targetm.vectorize.finish_cost.
   Copy the accumulated prologue/body/epilogue costs out of DATA.  */
51356 ix86_finish_cost (void *data, unsigned *prologue_cost,
51357 unsigned *body_cost, unsigned *epilogue_cost)
51359 unsigned *cost = (unsigned *) data;
51360 *prologue_cost = cost[vect_prologue];
51361 *body_cost = cost[vect_body];
51362 *epilogue_cost = cost[vect_epilogue];
51365 /* Implement targetm.vectorize.destroy_cost_data.
   Release the accumulator allocated by ix86_init_cost (body elided
   in this listing).  */
51368 ix86_destroy_cost_data (void *data)
51373 /* Validate target specific memory model bits in VAL.
   Diagnose unknown bits and invalid HLE prefix / memory order
   combinations, returning a corrected model.  */
51375 static unsigned HOST_WIDE_INT
51376 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51378 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
/* Reject bits outside the known set, and using ACQUIRE and RELEASE
   HLE prefixes together.  */
51381 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51383 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51385 warning (OPT_Winvalid_memory_model,
51386 "Unknown architecture specific memory model");
51387 return MEMMODEL_SEQ_CST;
51389 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
/* HLE_ACQUIRE requires at least acquire ordering.  */
51390 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51392 warning (OPT_Winvalid_memory_model,
51393 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51394 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
/* HLE_RELEASE requires at least release ordering.  */
51396 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51398 warning (OPT_Winvalid_memory_model,
51399 "HLE_RELEASE not used with RELEASE or stronger memory model");
51400 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51405 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51406 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51407 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51408 or number of vecsize_mangle variants that should be emitted. */
51411 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51412 struct cgraph_simd_clone *clonei,
51413 tree base_type, int num)
/* simdlen, when given, must be a power of two in [2, 16].  */
51417 if (clonei->simdlen
51418 && (clonei->simdlen < 2
51419 || clonei->simdlen > 16
51420 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51422 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51423 "unsupported simdlen %d", clonei->simdlen);
/* Check the return type mode (accepted case labels elided here).  */
51427 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51428 if (TREE_CODE (ret_type) != VOID_TYPE)
51429 switch (TYPE_MODE (ret_type))
51441 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51442 "unsupported return type %qT for simd\n", ret_type);
/* Likewise check each argument's mode.  */
51449 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51450 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51451 switch (TYPE_MODE (TREE_TYPE (t)))
51463 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51464 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
/* Pick the ISA mangling letter: 'b' (SSE2), 'c' (AVX), 'd' (AVX2).  */
51468 if (clonei->cilk_elemental)
51470 /* Parse here processor clause. If not present, default to 'b'. */
51471 clonei->vecsize_mangle = 'b';
51473 else if (!TREE_PUBLIC (node->decl))
51475 /* If the function isn't exported, we can pick up just one ISA
   variant (sentence completion elided in this listing).  */
51478 clonei->vecsize_mangle = 'd';
51479 else if (TARGET_AVX)
51480 clonei->vecsize_mangle = 'c';
51482 clonei->vecsize_mangle = 'b';
/* For exported functions, emit all three variants in turn.  */
51487 clonei->vecsize_mangle = "bcd"[num];
51490 switch (clonei->vecsize_mangle)
51493 clonei->vecsize_int = 128;
51494 clonei->vecsize_float = 128;
51497 clonei->vecsize_int = 128;
51498 clonei->vecsize_float = 256;
51501 clonei->vecsize_int = 256;
51502 clonei->vecsize_float = 256;
/* Derive simdlen from the vector size and base type width when the
   user did not specify one, capping at 16.  */
51505 if (clonei->simdlen == 0)
51507 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51508 clonei->simdlen = clonei->vecsize_int;
51510 clonei->simdlen = clonei->vecsize_float;
51511 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51512 if (clonei->simdlen > 16)
51513 clonei->simdlen = 16;
51518 /* Add target attribute to SIMD clone NODE if needed.
   Maps the clone's vecsize_mangle letter to a target string (case
   labels elided here), applies it as a target attribute, and
   re-selects the current function so the new ISA takes effect.  */
51521 ix86_simd_clone_adjust (struct cgraph_node *node)
51523 const char *str = NULL;
51524 gcc_assert (node->decl == cfun->decl);
51525 switch (node->simdclone->vecsize_mangle)
51540 gcc_unreachable ();
51545 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51546 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51549 ix86_reset_previous_fndecl ();
51550 ix86_set_current_function (node->decl);
51553 /* If SIMD clone NODE can't be used in a vectorized loop
51554 in current function, return -1, otherwise return a badness of using it
51555 (0 if it is most desirable from vecsize_mangle point of view, 1
51556 slightly less desirable, etc.). */
51559 ix86_simd_clone_usable (struct cgraph_node *node)
/* Case labels for the 'b'/'c'/'d' mangle letters were elided from
   this listing; badness depends on which ISAs are enabled.  */
51561 switch (node->simdclone->vecsize_mangle)
51568 return TARGET_AVX2 ? 2 : 1;
51572 return TARGET_AVX2 ? 1 : 0;
51579 gcc_unreachable ();
51583 /* This function adjusts the unroll factor based on
51584 the hardware capabilities. For ex, bdver3 has
51585 a loop buffer which makes unrolling of smaller
51586 loops less important. This function decides the
51587 unroll factor using number of memory references
51588 (value 32 is used) as a heuristic. */
51591 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51596 unsigned mem_count = 0;
51598 if (!TARGET_ADJUST_UNROLL)
51601 /* Count the number of memory references within the loop body.
51602 This value determines the unrolling factor for bdver3 and bdver4
   (sentence completion elided in this listing).  */
51604 subrtx_iterator::array_type array;
51605 bbs = get_loop_body (loop);
51606 for (i = 0; i < loop->num_nodes; i++)
51607 FOR_BB_INSNS (bbs[i], insn)
51608 if (NONDEBUG_INSN_P (insn))
51609 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51610 if (const_rtx x = *iter)
51613 machine_mode mode = GET_MODE (x);
51614 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Cap the unroll factor so that (unroll * mem refs) stays near 32.  */
51622 if (mem_count && mem_count <=32)
51623 return 32/mem_count;
51629 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51632 ix86_float_exceptions_rounding_supported_p (void)
51634 /* For x87 floating point with standard excess precision handling,
51635 there is no adddf3 pattern (since x87 floating point only has
51636 XFmode operations) so the default hook implementation gets this
   wrong; report support explicitly for x87 and SSE math.  */
51638 return TARGET_80387 || TARGET_SSE_MATH;
51641 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.
   Build HOLD/CLEAR/UPDATE trees that save the FP environment, clear
   pending exceptions, and later re-raise any that occurred, covering
   both the x87 unit (fnstenv/fnclex/fnstsw/fldenv) and SSE (MXCSR).  */
51644 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* Nothing to do without any FP unit.  */
51646 if (!TARGET_80387 && !TARGET_SSE_MATH)
51648 tree exceptions_var = create_tmp_var (integer_type_node);
/* x87 path: fnstenv saves the 28-byte environment (7 words), fnclex
   clears exceptions, fnstsw reads the status word for re-raising.  */
51651 tree fenv_index_type = build_index_type (size_int (6));
51652 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51653 tree fenv_var = create_tmp_var (fenv_type);
51654 mark_addressable (fenv_var);
51655 tree fenv_ptr = build_pointer_type (fenv_type);
51656 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51657 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51658 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51659 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51660 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51661 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51662 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51663 tree hold_fnclex = build_call_expr (fnclex, 0);
51664 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51666 *clear = build_call_expr (fnclex, 0);
51667 tree sw_var = create_tmp_var (short_unsigned_type_node);
51668 tree fnstsw_call = build_call_expr (fnstsw, 0);
51669 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51670 sw_var, fnstsw_call);
51671 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51672 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51673 exceptions_var, exceptions_x87);
51674 *update = build2 (COMPOUND_EXPR, integer_type_node,
51675 sw_mod, update_mod);
51676 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51677 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
/* SSE path: save MXCSR, mask all exceptions (0x1f80) and clear the
   sticky flag bits (~0x3f), then restore and merge flags on update.  */
51679 if (TARGET_SSE_MATH)
51681 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51682 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51683 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51684 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51685 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51686 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51687 mxcsr_orig_var, stmxcsr_hold_call);
51688 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51690 build_int_cst (unsigned_type_node, 0x1f80));
51691 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51692 build_int_cst (unsigned_type_node, 0xffffffc0));
51693 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51694 mxcsr_mod_var, hold_mod_val);
51695 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51696 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51697 hold_assign_orig, hold_assign_mod);
51698 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51699 ldmxcsr_hold_call);
/* Chain onto the x87 HOLD sequence when both units are in use.  */
51701 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51704 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51706 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51707 ldmxcsr_clear_call);
51709 *clear = ldmxcsr_clear_call;
51710 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51711 tree exceptions_sse = fold_convert (integer_type_node,
51712 stxmcsr_update_call);
/* Merge SSE exception flags with any x87 flags gathered above.  */
51715 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51716 exceptions_var, exceptions_sse);
51717 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51718 exceptions_var, exceptions_mod);
51719 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51720 exceptions_assign);
51723 *update = build2 (MODIFY_EXPR, integer_type_node,
51724 exceptions_var, exceptions_sse);
51725 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51726 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51727 ldmxcsr_update_call);
/* Finally re-raise the accumulated exceptions atomically.  */
51729 tree atomic_feraiseexcept
51730 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51731 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51732 1, exceptions_var);
51733 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51734 atomic_feraiseexcept_call);
51737 /* Return mode to be used for bounds or VOIDmode
51738 if bounds are not supported. */
51740 static enum machine_mode
51741 ix86_mpx_bound_mode ()
51743 /* Do not support pointer checker if MPX
   is unavailable (condition lines elided in this listing); warn when
   -fcheck-pointer-bounds was requested anyway.  */
51747 if (flag_check_pointer_bounds)
51748 warning (0, "Pointer Checker requires MPX support on this target."
51749 " Use -mmpx options to enable MPX.");
51756 /* Return constant used to statically initialize constant bounds.
51758 This function is used to create special bound values. For now
51759 only INIT bounds and NONE bounds are expected. More special
51760 values may be added later. */
51763 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
/* The upper bound is stored inverted, hence the swapped zero/minus-one
   selection for HIGH relative to LOW.  */
51765 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51766 : build_zero_cst (pointer_sized_int_node);
51767 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51768 : build_minus_one_cst (pointer_sized_int_node);
51770 /* This function is supposed to be used to create INIT and
51771 NONE bounds only. */
51772 gcc_assert ((lb == 0 && ub == -1)
51773 || (lb == -1 && ub == 0));
51775 return build_complex (NULL, low, high);
51778 /* Generate a list of statements STMTS to initialize pointer bounds
51779 variable VAR with bounds LB and UB. Return the number of generated
   statements (return line elided in this listing).  */
51783 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51785 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51786 tree lhs, modify, var_p;
/* The upper bound is stored in inverted (one's complement) form.  */
51788 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51789 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
/* First word of VAR <- LB.  */
51791 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51792 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51793 append_to_statement_list (modify, stmts);
/* Second word of VAR <- ~UB.  */
51795 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51796 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51797 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51798 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51799 append_to_statement_list (modify, stmts);
51804 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51805 /* For i386, common symbol is local only for non-PIE binaries. For
51806 x86-64, common symbol is local only for non-PIE binaries or linker
51807 supports copy reloc in PIE binaries. */
51810 ix86_binds_local_p (const_tree exp)
/* Delegate to the generic helper; the argument lines between these
   two (flag_pic handling) were elided from this listing.  */
51812 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51815 && HAVE_LD_PIE_COPYRELOC != 0)));
51819 /* Initialize the GCC target structure. */
51820 #undef TARGET_RETURN_IN_MEMORY
51821 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51823 #undef TARGET_LEGITIMIZE_ADDRESS
51824 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51826 #undef TARGET_ATTRIBUTE_TABLE
51827 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51828 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51829 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51830 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51831 # undef TARGET_MERGE_DECL_ATTRIBUTES
51832 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51835 #undef TARGET_COMP_TYPE_ATTRIBUTES
51836 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51838 #undef TARGET_INIT_BUILTINS
51839 #define TARGET_INIT_BUILTINS ix86_init_builtins
51840 #undef TARGET_BUILTIN_DECL
51841 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51842 #undef TARGET_EXPAND_BUILTIN
51843 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51845 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51846 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51847 ix86_builtin_vectorized_function
51849 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51850 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51852 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51853 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51855 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51856 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51858 #undef TARGET_BUILTIN_RECIPROCAL
51859 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51861 #undef TARGET_ASM_FUNCTION_EPILOGUE
51862 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51864 #undef TARGET_ENCODE_SECTION_INFO
51865 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51866 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51868 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51871 #undef TARGET_ASM_OPEN_PAREN
51872 #define TARGET_ASM_OPEN_PAREN ""
51873 #undef TARGET_ASM_CLOSE_PAREN
51874 #define TARGET_ASM_CLOSE_PAREN ""
51876 #undef TARGET_ASM_BYTE_OP
51877 #define TARGET_ASM_BYTE_OP ASM_BYTE
51879 #undef TARGET_ASM_ALIGNED_HI_OP
51880 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51881 #undef TARGET_ASM_ALIGNED_SI_OP
51882 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51884 #undef TARGET_ASM_ALIGNED_DI_OP
51885 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51888 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51889 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51891 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51892 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51894 #undef TARGET_ASM_UNALIGNED_HI_OP
51895 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51896 #undef TARGET_ASM_UNALIGNED_SI_OP
51897 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51898 #undef TARGET_ASM_UNALIGNED_DI_OP
51899 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51901 #undef TARGET_PRINT_OPERAND
51902 #define TARGET_PRINT_OPERAND ix86_print_operand
51903 #undef TARGET_PRINT_OPERAND_ADDRESS
51904 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51905 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51906 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51907 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51908 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51910 #undef TARGET_SCHED_INIT_GLOBAL
51911 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51912 #undef TARGET_SCHED_ADJUST_COST
51913 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51914 #undef TARGET_SCHED_ISSUE_RATE
51915 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51916 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51917 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51918 ia32_multipass_dfa_lookahead
51919 #undef TARGET_SCHED_MACRO_FUSION_P
51920 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51921 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51922 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51924 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51925 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51927 #undef TARGET_MEMMODEL_CHECK
51928 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51930 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51931 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51934 #undef TARGET_HAVE_TLS
51935 #define TARGET_HAVE_TLS true
51937 #undef TARGET_CANNOT_FORCE_CONST_MEM
51938 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51939 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51940 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51942 #undef TARGET_DELEGITIMIZE_ADDRESS
51943 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51945 #undef TARGET_MS_BITFIELD_LAYOUT_P
51946 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51949 #undef TARGET_BINDS_LOCAL_P
51950 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51952 #undef TARGET_BINDS_LOCAL_P
51953 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
51955 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51956 #undef TARGET_BINDS_LOCAL_P
51957 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
/* C++ vtable thunk emission.  */
51960 #undef TARGET_ASM_OUTPUT_MI_THUNK
51961 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51962 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51963 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

/* Per-file assembler prologue.  */
51965 #undef TARGET_ASM_FILE_START
51966 #define TARGET_ASM_FILE_START x86_file_start

/* Command-line option processing.  */
51968 #undef TARGET_OPTION_OVERRIDE
51969 #define TARGET_OPTION_OVERRIDE ix86_option_override

/* Cost model used by the register allocator and combiner.  */
51971 #undef TARGET_REGISTER_MOVE_COST
51972 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51973 #undef TARGET_MEMORY_MOVE_COST
51974 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51975 #undef TARGET_RTX_COSTS
51976 #define TARGET_RTX_COSTS ix86_rtx_costs
51977 #undef TARGET_ADDRESS_COST
51978 #define TARGET_ADDRESS_COST ix86_address_cost

/* Condition-code register description and CCmode compatibility.  */
51980 #undef TARGET_FIXED_CONDITION_CODE_REGS
51981 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51982 #undef TARGET_CC_MODES_COMPATIBLE
51983 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

/* Machine-dependent reorganization pass, run late.  */
51985 #undef TARGET_MACHINE_DEPENDENT_REORG
51986 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

51988 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51989 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

/* va_list construction and folding of target builtins.  */
51991 #undef TARGET_BUILD_BUILTIN_VA_LIST
51992 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

51994 #undef TARGET_FOLD_BUILTIN
51995 #define TARGET_FOLD_BUILTIN ix86_fold_builtin

/* Function multiversioning: priority ordering and dispatcher creation.  */
51997 #undef TARGET_COMPARE_VERSION_PRIORITY
51998 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

52000 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52001 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52002 ix86_generate_version_dispatcher_body

52004 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52005 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52006 ix86_get_function_versions_dispatcher

/* va_list classification (the 64-bit ABI has distinct ms/sysv lists).  */
52008 #undef TARGET_ENUM_VA_LIST_P
52009 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

52011 #undef TARGET_FN_ABI_VA_LIST
52012 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

52014 #undef TARGET_CANONICAL_VA_LIST_TYPE
52015 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

52017 #undef TARGET_EXPAND_BUILTIN_VA_START
52018 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

/* Implicit clobbers for asm statements.  */
52020 #undef TARGET_MD_ASM_CLOBBERS
52021 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

/* Promote small arguments for unprototyped/K&R calls.  */
52023 #undef TARGET_PROMOTE_PROTOTYPES
52024 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
/* Argument-passing convention hooks.  */
52025 #undef TARGET_SETUP_INCOMING_VARARGS
52026 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52027 #undef TARGET_MUST_PASS_IN_STACK
52028 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52029 #undef TARGET_FUNCTION_ARG_ADVANCE
52030 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52031 #undef TARGET_FUNCTION_ARG
52032 #define TARGET_FUNCTION_ARG ix86_function_arg
/* PIC register handled as a pseudo until reload (GCC 5 scheme).  */
52033 #undef TARGET_INIT_PIC_REG
52034 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52035 #undef TARGET_USE_PSEUDO_PIC_REG
52036 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52037 #undef TARGET_FUNCTION_ARG_BOUNDARY
52038 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52039 #undef TARGET_PASS_BY_REFERENCE
52040 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
/* Stack realignment / dynamic realign argument pointer (DRAP).  */
52041 #undef TARGET_INTERNAL_ARG_POINTER
52042 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52043 #undef TARGET_UPDATE_STACK_BOUNDARY
52044 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52045 #undef TARGET_GET_DRAP_RTX
52046 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52047 #undef TARGET_STRICT_ARGUMENT_NAMING
52048 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52049 #undef TARGET_STATIC_CHAIN
52050 #define TARGET_STATIC_CHAIN ix86_static_chain
52051 #undef TARGET_TRAMPOLINE_INIT
52052 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52053 #undef TARGET_RETURN_POPS_ARGS
52054 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

/* Validate insns produced by combine.  */
52056 #undef TARGET_LEGITIMATE_COMBINED_INSN
52057 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

/* AddressSanitizer shadow-memory offset.  */
52059 #undef TARGET_ASAN_SHADOW_OFFSET
52060 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

52062 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52063 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Which scalar/vector machine modes this target supports.  */
52065 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52066 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

52068 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52069 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

52071 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52072 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52073 ix86_libgcc_floating_mode_supported_p

/* Map constant suffixes (e.g. 'w'/'q') to machine modes.  */
52075 #undef TARGET_C_MODE_FOR_SUFFIX
52076 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* DWARF DTPREL (TLS) relocation output.
   NOTE(review): upstream wraps this pair in #ifdef HAVE_AS_TLS; no such
   guard is visible here -- verify the conditional was not lost.  */
52079 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52080 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Allow the subtarget headers to hook declaration-attribute insertion
   by defining SUBTARGET_INSERT_ATTRIBUTES.

   NOTE(review): the matching #endif was missing, so this #ifdef ran
   unterminated to the end of the file; it is restored below.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
/* C++ name mangling for target-specific types (e.g. __float80).  */
52088 #undef TARGET_MANGLE_TYPE
52089 #define TARGET_MANGLE_TYPE ix86_mangle_type

/* Stack-smashing-protector failure handler.  */
52092 #undef TARGET_STACK_PROTECT_FAIL
52093 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

/* Function return values: location, valid registers, promotion.  */
52096 #undef TARGET_FUNCTION_VALUE
52097 #define TARGET_FUNCTION_VALUE ix86_function_value

52099 #undef TARGET_FUNCTION_VALUE_REGNO_P
52100 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

52102 #undef TARGET_PROMOTE_FUNCTION_MODE
52103 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

52105 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52106 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

52108 #undef TARGET_INSTANTIATE_DECLS
52109 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

/* Register-allocation / reload hooks.  */
52111 #undef TARGET_SECONDARY_RELOAD
52112 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload

52114 #undef TARGET_CLASS_MAX_NREGS
52115 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

52117 #undef TARGET_PREFERRED_RELOAD_CLASS
52118 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52119 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52120 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52121 #undef TARGET_CLASS_LIKELY_SPILLED_P
52122 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Vectorizer cost model and capability queries.  */
52124 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52125 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52126 ix86_builtin_vectorization_cost
52127 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52128 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52129 ix86_vectorize_vec_perm_const_ok
52130 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52131 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52132 ix86_preferred_simd_mode
52133 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52134 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52135 ix86_autovectorize_vector_sizes
52136 #undef TARGET_VECTORIZE_INIT_COST
52137 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52138 #undef TARGET_VECTORIZE_ADD_STMT_COST
52139 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52140 #undef TARGET_VECTORIZE_FINISH_COST
52141 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52142 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52143 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

/* Per-function target-option (attribute/pragma) support.  */
52145 #undef TARGET_SET_CURRENT_FUNCTION
52146 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

52148 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52149 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

52151 #undef TARGET_OPTION_SAVE
52152 #define TARGET_OPTION_SAVE ix86_function_specific_save

52154 #undef TARGET_OPTION_RESTORE
52155 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
/* Target-option streaming (LTO) and diagnostics.  */
52157 #undef TARGET_OPTION_POST_STREAM_IN
52158 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

52160 #undef TARGET_OPTION_PRINT
52161 #define TARGET_OPTION_PRINT ix86_function_specific_print

52163 #undef TARGET_OPTION_FUNCTION_VERSIONS
52164 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

/* Inlining across differing target options.  */
52166 #undef TARGET_CAN_INLINE_P
52167 #define TARGET_CAN_INLINE_P ix86_can_inline_p

52169 #undef TARGET_EXPAND_TO_RTL_HOOK
52170 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

52172 #undef TARGET_LEGITIMATE_ADDRESS_P
52173 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

/* Use the LRA register allocator unconditionally.  */
52175 #undef TARGET_LRA_P
52176 #define TARGET_LRA_P hook_bool_void_true

52178 #undef TARGET_REGISTER_PRIORITY
52179 #define TARGET_REGISTER_PRIORITY ix86_register_priority

52181 #undef TARGET_REGISTER_USAGE_LEVELING_P
52182 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

52184 #undef TARGET_LEGITIMATE_CONSTANT_P
52185 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

/* Frame layout / register elimination.  */
52187 #undef TARGET_FRAME_POINTER_REQUIRED
52188 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

52190 #undef TARGET_CAN_ELIMINATE
52191 #define TARGET_CAN_ELIMINATE ix86_can_eliminate

52193 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52194 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

52196 #undef TARGET_ASM_CODE_END
52197 #define TARGET_ASM_CODE_END ix86_code_end

52199 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52200 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

/* NOTE(review): darwin_rename_builtins is a Darwin-specific routine;
   upstream guards this pair with #if TARGET_MACHO ... #endif.  No such
   guard is visible here -- confirm it was not lost, since defining it
   unconditionally would break non-Darwin builds at link time.  */
52203 #undef TARGET_INIT_LIBFUNCS
52204 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins

52207 #undef TARGET_LOOP_UNROLL_ADJUST
52208 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

52210 #undef TARGET_SPILL_CLASS
52211 #define TARGET_SPILL_CLASS ix86_spill_class

/* OpenMP/Cilk "declare simd" clone support.  */
52213 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52214 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52215 ix86_simd_clone_compute_vecsize_and_simdlen

52217 #undef TARGET_SIMD_CLONE_ADJUST
52218 #define TARGET_SIMD_CLONE_ADJUST \
52219 ix86_simd_clone_adjust

52221 #undef TARGET_SIMD_CLONE_USABLE
52222 #define TARGET_SIMD_CLONE_USABLE \
52223 ix86_simd_clone_usable

52225 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52226 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52227 ix86_float_exceptions_rounding_supported_p
/* Optimize-mode-switching hooks (FP control word / vzeroupper states).  */
52229 #undef TARGET_MODE_EMIT
52230 #define TARGET_MODE_EMIT ix86_emit_mode_set

52232 #undef TARGET_MODE_NEEDED
52233 #define TARGET_MODE_NEEDED ix86_mode_needed

52235 #undef TARGET_MODE_AFTER
52236 #define TARGET_MODE_AFTER ix86_mode_after

52238 #undef TARGET_MODE_ENTRY
52239 #define TARGET_MODE_ENTRY ix86_mode_entry

52241 #undef TARGET_MODE_EXIT
52242 #define TARGET_MODE_EXIT ix86_mode_exit

52244 #undef TARGET_MODE_PRIORITY
52245 #define TARGET_MODE_PRIORITY ix86_mode_priority

52247 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52248 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

/* Pointer Bounds Checker (Intel MPX / -fcheck-pointer-bounds) hooks.  */
52250 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52251 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

52253 #undef TARGET_STORE_BOUNDS_FOR_ARG
52254 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

52256 #undef TARGET_LOAD_RETURNED_BOUNDS
52257 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

52259 #undef TARGET_STORE_RETURNED_BOUNDS
52260 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

52262 #undef TARGET_CHKP_BOUND_MODE
52263 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

52265 #undef TARGET_BUILTIN_CHKP_FUNCTION
52266 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

52268 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52269 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

52271 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52272 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

52274 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52275 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

52277 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52278 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

/* Options to pass to offload (accelerator) compilers.  */
52280 #undef TARGET_OFFLOAD_OPTIONS
52281 #define TARGET_OFFLOAD_OPTIONS \
52282 ix86_offload_options

/* Largest alignment (in bits) honored for any object: 64 bytes.  */
52284 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52285 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
/* Build the target hook vector from all the TARGET_xxx macros defined
   above; TARGET_INITIALIZER comes from target-def.h.  */
52287 struct gcc_target targetm = TARGET_INITIALIZER;

/* gengtype-generated garbage-collector root tables for this file.  */
52289 #include "gt-i386.h"