1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "double-int.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
39 #include "stor-layout.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
54 #include "statistics.h"
56 #include "fixed-value.h"
64 #include "diagnostic-core.h"
67 #include "dominance.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
83 #include "plugin-api.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
91 #include "gimple-expr.h"
97 #include "tm-constrs.h"
101 #include "sched-int.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
/* Forward declarations: helpers that rewrite SYMBOL_REFs for PE/COFF
   (Windows dllimport) targets; definitions appear later in this file.  */
120 static rtx legitimize_dllimport_symbol (rtx, bool);
121 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
122 static rtx legitimize_pe_coff_symbol (rtx, bool);
/* Fallback stack-probe limit when the target headers do not define one;
   -1 (all bits set) effectively disables the limit check.  */
124 #ifndef CHECK_STACK_LIMIT
125 #define CHECK_STACK_LIMIT (-1)
128 /* Return index of given mode in mult and division cost tables.
   Indices 0..3 map QImode..DImode; the tables carry a fifth "other"
   entry for remaining modes.  */
129 #define MODE_INDEX(mode) \
130 ((mode) == QImode ? 0 \
131 : (mode) == HImode ? 1 \
132 : (mode) == SImode ? 2 \
133 : (mode) == DImode ? 3 \
136 /* Processor costs (relative to an add) */
137 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
138 #define COSTS_N_BYTES(N) ((N) * 2)
/* Filler stringop_algs initializer for table slots that should never be
   selected; unconditionally falls back to a libcall.  */
140 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* memcpy/memset strategies when tuning for size: always the one-byte rep
   prefix, the smallest encoding.  The two slots presumably select by
   32-/64-bit target -- confirm against the users of these tables.  */
142 static stringop_algs ix86_size_memcpy[2] = {
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
144 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
145 static stringop_algs ix86_size_memset[2] = {
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
147 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
/* Size-tuning cost table: every entry is a byte count (COSTS_N_BYTES),
   not a cycle estimate, so the RTL cost hooks minimize code size.  */
150 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
151 COSTS_N_BYTES (2), /* cost of an add instruction */
152 COSTS_N_BYTES (3), /* cost of a lea instruction */
153 COSTS_N_BYTES (2), /* variable shift costs */
154 COSTS_N_BYTES (3), /* constant shift costs */
155 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
156 COSTS_N_BYTES (3), /* HI */
157 COSTS_N_BYTES (3), /* SI */
158 COSTS_N_BYTES (3), /* DI */
159 COSTS_N_BYTES (5)}, /* other */
160 0, /* cost of multiply per each bit set */
161 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
162 COSTS_N_BYTES (3), /* HI */
163 COSTS_N_BYTES (3), /* SI */
164 COSTS_N_BYTES (3), /* DI */
165 COSTS_N_BYTES (5)}, /* other */
166 COSTS_N_BYTES (3), /* cost of movsx */
167 COSTS_N_BYTES (3), /* cost of movzx */
168 0, /* "large" insn */
170 2, /* cost for loading QImode using movzbl */
171 {2, 2, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 2, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {2, 2, 2}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {2, 2, 2}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 3, /* cost of moving MMX register */
181 {3, 3}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {3, 3}, /* cost of storing MMX registers
184 in SImode and DImode */
185 3, /* cost of moving SSE register */
186 {3, 3, 3}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {3, 3, 3}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
197 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
198 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
199 COSTS_N_BYTES (2), /* cost of FABS instruction. */
200 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
201 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
204 1, /* scalar_stmt_cost. */
205 1, /* scalar load_cost. */
206 1, /* scalar_store_cost. */
207 1, /* vec_stmt_cost. */
208 1, /* vec_to_scalar_cost. */
209 1, /* scalar_to_vec_cost. */
210 1, /* vec_align_load_cost. */
211 1, /* vec_unalign_load_cost. */
212 1, /* vec_store_cost. */
213 1, /* cond_taken_branch_cost. */
214 1, /* cond_not_taken_branch_cost. */
217 /* Processor costs (relative to an add) */
/* i386 memcpy/memset: byte-wise rep prefix; the second slot is a dummy
   (presumably the 64-bit entry, which the 386 cannot use).  */
218 static stringop_algs i386_memcpy[2] = {
219 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
220 DUMMY_STRINGOP_ALGS};
221 static stringop_algs i386_memset[2] = {
222 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
223 DUMMY_STRINGOP_ALGS};
/* Cycle-based (COSTS_N_INSNS) cost table for the original 80386.  */
226 struct processor_costs i386_cost = { /* 386 specific costs */
227 COSTS_N_INSNS (1), /* cost of an add instruction */
228 COSTS_N_INSNS (1), /* cost of a lea instruction */
229 COSTS_N_INSNS (3), /* variable shift costs */
230 COSTS_N_INSNS (2), /* constant shift costs */
231 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
232 COSTS_N_INSNS (6), /* HI */
233 COSTS_N_INSNS (6), /* SI */
234 COSTS_N_INSNS (6), /* DI */
235 COSTS_N_INSNS (6)}, /* other */
236 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
237 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
238 COSTS_N_INSNS (23), /* HI */
239 COSTS_N_INSNS (23), /* SI */
240 COSTS_N_INSNS (23), /* DI */
241 COSTS_N_INSNS (23)}, /* other */
242 COSTS_N_INSNS (3), /* cost of movsx */
243 COSTS_N_INSNS (2), /* cost of movzx */
244 15, /* "large" insn */
246 4, /* cost for loading QImode using movzbl */
247 {2, 4, 2}, /* cost of loading integer registers
248 in QImode, HImode and SImode.
249 Relative to reg-reg move (2). */
250 {2, 4, 2}, /* cost of storing integer registers */
251 2, /* cost of reg,reg fld/fst */
252 {8, 8, 8}, /* cost of loading fp registers
253 in SFmode, DFmode and XFmode */
254 {8, 8, 8}, /* cost of storing fp registers
255 in SFmode, DFmode and XFmode */
256 2, /* cost of moving MMX register */
257 {4, 8}, /* cost of loading MMX registers
258 in SImode and DImode */
259 {4, 8}, /* cost of storing MMX registers
260 in SImode and DImode */
261 2, /* cost of moving SSE register */
262 {4, 8, 16}, /* cost of loading SSE registers
263 in SImode, DImode and TImode */
264 {4, 8, 16}, /* cost of storing SSE registers
265 in SImode, DImode and TImode */
266 3, /* MMX or SSE register to integer */
267 0, /* size of l1 cache */
268 0, /* size of l2 cache */
269 0, /* size of prefetch block */
270 0, /* number of parallel prefetches */
272 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
273 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
274 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
275 COSTS_N_INSNS (22), /* cost of FABS instruction. */
276 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
277 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
280 1, /* scalar_stmt_cost. */
281 1, /* scalar load_cost. */
282 1, /* scalar_store_cost. */
283 1, /* vec_stmt_cost. */
284 1, /* vec_to_scalar_cost. */
285 1, /* scalar_to_vec_cost. */
286 1, /* vec_align_load_cost. */
287 2, /* vec_unalign_load_cost. */
288 1, /* vec_store_cost. */
289 3, /* cond_taken_branch_cost. */
290 1, /* cond_not_taken_branch_cost. */
/* i486 memcpy/memset: dword-wise rep prefix beats byte-wise here.  */
293 static stringop_algs i486_memcpy[2] = {
294 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
295 DUMMY_STRINGOP_ALGS};
296 static stringop_algs i486_memset[2] = {
297 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
298 DUMMY_STRINGOP_ALGS};
/* Cycle-based cost table for the 80486.  */
301 struct processor_costs i486_cost = { /* 486 specific costs */
302 COSTS_N_INSNS (1), /* cost of an add instruction */
303 COSTS_N_INSNS (1), /* cost of a lea instruction */
304 COSTS_N_INSNS (3), /* variable shift costs */
305 COSTS_N_INSNS (2), /* constant shift costs */
306 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
307 COSTS_N_INSNS (12), /* HI */
308 COSTS_N_INSNS (12), /* SI */
309 COSTS_N_INSNS (12), /* DI */
310 COSTS_N_INSNS (12)}, /* other */
311 1, /* cost of multiply per each bit set */
312 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
313 COSTS_N_INSNS (40), /* HI */
314 COSTS_N_INSNS (40), /* SI */
315 COSTS_N_INSNS (40), /* DI */
316 COSTS_N_INSNS (40)}, /* other */
317 COSTS_N_INSNS (3), /* cost of movsx */
318 COSTS_N_INSNS (2), /* cost of movzx */
319 15, /* "large" insn */
321 4, /* cost for loading QImode using movzbl */
322 {2, 4, 2}, /* cost of loading integer registers
323 in QImode, HImode and SImode.
324 Relative to reg-reg move (2). */
325 {2, 4, 2}, /* cost of storing integer registers */
326 2, /* cost of reg,reg fld/fst */
327 {8, 8, 8}, /* cost of loading fp registers
328 in SFmode, DFmode and XFmode */
329 {8, 8, 8}, /* cost of storing fp registers
330 in SFmode, DFmode and XFmode */
331 2, /* cost of moving MMX register */
332 {4, 8}, /* cost of loading MMX registers
333 in SImode and DImode */
334 {4, 8}, /* cost of storing MMX registers
335 in SImode and DImode */
336 2, /* cost of moving SSE register */
337 {4, 8, 16}, /* cost of loading SSE registers
338 in SImode, DImode and TImode */
339 {4, 8, 16}, /* cost of storing SSE registers
340 in SImode, DImode and TImode */
341 3, /* MMX or SSE register to integer */
342 4, /* size of l1 cache. 486 has 8kB cache
343 shared for code and data, so 4kB is
344 not really precise. */
345 4, /* size of l2 cache */
346 0, /* size of prefetch block */
347 0, /* number of parallel prefetches */
349 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
350 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
351 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
352 COSTS_N_INSNS (3), /* cost of FABS instruction. */
353 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
354 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
357 1, /* scalar_stmt_cost. */
358 1, /* scalar load_cost. */
359 1, /* scalar_store_cost. */
360 1, /* vec_stmt_cost. */
361 1, /* vec_to_scalar_cost. */
362 1, /* scalar_to_vec_cost. */
363 1, /* vec_align_load_cost. */
364 2, /* vec_unalign_load_cost. */
365 1, /* vec_store_cost. */
366 3, /* cond_taken_branch_cost. */
367 1, /* cond_not_taken_branch_cost. */
/* Pentium memcpy: dword rep prefix up to 256 bytes, then a libcall;
   memset always uses the dword rep prefix.  */
370 static stringop_algs pentium_memcpy[2] = {
371 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
372 DUMMY_STRINGOP_ALGS};
373 static stringop_algs pentium_memset[2] = {
374 {libcall, {{-1, rep_prefix_4_byte, false}}},
375 DUMMY_STRINGOP_ALGS};
/* Cycle-based cost table for the Pentium (P5).  */
378 struct processor_costs pentium_cost = {
379 COSTS_N_INSNS (1), /* cost of an add instruction */
380 COSTS_N_INSNS (1), /* cost of a lea instruction */
381 COSTS_N_INSNS (4), /* variable shift costs */
382 COSTS_N_INSNS (1), /* constant shift costs */
383 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
384 COSTS_N_INSNS (11), /* HI */
385 COSTS_N_INSNS (11), /* SI */
386 COSTS_N_INSNS (11), /* DI */
387 COSTS_N_INSNS (11)}, /* other */
388 0, /* cost of multiply per each bit set */
389 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
390 COSTS_N_INSNS (25), /* HI */
391 COSTS_N_INSNS (25), /* SI */
392 COSTS_N_INSNS (25), /* DI */
393 COSTS_N_INSNS (25)}, /* other */
394 COSTS_N_INSNS (3), /* cost of movsx */
395 COSTS_N_INSNS (2), /* cost of movzx */
396 8, /* "large" insn */
398 6, /* cost for loading QImode using movzbl */
399 {2, 4, 2}, /* cost of loading integer registers
400 in QImode, HImode and SImode.
401 Relative to reg-reg move (2). */
402 {2, 4, 2}, /* cost of storing integer registers */
403 2, /* cost of reg,reg fld/fst */
404 {2, 2, 6}, /* cost of loading fp registers
405 in SFmode, DFmode and XFmode */
406 {4, 4, 6}, /* cost of storing fp registers
407 in SFmode, DFmode and XFmode */
408 8, /* cost of moving MMX register */
409 {8, 8}, /* cost of loading MMX registers
410 in SImode and DImode */
411 {8, 8}, /* cost of storing MMX registers
412 in SImode and DImode */
413 2, /* cost of moving SSE register */
414 {4, 8, 16}, /* cost of loading SSE registers
415 in SImode, DImode and TImode */
416 {4, 8, 16}, /* cost of storing SSE registers
417 in SImode, DImode and TImode */
418 3, /* MMX or SSE register to integer */
419 8, /* size of l1 cache. */
420 8, /* size of l2 cache */
421 0, /* size of prefetch block */
422 0, /* number of parallel prefetches */
424 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
425 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
426 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
427 COSTS_N_INSNS (1), /* cost of FABS instruction. */
428 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
429 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
432 1, /* scalar_stmt_cost. */
433 1, /* scalar load_cost. */
434 1, /* scalar_store_cost. */
435 1, /* vec_stmt_cost. */
436 1, /* vec_to_scalar_cost. */
437 1, /* scalar_to_vec_cost. */
438 1, /* vec_align_load_cost. */
439 2, /* vec_unalign_load_cost. */
440 1, /* vec_store_cost. */
441 3, /* cond_taken_branch_cost. */
442 1, /* cond_not_taken_branch_cost. */
445 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
446 (we ensure the alignment). For small blocks inline loop is still a
447 noticeable win, for bigger blocks either rep movsl or rep movsb is
448 the way to go. Rep movsb has apparently more expensive startup time in CPU,
449 but after 4K the difference is down in the noise. */
450 static stringop_algs pentiumpro_memcpy[2] = {
451 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
452 {8192, rep_prefix_4_byte, false},
453 {-1, rep_prefix_1_byte, false}}},
454 DUMMY_STRINGOP_ALGS};
455 static stringop_algs pentiumpro_memset[2] = {
456 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
457 {8192, rep_prefix_4_byte, false},
458 {-1, libcall, false}}},
459 DUMMY_STRINGOP_ALGS};
/* Cycle-based cost table for the PentiumPro/P6 family.  */
461 struct processor_costs pentiumpro_cost = {
462 COSTS_N_INSNS (1), /* cost of an add instruction */
463 COSTS_N_INSNS (1), /* cost of a lea instruction */
464 COSTS_N_INSNS (1), /* variable shift costs */
465 COSTS_N_INSNS (1), /* constant shift costs */
466 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
467 COSTS_N_INSNS (4), /* HI */
468 COSTS_N_INSNS (4), /* SI */
469 COSTS_N_INSNS (4), /* DI */
470 COSTS_N_INSNS (4)}, /* other */
471 0, /* cost of multiply per each bit set */
472 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
473 COSTS_N_INSNS (17), /* HI */
474 COSTS_N_INSNS (17), /* SI */
475 COSTS_N_INSNS (17), /* DI */
476 COSTS_N_INSNS (17)}, /* other */
477 COSTS_N_INSNS (1), /* cost of movsx */
478 COSTS_N_INSNS (1), /* cost of movzx */
479 8, /* "large" insn */
481 2, /* cost for loading QImode using movzbl */
482 {4, 4, 4}, /* cost of loading integer registers
483 in QImode, HImode and SImode.
484 Relative to reg-reg move (2). */
485 {2, 2, 2}, /* cost of storing integer registers */
486 2, /* cost of reg,reg fld/fst */
487 {2, 2, 6}, /* cost of loading fp registers
488 in SFmode, DFmode and XFmode */
489 {4, 4, 6}, /* cost of storing fp registers
490 in SFmode, DFmode and XFmode */
491 2, /* cost of moving MMX register */
492 {2, 2}, /* cost of loading MMX registers
493 in SImode and DImode */
494 {2, 2}, /* cost of storing MMX registers
495 in SImode and DImode */
496 2, /* cost of moving SSE register */
497 {2, 2, 8}, /* cost of loading SSE registers
498 in SImode, DImode and TImode */
499 {2, 2, 8}, /* cost of storing SSE registers
500 in SImode, DImode and TImode */
501 3, /* MMX or SSE register to integer */
502 8, /* size of l1 cache. */
503 256, /* size of l2 cache */
504 32, /* size of prefetch block */
505 6, /* number of parallel prefetches */
507 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
508 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
509 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
510 COSTS_N_INSNS (2), /* cost of FABS instruction. */
511 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
512 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
515 1, /* scalar_stmt_cost. */
516 1, /* scalar load_cost. */
517 1, /* scalar_store_cost. */
518 1, /* vec_stmt_cost. */
519 1, /* vec_to_scalar_cost. */
520 1, /* scalar_to_vec_cost. */
521 1, /* vec_align_load_cost. */
522 2, /* vec_unalign_load_cost. */
523 1, /* vec_store_cost. */
524 3, /* cond_taken_branch_cost. */
525 1, /* cond_not_taken_branch_cost. */
/* Geode memcpy/memset: dword rep prefix up to 256 bytes, then libcall.  */
528 static stringop_algs geode_memcpy[2] = {
529 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
530 DUMMY_STRINGOP_ALGS};
531 static stringop_algs geode_memset[2] = {
532 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
533 DUMMY_STRINGOP_ALGS};
/* Cycle-based cost table for AMD Geode.  */
535 struct processor_costs geode_cost = {
536 COSTS_N_INSNS (1), /* cost of an add instruction */
537 COSTS_N_INSNS (1), /* cost of a lea instruction */
538 COSTS_N_INSNS (2), /* variable shift costs */
539 COSTS_N_INSNS (1), /* constant shift costs */
540 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
541 COSTS_N_INSNS (4), /* HI */
542 COSTS_N_INSNS (7), /* SI */
543 COSTS_N_INSNS (7), /* DI */
544 COSTS_N_INSNS (7)}, /* other */
545 0, /* cost of multiply per each bit set */
546 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
547 COSTS_N_INSNS (23), /* HI */
548 COSTS_N_INSNS (39), /* SI */
549 COSTS_N_INSNS (39), /* DI */
550 COSTS_N_INSNS (39)}, /* other */
551 COSTS_N_INSNS (1), /* cost of movsx */
552 COSTS_N_INSNS (1), /* cost of movzx */
553 8, /* "large" insn */
555 1, /* cost for loading QImode using movzbl */
556 {1, 1, 1}, /* cost of loading integer registers
557 in QImode, HImode and SImode.
558 Relative to reg-reg move (2). */
559 {1, 1, 1}, /* cost of storing integer registers */
560 1, /* cost of reg,reg fld/fst */
561 {1, 1, 1}, /* cost of loading fp registers
562 in SFmode, DFmode and XFmode */
563 {4, 6, 6}, /* cost of storing fp registers
564 in SFmode, DFmode and XFmode */
566 2, /* cost of moving MMX register */
567 {2, 2}, /* cost of loading MMX registers
568 in SImode and DImode */
569 {2, 2}, /* cost of storing MMX registers
570 in SImode and DImode */
571 2, /* cost of moving SSE register */
572 {2, 2, 8}, /* cost of loading SSE registers
573 in SImode, DImode and TImode */
574 {2, 2, 8}, /* cost of storing SSE registers
575 in SImode, DImode and TImode */
576 3, /* MMX or SSE register to integer */
577 64, /* size of l1 cache. */
578 128, /* size of l2 cache. */
579 32, /* size of prefetch block */
580 1, /* number of parallel prefetches */
582 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
583 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
584 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
585 COSTS_N_INSNS (1), /* cost of FABS instruction. */
586 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
587 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
590 1, /* scalar_stmt_cost. */
591 1, /* scalar load_cost. */
592 1, /* scalar_store_cost. */
593 1, /* vec_stmt_cost. */
594 1, /* vec_to_scalar_cost. */
595 1, /* scalar_to_vec_cost. */
596 1, /* vec_align_load_cost. */
597 2, /* vec_unalign_load_cost. */
598 1, /* vec_store_cost. */
599 3, /* cond_taken_branch_cost. */
600 1, /* cond_not_taken_branch_cost. */
/* K6 memcpy/memset: dword rep prefix up to 256 bytes, then libcall.  */
603 static stringop_algs k6_memcpy[2] = {
604 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
605 DUMMY_STRINGOP_ALGS};
606 static stringop_algs k6_memset[2] = {
607 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
608 DUMMY_STRINGOP_ALGS};
/* Cycle-based cost table for AMD K6.  */
610 struct processor_costs k6_cost = {
611 COSTS_N_INSNS (1), /* cost of an add instruction */
612 COSTS_N_INSNS (2), /* cost of a lea instruction */
613 COSTS_N_INSNS (1), /* variable shift costs */
614 COSTS_N_INSNS (1), /* constant shift costs */
615 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
616 COSTS_N_INSNS (3), /* HI */
617 COSTS_N_INSNS (3), /* SI */
618 COSTS_N_INSNS (3), /* DI */
619 COSTS_N_INSNS (3)}, /* other */
620 0, /* cost of multiply per each bit set */
621 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
622 COSTS_N_INSNS (18), /* HI */
623 COSTS_N_INSNS (18), /* SI */
624 COSTS_N_INSNS (18), /* DI */
625 COSTS_N_INSNS (18)}, /* other */
626 COSTS_N_INSNS (2), /* cost of movsx */
627 COSTS_N_INSNS (2), /* cost of movzx */
628 8, /* "large" insn */
630 3, /* cost for loading QImode using movzbl */
631 {4, 5, 4}, /* cost of loading integer registers
632 in QImode, HImode and SImode.
633 Relative to reg-reg move (2). */
634 {2, 3, 2}, /* cost of storing integer registers */
635 4, /* cost of reg,reg fld/fst */
636 {6, 6, 6}, /* cost of loading fp registers
637 in SFmode, DFmode and XFmode */
638 {4, 4, 4}, /* cost of storing fp registers
639 in SFmode, DFmode and XFmode */
640 2, /* cost of moving MMX register */
641 {2, 2}, /* cost of loading MMX registers
642 in SImode and DImode */
643 {2, 2}, /* cost of storing MMX registers
644 in SImode and DImode */
645 2, /* cost of moving SSE register */
646 {2, 2, 8}, /* cost of loading SSE registers
647 in SImode, DImode and TImode */
648 {2, 2, 8}, /* cost of storing SSE registers
649 in SImode, DImode and TImode */
650 6, /* MMX or SSE register to integer */
651 32, /* size of l1 cache. */
652 32, /* size of l2 cache. Some models
653 have integrated l2 cache, but
654 optimizing for k6 is not important
655 enough to worry about that. */
656 32, /* size of prefetch block */
657 1, /* number of parallel prefetches */
659 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
660 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
661 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
662 COSTS_N_INSNS (2), /* cost of FABS instruction. */
663 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
664 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
667 1, /* scalar_stmt_cost. */
668 1, /* scalar load_cost. */
669 1, /* scalar_store_cost. */
670 1, /* vec_stmt_cost. */
671 1, /* vec_to_scalar_cost. */
672 1, /* scalar_to_vec_cost. */
673 1, /* vec_align_load_cost. */
674 2, /* vec_unalign_load_cost. */
675 1, /* vec_store_cost. */
676 3, /* cond_taken_branch_cost. */
677 1, /* cond_not_taken_branch_cost. */
680 /* For some reason, Athlon deals better with REP prefix (relative to loops)
681 compared to K8. Alignment becomes important after 8 bytes for memcpy and
682 128 bytes for memset. */
/* Athlon: dword rep prefix up to 2 KiB, then libcall.  */
683 static stringop_algs athlon_memcpy[2] = {
684 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
685 DUMMY_STRINGOP_ALGS};
686 static stringop_algs athlon_memset[2] = {
687 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
688 DUMMY_STRINGOP_ALGS};
/* Cycle-based cost table for AMD Athlon.  */
690 struct processor_costs athlon_cost = {
691 COSTS_N_INSNS (1), /* cost of an add instruction */
692 COSTS_N_INSNS (2), /* cost of a lea instruction */
693 COSTS_N_INSNS (1), /* variable shift costs */
694 COSTS_N_INSNS (1), /* constant shift costs */
695 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
696 COSTS_N_INSNS (5), /* HI */
697 COSTS_N_INSNS (5), /* SI */
698 COSTS_N_INSNS (5), /* DI */
699 COSTS_N_INSNS (5)}, /* other */
700 0, /* cost of multiply per each bit set */
701 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
702 COSTS_N_INSNS (26), /* HI */
703 COSTS_N_INSNS (42), /* SI */
704 COSTS_N_INSNS (74), /* DI */
705 COSTS_N_INSNS (74)}, /* other */
706 COSTS_N_INSNS (1), /* cost of movsx */
707 COSTS_N_INSNS (1), /* cost of movzx */
708 8, /* "large" insn */
710 4, /* cost for loading QImode using movzbl */
711 {3, 4, 3}, /* cost of loading integer registers
712 in QImode, HImode and SImode.
713 Relative to reg-reg move (2). */
714 {3, 4, 3}, /* cost of storing integer registers */
715 4, /* cost of reg,reg fld/fst */
716 {4, 4, 12}, /* cost of loading fp registers
717 in SFmode, DFmode and XFmode */
718 {6, 6, 8}, /* cost of storing fp registers
719 in SFmode, DFmode and XFmode */
720 2, /* cost of moving MMX register */
721 {4, 4}, /* cost of loading MMX registers
722 in SImode and DImode */
723 {4, 4}, /* cost of storing MMX registers
724 in SImode and DImode */
725 2, /* cost of moving SSE register */
726 {4, 4, 6}, /* cost of loading SSE registers
727 in SImode, DImode and TImode */
728 {4, 4, 5}, /* cost of storing SSE registers
729 in SImode, DImode and TImode */
730 5, /* MMX or SSE register to integer */
731 64, /* size of l1 cache. */
732 256, /* size of l2 cache. */
733 64, /* size of prefetch block */
734 6, /* number of parallel prefetches */
736 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
737 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
738 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
739 COSTS_N_INSNS (2), /* cost of FABS instruction. */
740 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
741 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
744 1, /* scalar_stmt_cost. */
745 1, /* scalar load_cost. */
746 1, /* scalar_store_cost. */
747 1, /* vec_stmt_cost. */
748 1, /* vec_to_scalar_cost. */
749 1, /* scalar_to_vec_cost. */
750 1, /* vec_align_load_cost. */
751 2, /* vec_unalign_load_cost. */
752 1, /* vec_store_cost. */
753 3, /* cond_taken_branch_cost. */
754 1, /* cond_not_taken_branch_cost. */
757 /* K8 has optimized REP instruction for medium sized blocks, but for very
758 small blocks it is better to use loop. For large blocks, libcall can
759 do nontemporary accesses and beat inline considerably. */
/* Slot [0] is the 32-bit strategy, slot [1] the 64-bit one (uses the
   8-byte rep prefix).  */
760 static stringop_algs k8_memcpy[2] = {
761 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
762 {-1, rep_prefix_4_byte, false}}},
763 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
764 {-1, libcall, false}}}};
765 static stringop_algs k8_memset[2] = {
766 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
767 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
768 {libcall, {{48, unrolled_loop, false},
769 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cycle-based cost table for AMD K8 (Opteron/Athlon 64).  */
771 struct processor_costs k8_cost = {
772 COSTS_N_INSNS (1), /* cost of an add instruction */
773 COSTS_N_INSNS (2), /* cost of a lea instruction */
774 COSTS_N_INSNS (1), /* variable shift costs */
775 COSTS_N_INSNS (1), /* constant shift costs */
776 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
777 COSTS_N_INSNS (4), /* HI */
778 COSTS_N_INSNS (3), /* SI */
779 COSTS_N_INSNS (4), /* DI */
780 COSTS_N_INSNS (5)}, /* other */
781 0, /* cost of multiply per each bit set */
782 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
783 COSTS_N_INSNS (26), /* HI */
784 COSTS_N_INSNS (42), /* SI */
785 COSTS_N_INSNS (74), /* DI */
786 COSTS_N_INSNS (74)}, /* other */
787 COSTS_N_INSNS (1), /* cost of movsx */
788 COSTS_N_INSNS (1), /* cost of movzx */
789 8, /* "large" insn */
791 4, /* cost for loading QImode using movzbl */
792 {3, 4, 3}, /* cost of loading integer registers
793 in QImode, HImode and SImode.
794 Relative to reg-reg move (2). */
795 {3, 4, 3}, /* cost of storing integer registers */
796 4, /* cost of reg,reg fld/fst */
797 {4, 4, 12}, /* cost of loading fp registers
798 in SFmode, DFmode and XFmode */
799 {6, 6, 8}, /* cost of storing fp registers
800 in SFmode, DFmode and XFmode */
801 2, /* cost of moving MMX register */
802 {3, 3}, /* cost of loading MMX registers
803 in SImode and DImode */
804 {4, 4}, /* cost of storing MMX registers
805 in SImode and DImode */
806 2, /* cost of moving SSE register */
807 {4, 3, 6}, /* cost of loading SSE registers
808 in SImode, DImode and TImode */
809 {4, 4, 5}, /* cost of storing SSE registers
810 in SImode, DImode and TImode */
811 5, /* MMX or SSE register to integer */
812 64, /* size of l1 cache. */
813 512, /* size of l2 cache. */
814 64, /* size of prefetch block */
815 /* New AMD processors never drop prefetches; if they cannot be performed
816 immediately, they are queued. We set number of simultaneous prefetches
817 to a large constant to reflect this (it probably is not a good idea not
818 to limit number of prefetches at all, as their execution also takes some
820 100, /* number of parallel prefetches */
822 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
823 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
824 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
825 COSTS_N_INSNS (2), /* cost of FABS instruction. */
826 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
827 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
831 4, /* scalar_stmt_cost. */
832 2, /* scalar load_cost. */
833 2, /* scalar_store_cost. */
834 5, /* vec_stmt_cost. */
835 0, /* vec_to_scalar_cost. */
836 2, /* scalar_to_vec_cost. */
837 2, /* vec_align_load_cost. */
838 3, /* vec_unalign_load_cost. */
839 3, /* vec_store_cost. */
840 3, /* cond_taken_branch_cost. */
841 2, /* cond_not_taken_branch_cost. */
844 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
845 very small blocks it is better to use loop. For large blocks, libcall can
846 do nontemporary accesses and beat inline considerably. */
/* Same thresholds as K8: slot [0] 32-bit, slot [1] 64-bit strategy.  */
847 static stringop_algs amdfam10_memcpy[2] = {
848 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
849 {-1, rep_prefix_4_byte, false}}},
850 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
851 {-1, libcall, false}}}};
852 static stringop_algs amdfam10_memset[2] = {
853 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
854 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
855 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
856 {-1, libcall, false}}}};
/* Cycle-based cost table for AMD Family 10h (Barcelona).  The bare MOVD
   lines below belong to a latency comment whose delimiters fall outside
   this excerpt.  */
857 struct processor_costs amdfam10_cost = {
858 COSTS_N_INSNS (1), /* cost of an add instruction */
859 COSTS_N_INSNS (2), /* cost of a lea instruction */
860 COSTS_N_INSNS (1), /* variable shift costs */
861 COSTS_N_INSNS (1), /* constant shift costs */
862 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
863 COSTS_N_INSNS (4), /* HI */
864 COSTS_N_INSNS (3), /* SI */
865 COSTS_N_INSNS (4), /* DI */
866 COSTS_N_INSNS (5)}, /* other */
867 0, /* cost of multiply per each bit set */
868 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
869 COSTS_N_INSNS (35), /* HI */
870 COSTS_N_INSNS (51), /* SI */
871 COSTS_N_INSNS (83), /* DI */
872 COSTS_N_INSNS (83)}, /* other */
873 COSTS_N_INSNS (1), /* cost of movsx */
874 COSTS_N_INSNS (1), /* cost of movzx */
875 8, /* "large" insn */
877 4, /* cost for loading QImode using movzbl */
878 {3, 4, 3}, /* cost of loading integer registers
879 in QImode, HImode and SImode.
880 Relative to reg-reg move (2). */
881 {3, 4, 3}, /* cost of storing integer registers */
882 4, /* cost of reg,reg fld/fst */
883 {4, 4, 12}, /* cost of loading fp registers
884 in SFmode, DFmode and XFmode */
885 {6, 6, 8}, /* cost of storing fp registers
886 in SFmode, DFmode and XFmode */
887 2, /* cost of moving MMX register */
888 {3, 3}, /* cost of loading MMX registers
889 in SImode and DImode */
890 {4, 4}, /* cost of storing MMX registers
891 in SImode and DImode */
892 2, /* cost of moving SSE register */
893 {4, 4, 3}, /* cost of loading SSE registers
894 in SImode, DImode and TImode */
895 {4, 4, 5}, /* cost of storing SSE registers
896 in SImode, DImode and TImode */
897 3, /* MMX or SSE register to integer */
899 MOVD reg64, xmmreg Double FSTORE 4
900 MOVD reg32, xmmreg Double FSTORE 4
902 MOVD reg64, xmmreg Double FADD 3
904 MOVD reg32, xmmreg Double FADD 3
906 64, /* size of l1 cache. */
907 512, /* size of l2 cache. */
908 64, /* size of prefetch block */
909 /* New AMD processors never drop prefetches; if they cannot be performed
910 immediately, they are queued. We set number of simultaneous prefetches
911 to a large constant to reflect this (it probably is not a good idea not
912 to limit number of prefetches at all, as their execution also takes some
914 100, /* number of parallel prefetches */
916 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
917 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
918 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
919 COSTS_N_INSNS (2), /* cost of FABS instruction. */
920 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
921 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
925 4, /* scalar_stmt_cost. */
926 2, /* scalar load_cost. */
927 2, /* scalar_store_cost. */
928 6, /* vec_stmt_cost. */
929 0, /* vec_to_scalar_cost. */
930 2, /* scalar_to_vec_cost. */
931 2, /* vec_align_load_cost. */
932 2, /* vec_unalign_load_cost. */
933 2, /* vec_store_cost. */
934 2, /* cond_taken_branch_cost. */
935 1, /* cond_not_taken_branch_cost. */
938 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
939 very small blocks it is better to use loop. For large blocks, libcall
940 can do nontemporary accesses and beat inline considerably. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   used for 32-bit and [1] for 64-bit code -- confirm against the caller.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded).  */
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
952 const struct processor_costs bdver1_cost = {
953 COSTS_N_INSNS (1), /* cost of an add instruction */
954 COSTS_N_INSNS (1), /* cost of a lea instruction */
955 COSTS_N_INSNS (1), /* variable shift costs */
956 COSTS_N_INSNS (1), /* constant shift costs */
957 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
958 COSTS_N_INSNS (4), /* HI */
959 COSTS_N_INSNS (4), /* SI */
960 COSTS_N_INSNS (6), /* DI */
961 COSTS_N_INSNS (6)}, /* other */
962 0, /* cost of multiply per each bit set */
963 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
964 COSTS_N_INSNS (35), /* HI */
965 COSTS_N_INSNS (51), /* SI */
966 COSTS_N_INSNS (83), /* DI */
967 COSTS_N_INSNS (83)}, /* other */
968 COSTS_N_INSNS (1), /* cost of movsx */
969 COSTS_N_INSNS (1), /* cost of movzx */
970 8, /* "large" insn */
972 4, /* cost for loading QImode using movzbl */
973 {5, 5, 4}, /* cost of loading integer registers
974 in QImode, HImode and SImode.
975 Relative to reg-reg move (2). */
976 {4, 4, 4}, /* cost of storing integer registers */
977 2, /* cost of reg,reg fld/fst */
978 {5, 5, 12}, /* cost of loading fp registers
979 in SFmode, DFmode and XFmode */
980 {4, 4, 8}, /* cost of storing fp registers
981 in SFmode, DFmode and XFmode */
982 2, /* cost of moving MMX register */
983 {4, 4}, /* cost of loading MMX registers
984 in SImode and DImode */
985 {4, 4}, /* cost of storing MMX registers
986 in SImode and DImode */
987 2, /* cost of moving SSE register */
988 {4, 4, 4}, /* cost of loading SSE registers
989 in SImode, DImode and TImode */
990 {4, 4, 4}, /* cost of storing SSE registers
991 in SImode, DImode and TImode */
992 2, /* MMX or SSE register to integer */
994 MOVD reg64, xmmreg Double FSTORE 4
995 MOVD reg32, xmmreg Double FSTORE 4
997 MOVD reg64, xmmreg Double FADD 3
999 MOVD reg32, xmmreg Double FADD 3
1001 16, /* size of l1 cache. */
1002 2048, /* size of l2 cache. */
1003 64, /* size of prefetch block */
1004 /* New AMD processors never drop prefetches; if they cannot be performed
1005 immediately, they are queued. We set number of simultaneous prefetches
1006 to a large constant to reflect this (it probably is not a good idea not
1007 to limit number of prefetches at all, as their execution also takes some
1009 100, /* number of parallel prefetches */
1010 2, /* Branch cost */
1011 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1020 6, /* scalar_stmt_cost. */
1021 4, /* scalar load_cost. */
1022 4, /* scalar_store_cost. */
1023 6, /* vec_stmt_cost. */
1024 0, /* vec_to_scalar_cost. */
1025 2, /* scalar_to_vec_cost. */
1026 4, /* vec_align_load_cost. */
1027 4, /* vec_unalign_load_cost. */
1028 4, /* vec_store_cost. */
1029 2, /* cond_taken_branch_cost. */
1030 1, /* cond_not_taken_branch_cost. */
1033 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1034 very small blocks it is better to use loop. For large blocks, libcall
1035 can do nontemporary accesses and beat inline considerably. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   used for 32-bit and [1] for 64-bit code -- confirm against the caller.  */
static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded).  */
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1048 const struct processor_costs bdver2_cost = {
1049 COSTS_N_INSNS (1), /* cost of an add instruction */
1050 COSTS_N_INSNS (1), /* cost of a lea instruction */
1051 COSTS_N_INSNS (1), /* variable shift costs */
1052 COSTS_N_INSNS (1), /* constant shift costs */
1053 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1054 COSTS_N_INSNS (4), /* HI */
1055 COSTS_N_INSNS (4), /* SI */
1056 COSTS_N_INSNS (6), /* DI */
1057 COSTS_N_INSNS (6)}, /* other */
1058 0, /* cost of multiply per each bit set */
1059 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1060 COSTS_N_INSNS (35), /* HI */
1061 COSTS_N_INSNS (51), /* SI */
1062 COSTS_N_INSNS (83), /* DI */
1063 COSTS_N_INSNS (83)}, /* other */
1064 COSTS_N_INSNS (1), /* cost of movsx */
1065 COSTS_N_INSNS (1), /* cost of movzx */
1066 8, /* "large" insn */
1068 4, /* cost for loading QImode using movzbl */
1069 {5, 5, 4}, /* cost of loading integer registers
1070 in QImode, HImode and SImode.
1071 Relative to reg-reg move (2). */
1072 {4, 4, 4}, /* cost of storing integer registers */
1073 2, /* cost of reg,reg fld/fst */
1074 {5, 5, 12}, /* cost of loading fp registers
1075 in SFmode, DFmode and XFmode */
1076 {4, 4, 8}, /* cost of storing fp registers
1077 in SFmode, DFmode and XFmode */
1078 2, /* cost of moving MMX register */
1079 {4, 4}, /* cost of loading MMX registers
1080 in SImode and DImode */
1081 {4, 4}, /* cost of storing MMX registers
1082 in SImode and DImode */
1083 2, /* cost of moving SSE register */
1084 {4, 4, 4}, /* cost of loading SSE registers
1085 in SImode, DImode and TImode */
1086 {4, 4, 4}, /* cost of storing SSE registers
1087 in SImode, DImode and TImode */
1088 2, /* MMX or SSE register to integer */
1090 MOVD reg64, xmmreg Double FSTORE 4
1091 MOVD reg32, xmmreg Double FSTORE 4
1093 MOVD reg64, xmmreg Double FADD 3
1095 MOVD reg32, xmmreg Double FADD 3
1097 16, /* size of l1 cache. */
1098 2048, /* size of l2 cache. */
1099 64, /* size of prefetch block */
1100 /* New AMD processors never drop prefetches; if they cannot be performed
1101 immediately, they are queued. We set number of simultaneous prefetches
1102 to a large constant to reflect this (it probably is not a good idea not
1103 to limit number of prefetches at all, as their execution also takes some
1105 100, /* number of parallel prefetches */
1106 2, /* Branch cost */
1107 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1108 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1109 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1110 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1111 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1112 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1116 6, /* scalar_stmt_cost. */
1117 4, /* scalar load_cost. */
1118 4, /* scalar_store_cost. */
1119 6, /* vec_stmt_cost. */
1120 0, /* vec_to_scalar_cost. */
1121 2, /* scalar_to_vec_cost. */
1122 4, /* vec_align_load_cost. */
1123 4, /* vec_unalign_load_cost. */
1124 4, /* vec_store_cost. */
1125 2, /* cond_taken_branch_cost. */
1126 1, /* cond_not_taken_branch_cost. */
1130 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1131 very small blocks it is better to use loop. For large blocks, libcall
1132 can do nontemporary accesses and beat inline considerably. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   used for 32-bit and [1] for 64-bit code -- confirm against the caller.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded).  */
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1143 struct processor_costs bdver3_cost = {
1144 COSTS_N_INSNS (1), /* cost of an add instruction */
1145 COSTS_N_INSNS (1), /* cost of a lea instruction */
1146 COSTS_N_INSNS (1), /* variable shift costs */
1147 COSTS_N_INSNS (1), /* constant shift costs */
1148 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1149 COSTS_N_INSNS (4), /* HI */
1150 COSTS_N_INSNS (4), /* SI */
1151 COSTS_N_INSNS (6), /* DI */
1152 COSTS_N_INSNS (6)}, /* other */
1153 0, /* cost of multiply per each bit set */
1154 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1155 COSTS_N_INSNS (35), /* HI */
1156 COSTS_N_INSNS (51), /* SI */
1157 COSTS_N_INSNS (83), /* DI */
1158 COSTS_N_INSNS (83)}, /* other */
1159 COSTS_N_INSNS (1), /* cost of movsx */
1160 COSTS_N_INSNS (1), /* cost of movzx */
1161 8, /* "large" insn */
1163 4, /* cost for loading QImode using movzbl */
1164 {5, 5, 4}, /* cost of loading integer registers
1165 in QImode, HImode and SImode.
1166 Relative to reg-reg move (2). */
1167 {4, 4, 4}, /* cost of storing integer registers */
1168 2, /* cost of reg,reg fld/fst */
1169 {5, 5, 12}, /* cost of loading fp registers
1170 in SFmode, DFmode and XFmode */
1171 {4, 4, 8}, /* cost of storing fp registers
1172 in SFmode, DFmode and XFmode */
1173 2, /* cost of moving MMX register */
1174 {4, 4}, /* cost of loading MMX registers
1175 in SImode and DImode */
1176 {4, 4}, /* cost of storing MMX registers
1177 in SImode and DImode */
1178 2, /* cost of moving SSE register */
1179 {4, 4, 4}, /* cost of loading SSE registers
1180 in SImode, DImode and TImode */
1181 {4, 4, 4}, /* cost of storing SSE registers
1182 in SImode, DImode and TImode */
1183 2, /* MMX or SSE register to integer */
1184 16, /* size of l1 cache. */
1185 2048, /* size of l2 cache. */
1186 64, /* size of prefetch block */
1187 /* New AMD processors never drop prefetches; if they cannot be performed
1188 immediately, they are queued. We set number of simultaneous prefetches
1189 to a large constant to reflect this (it probably is not a good idea not
1190 to limit number of prefetches at all, as their execution also takes some
1192 100, /* number of parallel prefetches */
1193 2, /* Branch cost */
1194 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1195 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1196 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1197 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1198 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1199 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1203 6, /* scalar_stmt_cost. */
1204 4, /* scalar load_cost. */
1205 4, /* scalar_store_cost. */
1206 6, /* vec_stmt_cost. */
1207 0, /* vec_to_scalar_cost. */
1208 2, /* scalar_to_vec_cost. */
1209 4, /* vec_align_load_cost. */
1210 4, /* vec_unalign_load_cost. */
1211 4, /* vec_store_cost. */
1212 2, /* cond_taken_branch_cost. */
1213 1, /* cond_not_taken_branch_cost. */
1216 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1217 very small blocks it is better to use loop. For large blocks, libcall
1218 can do nontemporary accesses and beat inline considerably. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   used for 32-bit and [1] for 64-bit code -- confirm against the caller.  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded).  */
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1229 struct processor_costs bdver4_cost = {
1230 COSTS_N_INSNS (1), /* cost of an add instruction */
1231 COSTS_N_INSNS (1), /* cost of a lea instruction */
1232 COSTS_N_INSNS (1), /* variable shift costs */
1233 COSTS_N_INSNS (1), /* constant shift costs */
1234 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1235 COSTS_N_INSNS (4), /* HI */
1236 COSTS_N_INSNS (4), /* SI */
1237 COSTS_N_INSNS (6), /* DI */
1238 COSTS_N_INSNS (6)}, /* other */
1239 0, /* cost of multiply per each bit set */
1240 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1241 COSTS_N_INSNS (35), /* HI */
1242 COSTS_N_INSNS (51), /* SI */
1243 COSTS_N_INSNS (83), /* DI */
1244 COSTS_N_INSNS (83)}, /* other */
1245 COSTS_N_INSNS (1), /* cost of movsx */
1246 COSTS_N_INSNS (1), /* cost of movzx */
1247 8, /* "large" insn */
1249 4, /* cost for loading QImode using movzbl */
1250 {5, 5, 4}, /* cost of loading integer registers
1251 in QImode, HImode and SImode.
1252 Relative to reg-reg move (2). */
1253 {4, 4, 4}, /* cost of storing integer registers */
1254 2, /* cost of reg,reg fld/fst */
1255 {5, 5, 12}, /* cost of loading fp registers
1256 in SFmode, DFmode and XFmode */
1257 {4, 4, 8}, /* cost of storing fp registers
1258 in SFmode, DFmode and XFmode */
1259 2, /* cost of moving MMX register */
1260 {4, 4}, /* cost of loading MMX registers
1261 in SImode and DImode */
1262 {4, 4}, /* cost of storing MMX registers
1263 in SImode and DImode */
1264 2, /* cost of moving SSE register */
1265 {4, 4, 4}, /* cost of loading SSE registers
1266 in SImode, DImode and TImode */
1267 {4, 4, 4}, /* cost of storing SSE registers
1268 in SImode, DImode and TImode */
1269 2, /* MMX or SSE register to integer */
1270 16, /* size of l1 cache. */
1271 2048, /* size of l2 cache. */
1272 64, /* size of prefetch block */
1273 /* New AMD processors never drop prefetches; if they cannot be performed
1274 immediately, they are queued. We set number of simultaneous prefetches
1275 to a large constant to reflect this (it probably is not a good idea not
1276 to limit number of prefetches at all, as their execution also takes some
1278 100, /* number of parallel prefetches */
1279 2, /* Branch cost */
1280 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1281 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1282 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1283 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1284 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1285 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1289 6, /* scalar_stmt_cost. */
1290 4, /* scalar load_cost. */
1291 4, /* scalar_store_cost. */
1292 6, /* vec_stmt_cost. */
1293 0, /* vec_to_scalar_cost. */
1294 2, /* scalar_to_vec_cost. */
1295 4, /* vec_align_load_cost. */
1296 4, /* vec_unalign_load_cost. */
1297 4, /* vec_store_cost. */
1298 2, /* cond_taken_branch_cost. */
1299 1, /* cond_not_taken_branch_cost. */
1302 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1303 very small blocks it is better to use loop. For large blocks, libcall can
1304 do nontemporary accesses and beat inline considerably. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   used for 32-bit and [1] for 64-bit code -- confirm against the caller.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded).  */
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1315 const struct processor_costs btver1_cost = {
1316 COSTS_N_INSNS (1), /* cost of an add instruction */
1317 COSTS_N_INSNS (2), /* cost of a lea instruction */
1318 COSTS_N_INSNS (1), /* variable shift costs */
1319 COSTS_N_INSNS (1), /* constant shift costs */
1320 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1321 COSTS_N_INSNS (4), /* HI */
1322 COSTS_N_INSNS (3), /* SI */
1323 COSTS_N_INSNS (4), /* DI */
1324 COSTS_N_INSNS (5)}, /* other */
1325 0, /* cost of multiply per each bit set */
1326 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1327 COSTS_N_INSNS (35), /* HI */
1328 COSTS_N_INSNS (51), /* SI */
1329 COSTS_N_INSNS (83), /* DI */
1330 COSTS_N_INSNS (83)}, /* other */
1331 COSTS_N_INSNS (1), /* cost of movsx */
1332 COSTS_N_INSNS (1), /* cost of movzx */
1333 8, /* "large" insn */
1335 4, /* cost for loading QImode using movzbl */
1336 {3, 4, 3}, /* cost of loading integer registers
1337 in QImode, HImode and SImode.
1338 Relative to reg-reg move (2). */
1339 {3, 4, 3}, /* cost of storing integer registers */
1340 4, /* cost of reg,reg fld/fst */
1341 {4, 4, 12}, /* cost of loading fp registers
1342 in SFmode, DFmode and XFmode */
1343 {6, 6, 8}, /* cost of storing fp registers
1344 in SFmode, DFmode and XFmode */
1345 2, /* cost of moving MMX register */
1346 {3, 3}, /* cost of loading MMX registers
1347 in SImode and DImode */
1348 {4, 4}, /* cost of storing MMX registers
1349 in SImode and DImode */
1350 2, /* cost of moving SSE register */
1351 {4, 4, 3}, /* cost of loading SSE registers
1352 in SImode, DImode and TImode */
1353 {4, 4, 5}, /* cost of storing SSE registers
1354 in SImode, DImode and TImode */
1355 3, /* MMX or SSE register to integer */
1357 MOVD reg64, xmmreg Double FSTORE 4
1358 MOVD reg32, xmmreg Double FSTORE 4
1360 MOVD reg64, xmmreg Double FADD 3
1362 MOVD reg32, xmmreg Double FADD 3
1364 32, /* size of l1 cache. */
1365 512, /* size of l2 cache. */
1366 64, /* size of prefetch block */
1367 100, /* number of parallel prefetches */
1368 2, /* Branch cost */
1369 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1370 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1371 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1372 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1373 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1374 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1378 4, /* scalar_stmt_cost. */
1379 2, /* scalar load_cost. */
1380 2, /* scalar_store_cost. */
1381 6, /* vec_stmt_cost. */
1382 0, /* vec_to_scalar_cost. */
1383 2, /* scalar_to_vec_cost. */
1384 2, /* vec_align_load_cost. */
1385 2, /* vec_unalign_load_cost. */
1386 2, /* vec_store_cost. */
1387 2, /* cond_taken_branch_cost. */
1388 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   used for 32-bit and [1] for 64-bit code -- confirm against the caller.  */
static stringop_algs btver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded).  */
static stringop_algs btver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1401 const struct processor_costs btver2_cost = {
1402 COSTS_N_INSNS (1), /* cost of an add instruction */
1403 COSTS_N_INSNS (2), /* cost of a lea instruction */
1404 COSTS_N_INSNS (1), /* variable shift costs */
1405 COSTS_N_INSNS (1), /* constant shift costs */
1406 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1407 COSTS_N_INSNS (4), /* HI */
1408 COSTS_N_INSNS (3), /* SI */
1409 COSTS_N_INSNS (4), /* DI */
1410 COSTS_N_INSNS (5)}, /* other */
1411 0, /* cost of multiply per each bit set */
1412 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1413 COSTS_N_INSNS (35), /* HI */
1414 COSTS_N_INSNS (51), /* SI */
1415 COSTS_N_INSNS (83), /* DI */
1416 COSTS_N_INSNS (83)}, /* other */
1417 COSTS_N_INSNS (1), /* cost of movsx */
1418 COSTS_N_INSNS (1), /* cost of movzx */
1419 8, /* "large" insn */
1421 4, /* cost for loading QImode using movzbl */
1422 {3, 4, 3}, /* cost of loading integer registers
1423 in QImode, HImode and SImode.
1424 Relative to reg-reg move (2). */
1425 {3, 4, 3}, /* cost of storing integer registers */
1426 4, /* cost of reg,reg fld/fst */
1427 {4, 4, 12}, /* cost of loading fp registers
1428 in SFmode, DFmode and XFmode */
1429 {6, 6, 8}, /* cost of storing fp registers
1430 in SFmode, DFmode and XFmode */
1431 2, /* cost of moving MMX register */
1432 {3, 3}, /* cost of loading MMX registers
1433 in SImode and DImode */
1434 {4, 4}, /* cost of storing MMX registers
1435 in SImode and DImode */
1436 2, /* cost of moving SSE register */
1437 {4, 4, 3}, /* cost of loading SSE registers
1438 in SImode, DImode and TImode */
1439 {4, 4, 5}, /* cost of storing SSE registers
1440 in SImode, DImode and TImode */
1441 3, /* MMX or SSE register to integer */
1443 MOVD reg64, xmmreg Double FSTORE 4
1444 MOVD reg32, xmmreg Double FSTORE 4
1446 MOVD reg64, xmmreg Double FADD 3
1448 MOVD reg32, xmmreg Double FADD 3
1450 32, /* size of l1 cache. */
1451 2048, /* size of l2 cache. */
1452 64, /* size of prefetch block */
1453 100, /* number of parallel prefetches */
1454 2, /* Branch cost */
1455 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1456 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1457 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1458 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1459 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1460 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1463 4, /* scalar_stmt_cost. */
1464 2, /* scalar load_cost. */
1465 2, /* scalar_store_cost. */
1466 6, /* vec_stmt_cost. */
1467 0, /* vec_to_scalar_cost. */
1468 2, /* scalar_to_vec_cost. */
1469 2, /* vec_align_load_cost. */
1470 2, /* vec_unalign_load_cost. */
1471 2, /* vec_store_cost. */
1472 2, /* cond_taken_branch_cost. */
1473 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   for 32-bit code; [1] is a dummy (P4 is 32-bit only) -- confirm.  */
static stringop_algs pentium4_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded; [1] is a dummy).  */
static stringop_algs pentium4_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
1485 struct processor_costs pentium4_cost = {
1486 COSTS_N_INSNS (1), /* cost of an add instruction */
1487 COSTS_N_INSNS (3), /* cost of a lea instruction */
1488 COSTS_N_INSNS (4), /* variable shift costs */
1489 COSTS_N_INSNS (4), /* constant shift costs */
1490 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1491 COSTS_N_INSNS (15), /* HI */
1492 COSTS_N_INSNS (15), /* SI */
1493 COSTS_N_INSNS (15), /* DI */
1494 COSTS_N_INSNS (15)}, /* other */
1495 0, /* cost of multiply per each bit set */
1496 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1497 COSTS_N_INSNS (56), /* HI */
1498 COSTS_N_INSNS (56), /* SI */
1499 COSTS_N_INSNS (56), /* DI */
1500 COSTS_N_INSNS (56)}, /* other */
1501 COSTS_N_INSNS (1), /* cost of movsx */
1502 COSTS_N_INSNS (1), /* cost of movzx */
1503 16, /* "large" insn */
1505 2, /* cost for loading QImode using movzbl */
1506 {4, 5, 4}, /* cost of loading integer registers
1507 in QImode, HImode and SImode.
1508 Relative to reg-reg move (2). */
1509 {2, 3, 2}, /* cost of storing integer registers */
1510 2, /* cost of reg,reg fld/fst */
1511 {2, 2, 6}, /* cost of loading fp registers
1512 in SFmode, DFmode and XFmode */
1513 {4, 4, 6}, /* cost of storing fp registers
1514 in SFmode, DFmode and XFmode */
1515 2, /* cost of moving MMX register */
1516 {2, 2}, /* cost of loading MMX registers
1517 in SImode and DImode */
1518 {2, 2}, /* cost of storing MMX registers
1519 in SImode and DImode */
1520 12, /* cost of moving SSE register */
1521 {12, 12, 12}, /* cost of loading SSE registers
1522 in SImode, DImode and TImode */
1523 {2, 2, 8}, /* cost of storing SSE registers
1524 in SImode, DImode and TImode */
1525 10, /* MMX or SSE register to integer */
1526 8, /* size of l1 cache. */
1527 256, /* size of l2 cache. */
1528 64, /* size of prefetch block */
1529 6, /* number of parallel prefetches */
1530 2, /* Branch cost */
1531 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1532 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1533 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1534 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1535 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1536 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1539 1, /* scalar_stmt_cost. */
1540 1, /* scalar load_cost. */
1541 1, /* scalar_store_cost. */
1542 1, /* vec_stmt_cost. */
1543 1, /* vec_to_scalar_cost. */
1544 1, /* scalar_to_vec_cost. */
1545 1, /* vec_align_load_cost. */
1546 2, /* vec_unalign_load_cost. */
1547 1, /* vec_store_cost. */
1548 3, /* cond_taken_branch_cost. */
1549 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   used for 32-bit and [1] for 64-bit code -- confirm against the caller.  */
static stringop_algs nocona_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
             {100000, unrolled_loop, false}, {-1, libcall, false}}}};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded).  */
static stringop_algs nocona_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {64, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1564 struct processor_costs nocona_cost = {
1565 COSTS_N_INSNS (1), /* cost of an add instruction */
1566 COSTS_N_INSNS (1), /* cost of a lea instruction */
1567 COSTS_N_INSNS (1), /* variable shift costs */
1568 COSTS_N_INSNS (1), /* constant shift costs */
1569 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1570 COSTS_N_INSNS (10), /* HI */
1571 COSTS_N_INSNS (10), /* SI */
1572 COSTS_N_INSNS (10), /* DI */
1573 COSTS_N_INSNS (10)}, /* other */
1574 0, /* cost of multiply per each bit set */
1575 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1576 COSTS_N_INSNS (66), /* HI */
1577 COSTS_N_INSNS (66), /* SI */
1578 COSTS_N_INSNS (66), /* DI */
1579 COSTS_N_INSNS (66)}, /* other */
1580 COSTS_N_INSNS (1), /* cost of movsx */
1581 COSTS_N_INSNS (1), /* cost of movzx */
1582 16, /* "large" insn */
1583 17, /* MOVE_RATIO */
1584 4, /* cost for loading QImode using movzbl */
1585 {4, 4, 4}, /* cost of loading integer registers
1586 in QImode, HImode and SImode.
1587 Relative to reg-reg move (2). */
1588 {4, 4, 4}, /* cost of storing integer registers */
1589 3, /* cost of reg,reg fld/fst */
1590 {12, 12, 12}, /* cost of loading fp registers
1591 in SFmode, DFmode and XFmode */
1592 {4, 4, 4}, /* cost of storing fp registers
1593 in SFmode, DFmode and XFmode */
1594 6, /* cost of moving MMX register */
1595 {12, 12}, /* cost of loading MMX registers
1596 in SImode and DImode */
1597 {12, 12}, /* cost of storing MMX registers
1598 in SImode and DImode */
1599 6, /* cost of moving SSE register */
1600 {12, 12, 12}, /* cost of loading SSE registers
1601 in SImode, DImode and TImode */
1602 {12, 12, 12}, /* cost of storing SSE registers
1603 in SImode, DImode and TImode */
1604 8, /* MMX or SSE register to integer */
1605 8, /* size of l1 cache. */
1606 1024, /* size of l2 cache. */
1607 64, /* size of prefetch block */
1608 8, /* number of parallel prefetches */
1609 1, /* Branch cost */
1610 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1611 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1612 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1613 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1614 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1615 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1618 1, /* scalar_stmt_cost. */
1619 1, /* scalar load_cost. */
1620 1, /* scalar_store_cost. */
1621 1, /* vec_stmt_cost. */
1622 1, /* vec_to_scalar_cost. */
1623 1, /* scalar_to_vec_cost. */
1624 1, /* vec_align_load_cost. */
1625 2, /* vec_unalign_load_cost. */
1626 1, /* vec_store_cost. */
1627 3, /* cond_taken_branch_cost. */
1628 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion table: {max_size, algorithm, noalign} entries; a max of
   -1 means "any remaining size".  NOTE(review): element [0] appears to be
   used for 32-bit and [1] for 64-bit code -- confirm against the caller.  */
static stringop_algs atom_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion table; same {max_size, algorithm, noalign} layout as
   the memcpy table above (max -1 = unbounded).  */
static stringop_algs atom_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1641 struct processor_costs atom_cost = {
1642 COSTS_N_INSNS (1), /* cost of an add instruction */
1643 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1644 COSTS_N_INSNS (1), /* variable shift costs */
1645 COSTS_N_INSNS (1), /* constant shift costs */
1646 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1647 COSTS_N_INSNS (4), /* HI */
1648 COSTS_N_INSNS (3), /* SI */
1649 COSTS_N_INSNS (4), /* DI */
1650 COSTS_N_INSNS (2)}, /* other */
1651 0, /* cost of multiply per each bit set */
1652 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1653 COSTS_N_INSNS (26), /* HI */
1654 COSTS_N_INSNS (42), /* SI */
1655 COSTS_N_INSNS (74), /* DI */
1656 COSTS_N_INSNS (74)}, /* other */
1657 COSTS_N_INSNS (1), /* cost of movsx */
1658 COSTS_N_INSNS (1), /* cost of movzx */
1659 8, /* "large" insn */
1660 17, /* MOVE_RATIO */
1661 4, /* cost for loading QImode using movzbl */
1662 {4, 4, 4}, /* cost of loading integer registers
1663 in QImode, HImode and SImode.
1664 Relative to reg-reg move (2). */
1665 {4, 4, 4}, /* cost of storing integer registers */
1666 4, /* cost of reg,reg fld/fst */
1667 {12, 12, 12}, /* cost of loading fp registers
1668 in SFmode, DFmode and XFmode */
1669 {6, 6, 8}, /* cost of storing fp registers
1670 in SFmode, DFmode and XFmode */
1671 2, /* cost of moving MMX register */
1672 {8, 8}, /* cost of loading MMX registers
1673 in SImode and DImode */
1674 {8, 8}, /* cost of storing MMX registers
1675 in SImode and DImode */
1676 2, /* cost of moving SSE register */
1677 {8, 8, 8}, /* cost of loading SSE registers
1678 in SImode, DImode and TImode */
1679 {8, 8, 8}, /* cost of storing SSE registers
1680 in SImode, DImode and TImode */
1681 5, /* MMX or SSE register to integer */
1682 32, /* size of l1 cache. */
1683 256, /* size of l2 cache. */
1684 64, /* size of prefetch block */
1685 6, /* number of parallel prefetches */
1686 3, /* Branch cost */
1687 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1688 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1689 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1690 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1691 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1692 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1695 1, /* scalar_stmt_cost. */
1696 1, /* scalar load_cost. */
1697 1, /* scalar_store_cost. */
1698 1, /* vec_stmt_cost. */
1699 1, /* vec_to_scalar_cost. */
1700 1, /* scalar_to_vec_cost. */
1701 1, /* vec_align_load_cost. */
1702 2, /* vec_unalign_load_cost. */
1703 1, /* vec_store_cost. */
1704 3, /* cond_taken_branch_cost. */
1705 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion strategies for Silvermont tuning.  Each {max, alg,
   noalign} triple selects algorithm ALG for copies up to MAX bytes
   (-1 = no upper bound).  NOTE(review): element [0] vs [1] presumably
   corresponds to 32-bit vs 64-bit mode -- confirm against the
   stringop_algs consumers.  */
static stringop_algs slm_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion strategies for Silvermont tuning; same
   {max, alg, noalign} encoding as slm_memcpy above.  */
static stringop_algs slm_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1718 struct processor_costs slm_cost = {
1719 COSTS_N_INSNS (1), /* cost of an add instruction */
1720 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1721 COSTS_N_INSNS (1), /* variable shift costs */
1722 COSTS_N_INSNS (1), /* constant shift costs */
1723 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1724 COSTS_N_INSNS (3), /* HI */
1725 COSTS_N_INSNS (3), /* SI */
1726 COSTS_N_INSNS (4), /* DI */
1727 COSTS_N_INSNS (2)}, /* other */
1728 0, /* cost of multiply per each bit set */
1729 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1730 COSTS_N_INSNS (26), /* HI */
1731 COSTS_N_INSNS (42), /* SI */
1732 COSTS_N_INSNS (74), /* DI */
1733 COSTS_N_INSNS (74)}, /* other */
1734 COSTS_N_INSNS (1), /* cost of movsx */
1735 COSTS_N_INSNS (1), /* cost of movzx */
1736 8, /* "large" insn */
1737 17, /* MOVE_RATIO */
1738 4, /* cost for loading QImode using movzbl */
1739 {4, 4, 4}, /* cost of loading integer registers
1740 in QImode, HImode and SImode.
1741 Relative to reg-reg move (2). */
1742 {4, 4, 4}, /* cost of storing integer registers */
1743 4, /* cost of reg,reg fld/fst */
1744 {12, 12, 12}, /* cost of loading fp registers
1745 in SFmode, DFmode and XFmode */
1746 {6, 6, 8}, /* cost of storing fp registers
1747 in SFmode, DFmode and XFmode */
1748 2, /* cost of moving MMX register */
1749 {8, 8}, /* cost of loading MMX registers
1750 in SImode and DImode */
1751 {8, 8}, /* cost of storing MMX registers
1752 in SImode and DImode */
1753 2, /* cost of moving SSE register */
1754 {8, 8, 8}, /* cost of loading SSE registers
1755 in SImode, DImode and TImode */
1756 {8, 8, 8}, /* cost of storing SSE registers
1757 in SImode, DImode and TImode */
1758 5, /* MMX or SSE register to integer */
1759 32, /* size of l1 cache. */
1760 256, /* size of l2 cache. */
1761 64, /* size of prefetch block */
1762 6, /* number of parallel prefetches */
1763 3, /* Branch cost */
1764 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1765 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1766 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1767 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1768 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1769 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1772 1, /* scalar_stmt_cost. */
1773 1, /* scalar load_cost. */
1774 1, /* scalar_store_cost. */
1775 1, /* vec_stmt_cost. */
1776 4, /* vec_to_scalar_cost. */
1777 1, /* scalar_to_vec_cost. */
1778 1, /* vec_align_load_cost. */
1779 2, /* vec_unalign_load_cost. */
1780 1, /* vec_store_cost. */
1781 3, /* cond_taken_branch_cost. */
1782 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion strategies for generic "intel" tuning (-mtune=intel);
   values currently mirror the Silvermont table.  */
static stringop_algs intel_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion strategies for generic "intel" tuning; values
   currently mirror the Silvermont table.  */
static stringop_algs intel_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1795 struct processor_costs intel_cost = {
1796 COSTS_N_INSNS (1), /* cost of an add instruction */
1797 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1798 COSTS_N_INSNS (1), /* variable shift costs */
1799 COSTS_N_INSNS (1), /* constant shift costs */
1800 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1801 COSTS_N_INSNS (3), /* HI */
1802 COSTS_N_INSNS (3), /* SI */
1803 COSTS_N_INSNS (4), /* DI */
1804 COSTS_N_INSNS (2)}, /* other */
1805 0, /* cost of multiply per each bit set */
1806 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1807 COSTS_N_INSNS (26), /* HI */
1808 COSTS_N_INSNS (42), /* SI */
1809 COSTS_N_INSNS (74), /* DI */
1810 COSTS_N_INSNS (74)}, /* other */
1811 COSTS_N_INSNS (1), /* cost of movsx */
1812 COSTS_N_INSNS (1), /* cost of movzx */
1813 8, /* "large" insn */
1814 17, /* MOVE_RATIO */
1815 4, /* cost for loading QImode using movzbl */
1816 {4, 4, 4}, /* cost of loading integer registers
1817 in QImode, HImode and SImode.
1818 Relative to reg-reg move (2). */
1819 {4, 4, 4}, /* cost of storing integer registers */
1820 4, /* cost of reg,reg fld/fst */
1821 {12, 12, 12}, /* cost of loading fp registers
1822 in SFmode, DFmode and XFmode */
1823 {6, 6, 8}, /* cost of storing fp registers
1824 in SFmode, DFmode and XFmode */
1825 2, /* cost of moving MMX register */
1826 {8, 8}, /* cost of loading MMX registers
1827 in SImode and DImode */
1828 {8, 8}, /* cost of storing MMX registers
1829 in SImode and DImode */
1830 2, /* cost of moving SSE register */
1831 {8, 8, 8}, /* cost of loading SSE registers
1832 in SImode, DImode and TImode */
1833 {8, 8, 8}, /* cost of storing SSE registers
1834 in SImode, DImode and TImode */
1835 5, /* MMX or SSE register to integer */
1836 32, /* size of l1 cache. */
1837 256, /* size of l2 cache. */
1838 64, /* size of prefetch block */
1839 6, /* number of parallel prefetches */
1840 3, /* Branch cost */
1841 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1842 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1843 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1844 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1845 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1846 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1849 1, /* scalar_stmt_cost. */
1850 1, /* scalar load_cost. */
1851 1, /* scalar_store_cost. */
1852 1, /* vec_stmt_cost. */
1853 4, /* vec_to_scalar_cost. */
1854 1, /* scalar_to_vec_cost. */
1855 1, /* vec_align_load_cost. */
1856 2, /* vec_unalign_load_cost. */
1857 1, /* vec_store_cost. */
1858 3, /* cond_taken_branch_cost. */
1859 1, /* cond_not_taken_branch_cost. */
1862 /* Generic should produce code tuned for Core-i7 (and newer chips)
1863 and btver1 (and newer chips). */
/* memcpy expansion strategies for generic tuning (blend of Core-i7 and
   btver1 and newer, per the comment above).  */
static stringop_algs generic_memcpy[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset expansion strategies for generic tuning; same shape as
   generic_memcpy above.  */
static stringop_algs generic_memset[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1876 struct processor_costs generic_cost = {
1877 COSTS_N_INSNS (1), /* cost of an add instruction */
1878 /* On all chips taken into consideration lea is 2 cycles and more. With
1879 this cost however our current implementation of synth_mult results in
1880 use of unnecessary temporary registers causing regression on several
1881 SPECfp benchmarks. */
1882 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1883 COSTS_N_INSNS (1), /* variable shift costs */
1884 COSTS_N_INSNS (1), /* constant shift costs */
1885 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1886 COSTS_N_INSNS (4), /* HI */
1887 COSTS_N_INSNS (3), /* SI */
1888 COSTS_N_INSNS (4), /* DI */
1889 COSTS_N_INSNS (2)}, /* other */
1890 0, /* cost of multiply per each bit set */
1891 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1892 COSTS_N_INSNS (26), /* HI */
1893 COSTS_N_INSNS (42), /* SI */
1894 COSTS_N_INSNS (74), /* DI */
1895 COSTS_N_INSNS (74)}, /* other */
1896 COSTS_N_INSNS (1), /* cost of movsx */
1897 COSTS_N_INSNS (1), /* cost of movzx */
1898 8, /* "large" insn */
1899 17, /* MOVE_RATIO */
1900 4, /* cost for loading QImode using movzbl */
1901 {4, 4, 4}, /* cost of loading integer registers
1902 in QImode, HImode and SImode.
1903 Relative to reg-reg move (2). */
1904 {4, 4, 4}, /* cost of storing integer registers */
1905 4, /* cost of reg,reg fld/fst */
1906 {12, 12, 12}, /* cost of loading fp registers
1907 in SFmode, DFmode and XFmode */
1908 {6, 6, 8}, /* cost of storing fp registers
1909 in SFmode, DFmode and XFmode */
1910 2, /* cost of moving MMX register */
1911 {8, 8}, /* cost of loading MMX registers
1912 in SImode and DImode */
1913 {8, 8}, /* cost of storing MMX registers
1914 in SImode and DImode */
1915 2, /* cost of moving SSE register */
1916 {8, 8, 8}, /* cost of loading SSE registers
1917 in SImode, DImode and TImode */
1918 {8, 8, 8}, /* cost of storing SSE registers
1919 in SImode, DImode and TImode */
1920 5, /* MMX or SSE register to integer */
1921 32, /* size of l1 cache. */
1922 512, /* size of l2 cache. */
1923 64, /* size of prefetch block */
1924 6, /* number of parallel prefetches */
/* Benchmarks show large regressions on the K8 sixtrack benchmark when this
   value is increased to the perhaps more appropriate value of 5.  */
1927 3, /* Branch cost */
1928 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1929 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1930 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1931 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1932 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1933 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1936 1, /* scalar_stmt_cost. */
1937 1, /* scalar load_cost. */
1938 1, /* scalar_store_cost. */
1939 1, /* vec_stmt_cost. */
1940 1, /* vec_to_scalar_cost. */
1941 1, /* scalar_to_vec_cost. */
1942 1, /* vec_align_load_cost. */
1943 2, /* vec_unalign_load_cost. */
1944 1, /* vec_store_cost. */
1945 3, /* cond_taken_branch_cost. */
1946 1, /* cond_not_taken_branch_cost. */
/* core_cost should produce code tuned for Core family of CPUs.  */
/* memcpy expansion strategies for Core-family tuning.  Unlike the tables
   above, several entries set the third (noalign) field to true.  */
static stringop_algs core_memcpy[2] = {
  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
/* memset expansion strategies for Core-family tuning.  */
static stringop_algs core_memset[2] = {
  {libcall, {{6, loop_1_byte, true},
             {8192, rep_prefix_4_byte, true},
             {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
1963 struct processor_costs core_cost = {
1964 COSTS_N_INSNS (1), /* cost of an add instruction */
1965 /* On all chips taken into consideration lea is 2 cycles and more. With
1966 this cost however our current implementation of synth_mult results in
1967 use of unnecessary temporary registers causing regression on several
1968 SPECfp benchmarks. */
1969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1970 COSTS_N_INSNS (1), /* variable shift costs */
1971 COSTS_N_INSNS (1), /* constant shift costs */
1972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1973 COSTS_N_INSNS (4), /* HI */
1974 COSTS_N_INSNS (3), /* SI */
1975 COSTS_N_INSNS (4), /* DI */
1976 COSTS_N_INSNS (2)}, /* other */
1977 0, /* cost of multiply per each bit set */
1978 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1979 COSTS_N_INSNS (26), /* HI */
1980 COSTS_N_INSNS (42), /* SI */
1981 COSTS_N_INSNS (74), /* DI */
1982 COSTS_N_INSNS (74)}, /* other */
1983 COSTS_N_INSNS (1), /* cost of movsx */
1984 COSTS_N_INSNS (1), /* cost of movzx */
1985 8, /* "large" insn */
1986 17, /* MOVE_RATIO */
1987 4, /* cost for loading QImode using movzbl */
1988 {4, 4, 4}, /* cost of loading integer registers
1989 in QImode, HImode and SImode.
1990 Relative to reg-reg move (2). */
1991 {4, 4, 4}, /* cost of storing integer registers */
1992 4, /* cost of reg,reg fld/fst */
1993 {12, 12, 12}, /* cost of loading fp registers
1994 in SFmode, DFmode and XFmode */
1995 {6, 6, 8}, /* cost of storing fp registers
1996 in SFmode, DFmode and XFmode */
1997 2, /* cost of moving MMX register */
1998 {8, 8}, /* cost of loading MMX registers
1999 in SImode and DImode */
2000 {8, 8}, /* cost of storing MMX registers
2001 in SImode and DImode */
2002 2, /* cost of moving SSE register */
2003 {8, 8, 8}, /* cost of loading SSE registers
2004 in SImode, DImode and TImode */
2005 {8, 8, 8}, /* cost of storing SSE registers
2006 in SImode, DImode and TImode */
2007 5, /* MMX or SSE register to integer */
2008 64, /* size of l1 cache. */
2009 512, /* size of l2 cache. */
2010 64, /* size of prefetch block */
2011 6, /* number of parallel prefetches */
2012 /* FIXME perhaps more appropriate value is 5. */
2013 3, /* Branch cost */
2014 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2015 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2016 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2017 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2018 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2019 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2022 1, /* scalar_stmt_cost. */
2023 1, /* scalar load_cost. */
2024 1, /* scalar_store_cost. */
2025 1, /* vec_stmt_cost. */
2026 1, /* vec_to_scalar_cost. */
2027 1, /* scalar_to_vec_cost. */
2028 1, /* vec_align_load_cost. */
2029 2, /* vec_unalign_load_cost. */
2030 1, /* vec_store_cost. */
2031 3, /* cond_taken_branch_cost. */
2032 1, /* cond_not_taken_branch_cost. */
/* Cost table selected by -mtune; drives tuning decisions.  The
   &pentium_cost initializer is only a default -- NOTE(review):
   presumably replaced during option processing; confirm.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;
/* Cost table selected by -mtune or -Os; like ix86_tune_cost, the
   &pentium_cost initializer is only a default.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* macro is the bit for
   one PROCESSOR_* enumerator; the OR'd macros name common groups.  */
/* Intel processors and groups.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_NEHALEM (1<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
#define m_BONNELL (1<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
#define m_KNL (1<<PROCESSOR_KNL)
#define m_INTEL (1<<PROCESSOR_INTEL)
/* AMD processors and groups.  */
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER4 (1<<PROCESSOR_BDVER4)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
/* Blended generic tuning.  */
#define m_GENERIC (1<<PROCESSOR_GENERIC)
/* Human-readable name of each tuning knob, one entry per X86_TUNE_*
   value, generated from x86-tune.def via the DEF_TUNE X-macro (each
   DEF_TUNE entry contributes its NAME string).  */
const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
2086 /* Feature tests against the various tunings. */
2087 unsigned char ix86_tune_features[X86_TUNE_LAST];
2089 /* Feature tests against the various tunings used to create ix86_tune_features
2090 based on the processor mask. */
2091 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2093 #define DEF_TUNE(tune, name, selector) selector,
2094 #include "x86-tune.def"
2098 /* Feature tests against the various architecture variations. */
2099 unsigned char ix86_arch_features[X86_ARCH_LAST];
2101 /* Feature tests against the various architecture variations, used to create
2102 ix86_arch_features based on the processor mask. */
2103 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2104 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2105 ~(m_386 | m_486 | m_PENT | m_K6),
2107 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2110 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2113 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2116 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2120 /* In case the average insn count for single function invocation is
2121 lower than this constant, emit fast (but longer) prologue and
2123 #define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively,
   indexed by gcc register number; the actual strings come from the
   *_REGISTER_NAMES macros in the target headers.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;           /* 8-bit low parts */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* 8-bit high parts */
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;           /* 16-bit names */
2130 /* Array of the smallest class containing reg number REGNO, indexed by
2131 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2133 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2135 /* ax, dx, cx, bx */
2136 AREG, DREG, CREG, BREG,
2137 /* si, di, bp, sp */
2138 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2140 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2141 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2144 /* flags, fpsr, fpcr, frame */
2145 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2147 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2150 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2155 /* SSE REX registers */
2156 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2158 /* AVX-512 SSE registers */
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2163 /* Mask registers. */
2164 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2166 /* MPX bound registers */
2167 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2170 /* The "default" register map used in 32bit mode. */
2172 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2174 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2175 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2176 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2177 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2178 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2182 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2183 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2184 101, 102, 103, 104, /* bound registers */
2187 /* The "default" register map used in 64bit mode. */
2189 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2191 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2192 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2193 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2194 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2195 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2196 8,9,10,11,12,13,14,15, /* extended integer registers */
2197 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2198 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2199 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2200 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2201 126, 127, 128, 129, /* bound registers */
2204 /* Define the register numbers to be used in Dwarf debugging information.
2205 The SVR4 reference port C compiler uses the following register numbers
2206 in its Dwarf output code:
2207 0 for %eax (gcc regno = 0)
2208 1 for %ecx (gcc regno = 2)
2209 2 for %edx (gcc regno = 1)
2210 3 for %ebx (gcc regno = 3)
2211 4 for %esp (gcc regno = 7)
2212 5 for %ebp (gcc regno = 6)
2213 6 for %esi (gcc regno = 4)
2214 7 for %edi (gcc regno = 5)
2215 The following three DWARF register numbers are never generated by
2216 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2217 believes these numbers have these meanings.
2218 8 for %eip (no gcc equivalent)
2219 9 for %eflags (gcc regno = 17)
2220 10 for %trapno (no gcc equivalent)
2221 It is not at all clear how we should number the FP stack registers
2222 for the x86 architecture. If the version of SDB on x86/svr4 were
2223 a bit less brain dead with respect to floating-point then we would
2224 have a precedent to follow with respect to DWARF register numbers
2225 for x86 FP registers, but the SDB on x86/svr4 is so completely
2226 broken with respect to FP registers that it is hardly worth thinking
2227 of it as something to strive for compatibility with.
2228 The version of x86/svr4 SDB I have at the moment does (partially)
2229 seem to believe that DWARF register number 11 is associated with
2230 the x86 register %st(0), but that's about all. Higher DWARF
2231 register numbers don't seem to be associated with anything in
2232 particular, and even for DWARF regno 11, SDB only seems to under-
2233 stand that it should say that a variable lives in %st(0) (when
2234 asked via an `=' command) if we said it was in DWARF regno 11,
2235 but SDB still prints garbage when asked for the value of the
2236 variable in question (via a `/' command).
2237 (Also note that the labels SDB prints for various FP stack regs
2238 when doing an `x' command are all wrong.)
2239 Note that these problems generally don't affect the native SVR4
2240 C compiler because it doesn't allow the use of -O with -g and
2241 because when it is *not* optimizing, it allocates a memory
2242 location for each floating-point variable, and the memory
2243 location is what gets described in the DWARF AT_location
2244 attribute for the variable in question.
2245 Regardless of the severe mental illness of the x86/svr4 SDB, we
2246 do something sensible here and we use the following DWARF
2247 register numbers. Note that these are all stack-top-relative
2249 11 for %st(0) (gcc regno = 8)
2250 12 for %st(1) (gcc regno = 9)
2251 13 for %st(2) (gcc regno = 10)
2252 14 for %st(3) (gcc regno = 11)
2253 15 for %st(4) (gcc regno = 12)
2254 16 for %st(5) (gcc regno = 13)
2255 17 for %st(6) (gcc regno = 14)
2256 18 for %st(7) (gcc regno = 15)
2258 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2260 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2261 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2262 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2268 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2269 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2270 101, 102, 103, 104, /* bound registers */
2273 /* Define parameter passing and return registers. */
2275 static int const x86_64_int_parameter_registers[6] =
2277 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2280 static int const x86_64_ms_abi_int_parameter_registers[4] =
2282 CX_REG, DX_REG, R8_REG, R9_REG
2285 static int const x86_64_int_return_registers[4] =
2287 AX_REG, DX_REG, DI_REG, SI_REG
2290 /* Additional registers that are clobbered by SYSV calls. */
2292 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2296 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2297 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2300 /* Define the structure for the machine field in struct function. */
2302 struct GTY(()) stack_local_entry {
2303 unsigned short mode;
2306 struct stack_local_entry *next;
2309 /* Structure describing stack frame layout.
2310 Stack grows downward:
2316 saved static chain if ix86_static_chain_on_stack
2318 saved frame pointer if frame_pointer_needed
2319 <- HARD_FRAME_POINTER
2325 <- sse_regs_save_offset
2328 [va_arg registers] |
2332 [padding2] | = to_allocate
2341 int outgoing_arguments_size;
2343 /* The offsets relative to ARG_POINTER. */
2344 HOST_WIDE_INT frame_pointer_offset;
2345 HOST_WIDE_INT hard_frame_pointer_offset;
2346 HOST_WIDE_INT stack_pointer_offset;
2347 HOST_WIDE_INT hfp_save_offset;
2348 HOST_WIDE_INT reg_save_offset;
2349 HOST_WIDE_INT sse_reg_save_offset;
2351 /* When save_regs_using_mov is set, emit prologue using
2352 move instead of push instructions. */
2353 bool save_regs_using_mov;
2356 /* Which cpu are we scheduling for. */
2357 enum attr_cpu ix86_schedule;
2359 /* Which cpu are we optimizing for. */
2360 enum processor_type ix86_tune;
2362 /* Which instruction set architecture to use. */
2363 enum processor_type ix86_arch;
2365 /* True if processor has SSE prefetch instruction. */
2366 unsigned char x86_prefetch_sse;
/* -mstackrealign option */
/* Spelling of the force_align_arg_pointer attribute, kept in one place
   so attribute-lookup sites stay consistent.  */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
/* Indirect RTL-emitter callbacks.  Each pointer is filled in with the
   word-size-appropriate (SImode vs DImode) insn generator --
   NOTE(review): presumably during option override; confirm where they
   are assigned.  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits specified at
   the command line -- NOTE(review): presumably via
   -mincoming-stack-boundary; confirm.  */
static unsigned int ix86_user_incoming_stack_boundary;
/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;
/* Alignment for incoming stack boundary in bits (the value actually
   in effect, derived from the user/default values above).  */
unsigned int ix86_incoming_stack_boundary;
/* Calling abi specific va_list type nodes (System V vs Microsoft x64);
   GTY(()) marks them as garbage-collector roots.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, cached with its length
   so label recognition avoids recomputing it.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
2407 /* Fence to use after loop using movnt. */
2410 /* Register class used for passing given 64bit part of the argument.
2411 These represent classes as documented by the PS ABI, with the exception
2412 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2413 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2415 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2416 whenever possible (upper half does contain padding). */
2417 enum x86_64_reg_class
2420 X86_64_INTEGER_CLASS,
2421 X86_64_INTEGERSI_CLASS,
2428 X86_64_COMPLEX_X87_CLASS,
/* Maximum number of x86_64_reg_class eightbyte slots per argument --
   NOTE(review): presumably 512 bits / 64 = 8; confirm against the
   classification code.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
/* Lazily initialized: ext_80387_constants_init records whether the
   table has been filled in yet.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
2439 static struct machine_function * ix86_init_machine_status (void);
2440 static rtx ix86_function_value (const_tree, const_tree, bool);
2441 static bool ix86_function_value_regno_p (const unsigned int);
2442 static unsigned int ix86_function_arg_boundary (machine_mode,
2444 static rtx ix86_static_chain (const_tree, bool);
2445 static int ix86_function_regparm (const_tree, const_tree);
2446 static void ix86_compute_frame_layout (struct ix86_frame *);
2447 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2449 static void ix86_add_new_builtins (HOST_WIDE_INT);
2450 static tree ix86_canonical_va_list_type (tree);
2451 static void predict_jump (int);
2452 static unsigned int split_stack_prologue_scratch_regno (void);
2453 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2455 enum ix86_function_specific_strings
2457 IX86_FUNCTION_SPECIFIC_ARCH,
2458 IX86_FUNCTION_SPECIFIC_TUNE,
2459 IX86_FUNCTION_SPECIFIC_MAX
2462 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2463 const char *, enum fpmath_unit, bool);
2464 static void ix86_function_specific_save (struct cl_target_option *,
2465 struct gcc_options *opts);
2466 static void ix86_function_specific_restore (struct gcc_options *opts,
2467 struct cl_target_option *);
2468 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2469 static void ix86_function_specific_print (FILE *, int,
2470 struct cl_target_option *);
2471 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2472 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2473 struct gcc_options *,
2474 struct gcc_options *,
2475 struct gcc_options *);
2476 static bool ix86_can_inline_p (tree, tree);
2477 static void ix86_set_current_function (tree);
2478 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2480 static enum calling_abi ix86_function_abi (const_tree);
2483 #ifndef SUBTARGET32_DEFAULT_CPU
2484 #define SUBTARGET32_DEFAULT_CPU "i386"
/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  ix86_veclib_handler is
   set to one of the ABI-specific handlers below; svml/acml refer to the
   Intel SVML and AMD ACML vector math libraries (per -mveclibabi).  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2497 /* Processor target table, indexed by processor number */
2500 const char *const name; /* processor name */
2501 const struct processor_costs *cost; /* Processor costs */
2502 const int align_loop; /* Default alignments. */
2503 const int align_loop_max_skip;
2504 const int align_jump;
2505 const int align_jump_max_skip;
2506 const int align_func;
2509 /* This table must be in sync with enum processor_type in i386.h. */
2510 static const struct ptt processor_target_table[PROCESSOR_max] =
2512 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2513 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2514 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2515 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2516 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2517 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2518 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2519 {"core2", &core_cost, 16, 10, 16, 10, 16},
2520 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2521 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2522 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2523 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2524 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2525 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2526 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2527 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2528 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2529 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2530 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2531 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2532 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2533 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2534 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2535 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2536 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2537 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
/* Worker for the vzeroupper insertion pass: disable every x86
   mode-switching entity except AVX_U128, then re-run the generic mode
   switching pass so it inserts the needed vzeroupper instructions.
   (Return type, braces and the declaration of `i` are elided from this
   excerpt.)  */
2541 rest_of_handle_insert_vzeroupper (void)
2545 /* vzeroupper instructions are inserted immediately after reload to
2546 account for possible spills from 256bit registers. The pass
2547 reuses mode switching infrastructure by re-running mode insertion
2548 pass, so disable entities that have already been processed. */
2549 for (i = 0; i < MAX_386_ENTITIES; i++)
2550 ix86_optimize_mode_switching[i] = 0;
/* Only the AVX upper-128 state is (re)optimized on this run.  */
2552 ix86_optimize_mode_switching[AVX_U128] = 1;
2554 /* Call optimize_mode_switching. */
2555 g->get_passes ()->execute_pass_mode_switching ();
/* Static descriptor for the vzeroupper insertion RTL pass; consumed by
   the pass_insert_vzeroupper constructor below.  */
2561 const pass_data pass_data_insert_vzeroupper =
2563 RTL_PASS, /* type */
2564 "vzeroupper", /* name */
2565 OPTGROUP_NONE, /* optinfo_flags */
2566 TV_NONE, /* tv_id */
2567 0, /* properties_required */
2568 0, /* properties_provided */
2569 0, /* properties_destroyed */
2570 0, /* todo_flags_start */
2571 TODO_df_finish, /* todo_flags_finish */
/* RTL pass that inserts vzeroupper instructions after reload; the real
   work is done by rest_of_handle_insert_vzeroupper above.  */
2574 class pass_insert_vzeroupper : public rtl_opt_pass
2577 pass_insert_vzeroupper(gcc::context *ctxt)
2578 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2581 /* opt_pass methods: */
/* Run only for AVX without AVX512F, when vzeroupper generation and
   expensive optimizations are enabled (tail of the condition is elided
   from this excerpt).  */
2582 virtual bool gate (function *)
2584 return TARGET_AVX && !TARGET_AVX512F
2585 && TARGET_VZEROUPPER && flag_expensive_optimizations
2589 virtual unsigned int execute (function *)
2591 return rest_of_handle_insert_vzeroupper ();
2594 }; // class pass_insert_vzeroupper
/* Allocate a fresh instance of the vzeroupper insertion pass.  The
   caller takes ownership of the returned object.  */
2599 make_pass_insert_vzeroupper (gcc::context *ctxt)
2601 return new pass_insert_vzeroupper (ctxt);
2604 /* Return true if a red-zone is in use. */
2607 ix86_using_red_zone (void)
/* The red zone is disabled for the 64-bit MS ABI (per the condition
   below) even when -mred-zone is in effect.  */
2609 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2612 /* Return a string that documents the current -m options. The caller is
2613 responsible for freeing the string. */
/* The function collects (option, value) string pairs into opts[][2]
   (value may be NULL for plain flags), then concatenates them into one
   xmalloc'd buffer, optionally wrapping lines when add_nl_p is set.  */
2616 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2617 const char *tune, enum fpmath_unit fpmath,
2620 struct ix86_target_opts
2622 const char *option; /* option string */
2623 HOST_WIDE_INT mask; /* isa mask options */
2626 /* This table is ordered so that options like -msse4.2 that imply
2627 preceding options while match those first. */
2628 static struct ix86_target_opts isa_opts[] =
2630 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2631 { "-mfma", OPTION_MASK_ISA_FMA },
2632 { "-mxop", OPTION_MASK_ISA_XOP },
2633 { "-mlwp", OPTION_MASK_ISA_LWP },
2634 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2635 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2636 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2637 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2638 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2639 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2640 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2641 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2642 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2643 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2644 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2645 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2646 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2647 { "-msse3", OPTION_MASK_ISA_SSE3 },
2648 { "-msse2", OPTION_MASK_ISA_SSE2 },
2649 { "-msse", OPTION_MASK_ISA_SSE },
2650 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2651 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2652 { "-mmmx", OPTION_MASK_ISA_MMX },
2653 { "-mabm", OPTION_MASK_ISA_ABM },
2654 { "-mbmi", OPTION_MASK_ISA_BMI },
2655 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2656 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2657 { "-mhle", OPTION_MASK_ISA_HLE },
2658 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2659 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2660 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2661 { "-madx", OPTION_MASK_ISA_ADX },
2662 { "-mtbm", OPTION_MASK_ISA_TBM },
2663 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2664 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2665 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2666 { "-maes", OPTION_MASK_ISA_AES },
2667 { "-msha", OPTION_MASK_ISA_SHA },
2668 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2669 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2670 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2671 { "-mf16c", OPTION_MASK_ISA_F16C },
2672 { "-mrtm", OPTION_MASK_ISA_RTM },
2673 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2674 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2675 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2676 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2677 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2678 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2679 { "-mmpx", OPTION_MASK_ISA_MPX },
2680 { "-mclwb", OPTION_MASK_ISA_CLWB },
2681 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2682 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
/* Flag options (target_flags bits rather than ISA bits).  */
2686 static struct ix86_target_opts flag_opts[] =
2688 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2689 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2690 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2691 { "-m80387", MASK_80387 },
2692 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2693 { "-malign-double", MASK_ALIGN_DOUBLE },
2694 { "-mcld", MASK_CLD },
2695 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2696 { "-mieee-fp", MASK_IEEE_FP },
2697 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2698 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2699 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2700 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2701 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2702 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2703 { "-mno-red-zone", MASK_NO_RED_ZONE },
2704 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2705 { "-mrecip", MASK_RECIP },
2706 { "-mrtd", MASK_RTD },
2707 { "-msseregparm", MASK_SSEREGPARM },
2708 { "-mstack-arg-probe", MASK_STACK_PROBE },
2709 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2710 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2711 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2712 { "-mvzeroupper", MASK_VZEROUPPER },
2713 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2714 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2715 { "-mprefer-avx128", MASK_PREFER_AVX128},
/* +6 leaves room for the fixed entries added below (-march, -mtune,
   ABI, the two "(other ...)" catch-alls and -mfpmath).  */
2718 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2721 char target_other[40];
2731 memset (opts, '\0', sizeof (opts));
2733 /* Add -march= option. */
2736 opts[num][0] = "-march=";
2737 opts[num++][1] = arch;
2740 /* Add -mtune= option. */
2743 opts[num][0] = "-mtune=";
2744 opts[num++][1] = tune;
2747 /* Add -m32/-m64/-mx32. */
2748 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2750 if ((isa & OPTION_MASK_ABI_64) != 0)
/* Strip the ABI bits out of ISA so they are not re-reported by the
   generic isa loop below.  */
2754 isa &= ~ (OPTION_MASK_ISA_64BIT
2755 | OPTION_MASK_ABI_64
2756 | OPTION_MASK_ABI_X32);
2760 opts[num++][0] = abi;
2762 /* Pick out the options in isa options. */
2763 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2765 if ((isa & isa_opts[i].mask) != 0)
2767 opts[num++][0] = isa_opts[i].option;
2768 isa &= ~ isa_opts[i].mask;
/* Any isa bits left over have no table entry; report them numerically.  */
2772 if (isa && add_nl_p)
2774 opts[num++][0] = isa_other;
2775 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2779 /* Add flag options. */
2780 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2782 if ((flags & flag_opts[i].mask) != 0)
2784 opts[num++][0] = flag_opts[i].option;
2785 flags &= ~ flag_opts[i].mask;
2789 if (flags && add_nl_p)
2791 opts[num++][0] = target_other;
2792 sprintf (target_other, "(other flags: %#x)", flags);
2795 /* Add -fpmath= option. */
2798 opts[num][0] = "-mfpmath=";
2799 switch ((int) fpmath)
2802 opts[num++][1] = "387";
2806 opts[num++][1] = "sse";
2809 case FPMATH_387 | FPMATH_SSE:
2810 opts[num++][1] = "sse+387";
2822 gcc_assert (num < ARRAY_SIZE (opts));
2824 /* Size the string. */
/* Separator is " \\\n" (3 chars) when wrapping, a single space otherwise.  */
2826 sep_len = (add_nl_p) ? 3 : 1;
2827 for (i = 0; i < num; i++)
2830 for (j = 0; j < 2; j++)
2832 len += strlen (opts[i][j]);
2835 /* Build the string. */
2836 ret = ptr = (char *) xmalloc (len);
2839 for (i = 0; i < num; i++)
2843 for (j = 0; j < 2; j++)
2844 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap lines once they would exceed 70 columns, when requested.  */
2851 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2859 for (j = 0; j < 2; j++)
2862 memcpy (ptr, opts[i][j], len2[j]);
2864 line_len += len2[j];
2869 gcc_assert (ret + len >= ptr);
2874 /* Return true, if profiling code should be emitted before
2875 prologue. Otherwise it returns false.
2876 Note: For x86 with "hotfix" it is sorried. */
2878 ix86_profile_before_prologue (void)
/* flag_fentry presumably corresponds to -mfentry, which emits the
   profiling call before the prologue -- TODO confirm against i386.opt.  */
2880 return flag_fentry != 0;
2883 /* Function that is callable from the debugger to print the current
2885 void ATTRIBUTE_UNUSED
2886 ix86_debug_options (void)
/* ix86_target_string xmalloc's its result; the matching free is in
   elided lines -- NOTE(review): verify against the full source.  */
2888 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2889 ix86_arch_string, ix86_tune_string,
2894 fprintf (stderr, "%s\n\n", opts);
2898 fputs ("<no options>\n\n", stderr);
/* Human-readable names of the stringop algorithms, generated from
   stringop.def; indexed by enum stringop_alg (see the strcmp loop in
   ix86_parse_stringop_strategy_string).  */
2903 static const char *stringop_alg_names[] = {
2905 #define DEF_ALG(alg, name) #name,
2906 #include "stringop.def"
2911 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2912 The string is of the following form (or comma separated list of it):
2914 strategy_alg:max_size:[align|noalign]
2916 where the full size range for the strategy is either [0, max_size] or
2917 [min_size, max_size], in which min_size is the max_size + 1 of the
2918 preceding range. The last size range must have max_size == -1.
2923 -mmemcpy-strategy=libcall:-1:noalign
2925 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2929 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2931 This is to tell the compiler to use the following strategy for memset
2932 1) when the expected size is between [1, 16], use rep_8byte strategy;
2933 2) when the size is between [17, 2048], use vector_loop;
2934 3) when the size is > 2048, use libcall. */
2936 struct stringop_size_range
/* Parse a comma-separated list of "alg:max_size:[no]align" triples (the
   -mmemcpy-strategy=/-mmemset-strategy= argument) and override the
   target's default stringop algorithm table in place via const_cast.
   Emits error() and returns early on malformed input.  */
2944 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2946 const struct stringop_algs *default_algs;
2947 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2948 char *curr_range_str, *next_range_str;
/* Pick the memset or memcpy cost table for the current (32/64-bit) mode.  */
2952 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2954 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2956 curr_range_str = strategy_str;
/* Split on ',' destructively; each piece is one alg:max:align triple.  */
2963 next_range_str = strchr (curr_range_str, ',');
2965 *next_range_str++ = '\0';
2967 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2968 alg_name, &maxs, align))
2970 error ("wrong arg %s to option %s", curr_range_str,
2971 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Ranges must be strictly increasing, except -1 marks "unbounded".  */
2975 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2977 error ("size ranges of option %s should be increasing",
2978 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Map the textual name to its enum stringop_alg index.  */
2982 for (i = 0; i < last_alg; i++)
2983 if (!strcmp (alg_name, stringop_alg_names[i]))
2988 error ("wrong stringop strategy name %s specified for option %s",
2990 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2994 if ((stringop_alg) i == rep_prefix_8_byte
2997 /* rep; movq isn't available in 32-bit code. */
2998 error ("stringop strategy name %s specified for option %s "
2999 "not supported for 32-bit code",
3001 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* NOTE(review): input_ranges[n] is written before the n > MAX_STRINGOP_ALGS
   check further down; verify in the full source that n cannot exceed the
   array bound inside this loop.  */
3005 input_ranges[n].max = maxs;
3006 input_ranges[n].alg = (stringop_alg) i;
3007 if (!strcmp (align, "align"))
3008 input_ranges[n].noalign = false;
3009 else if (!strcmp (align, "noalign"))
3010 input_ranges[n].noalign = true;
3013 error ("unknown alignment %s specified for option %s",
3014 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3018 curr_range_str = next_range_str;
3020 while (curr_range_str);
3022 if (input_ranges[n - 1].max != -1)
3024 error ("the max value for the last size range should be -1"
3026 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3030 if (n > MAX_STRINGOP_ALGS)
3032 error ("too many size ranges specified in option %s",
3033 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3037 /* Now override the default algs array. */
/* The cost tables are declared const, so cast that away field by field.  */
3038 for (i = 0; i < n; i++)
3040 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3041 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3042 = input_ranges[i].alg;
3043 *const_cast<int *>(&default_algs->size[i].noalign)
3044 = input_ranges[i].noalign;
3049 /* parse -mtune-ctrl= option. When DUMP is true,
3050 print the features that are explicitly set. */
3053 parse_mtune_ctrl_str (bool dump)
/* Nothing to do when -mtune-ctrl= was not given.  */
3055 if (!ix86_tune_ctrl_string)
3058 char *next_feature_string = NULL;
/* Work on a writable copy; the ',' separators are overwritten below.  */
3059 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3060 char *orig = curr_feature_string;
3066 next_feature_string = strchr (curr_feature_string, ',');
3067 if (next_feature_string)
3068 *next_feature_string++ = '\0';
/* A leading '^' negates the feature (sets `clear`, assigned in an
   elided line -- NOTE(review): confirm against the full source).  */
3069 if (*curr_feature_string == '^')
3071 curr_feature_string++;
3074 for (i = 0; i < X86_TUNE_LAST; i++)
3076 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3078 ix86_tune_features[i] = !clear;
3080 fprintf (stderr, "Explicitly %s feature %s\n",
3081 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* Loop fell through: the name matched no known tuning feature.  The
   "- 1" re-includes the stripped '^' in the diagnostic.  */
3085 if (i == X86_TUNE_LAST)
3086 error ("Unknown parameter to option -mtune-ctrl: %s",
3087 clear ? curr_feature_string - 1 : curr_feature_string);
3088 curr_feature_string = next_feature_string;
3090 while (curr_feature_string);
3094 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3098 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
/* One bit per processor in the initial_ix86_tune_features masks.  */
3100 unsigned int ix86_tune_mask = 1u << ix86_tune;
3103 for (i = 0; i < X86_TUNE_LAST; ++i)
/* With -mtune-ctrl overriding everything, start from all-zero defaults;
   otherwise derive each feature from the per-processor mask table.  */
3105 if (ix86_tune_no_default)
3106 ix86_tune_features[i] = 0;
3108 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3113 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3114 for (i = 0; i < X86_TUNE_LAST; i++)
3115 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3116 ix86_tune_features[i] ? "on" : "off");
/* Apply any explicit -mtune-ctrl= overrides on top of the defaults.  */
3119 parse_mtune_ctrl_str (dump);
3123 /* Override various settings based on options. If MAIN_ARGS_P, the
3124 options are from the command line, otherwise they are from
3128 ix86_option_override_internal (bool main_args_p,
3129 struct gcc_options *opts,
3130 struct gcc_options *opts_set)
3133 unsigned int ix86_arch_mask;
3134 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3139 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3140 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3141 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3142 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3143 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3144 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3145 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3146 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3147 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3148 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3149 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3150 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3151 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3152 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3153 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3154 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3155 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3156 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3157 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3158 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3159 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3160 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3161 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3162 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3163 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3164 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3165 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3166 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3167 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3168 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3169 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3170 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3171 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3172 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3173 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3174 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3175 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3176 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3177 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3178 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3179 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3180 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3181 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3182 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3183 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3184 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3185 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3186 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3187 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3188 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3189 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3190 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3191 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3192 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3193 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3194 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3195 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3196 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
3199 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3200 | PTA_CX16 | PTA_FXSR)
3201 #define PTA_NEHALEM \
3202 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3203 #define PTA_WESTMERE \
3204 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3205 #define PTA_SANDYBRIDGE \
3206 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3207 #define PTA_IVYBRIDGE \
3208 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3209 #define PTA_HASWELL \
3210 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3211 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3212 #define PTA_BROADWELL \
3213 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3215 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3216 #define PTA_BONNELL \
3217 (PTA_CORE2 | PTA_MOVBE)
3218 #define PTA_SILVERMONT \
3219 (PTA_WESTMERE | PTA_MOVBE)
3221 /* if this reaches 64, need to widen struct pta flags below */
3225 const char *const name; /* processor name or nickname. */
3226 const enum processor_type processor;
3227 const enum attr_cpu schedule;
3228 const unsigned HOST_WIDE_INT flags;
3230 const processor_alias_table[] =
3232 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3233 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3234 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3235 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3236 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3237 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3238 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3239 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3240 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3241 PTA_MMX | PTA_SSE | PTA_FXSR},
3242 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3243 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3244 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3245 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3246 PTA_MMX | PTA_SSE | PTA_FXSR},
3247 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3248 PTA_MMX | PTA_SSE | PTA_FXSR},
3249 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3250 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3251 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3252 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3253 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3254 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3255 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3256 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3257 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3258 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3259 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3260 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3261 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3262 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3263 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3264 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3266 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3268 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3270 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3272 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3273 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3274 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3275 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3276 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3277 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3278 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3279 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3280 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3281 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3282 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3283 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3284 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3285 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3286 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3287 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3288 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3289 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3290 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3291 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3292 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3293 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3294 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3295 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3296 {"x86-64", PROCESSOR_K8, CPU_K8,
3297 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3298 {"k8", PROCESSOR_K8, CPU_K8,
3299 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3300 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3301 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3302 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3303 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3304 {"opteron", PROCESSOR_K8, CPU_K8,
3305 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3306 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3307 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3308 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3309 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3310 {"athlon64", PROCESSOR_K8, CPU_K8,
3311 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3312 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3313 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3314 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3315 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3316 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3317 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3318 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3319 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3320 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3321 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3322 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3323 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3324 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3325 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3326 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3327 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3328 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3329 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3330 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3331 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3332 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3333 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3334 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3335 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3336 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3337 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3338 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3339 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3340 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3341 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3342 | PTA_XSAVEOPT | PTA_FSGSBASE},
3343 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3344 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3345 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3346 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3347 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3348 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3349 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3350 | PTA_MOVBE | PTA_MWAITX},
3351 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3352 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3353 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3354 | PTA_FXSR | PTA_XSAVE},
3355 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3356 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3357 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3358 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3359 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3360 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3362 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3364 | PTA_HLE /* flags are only used for -march switch. */ },
3367 /* -mrecip options. */
3370 const char *string; /* option name */
3371 unsigned int mask; /* mask bits to set */
3373 const recip_options[] =
3375 { "all", RECIP_MASK_ALL },
3376 { "none", RECIP_MASK_NONE },
3377 { "div", RECIP_MASK_DIV },
3378 { "sqrt", RECIP_MASK_SQRT },
3379 { "vec-div", RECIP_MASK_VEC_DIV },
3380 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3383 int const pta_size = ARRAY_SIZE (processor_alias_table);
3385 /* Set up prefix/suffix so the error messages refer to either the command
3386 line argument, or the attribute(target). */
3395 prefix = "option(\"";
3400 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3401 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3402 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3403 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3404 #ifdef TARGET_BI_ARCH
3407 #if TARGET_BI_ARCH == 1
3408 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3409 is on and OPTION_MASK_ABI_X32 is off. We turn off
3410 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3412 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3413 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3415 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3416 on and OPTION_MASK_ABI_64 is off. We turn off
3417 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3418 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3419 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3420 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3421 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3426 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3428 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3429 OPTION_MASK_ABI_64 for TARGET_X32. */
3430 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3431 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3433 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3434 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3435 | OPTION_MASK_ABI_X32
3436 | OPTION_MASK_ABI_64);
3437 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3439 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3440 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3441 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3442 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3445 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3446 SUBTARGET_OVERRIDE_OPTIONS;
3449 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3450 SUBSUBTARGET_OVERRIDE_OPTIONS;
3453 /* -fPIC is the default for x86_64. */
3454 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3455 opts->x_flag_pic = 2;
3457 /* Need to check -mtune=generic first. */
3458 if (opts->x_ix86_tune_string)
3460 /* As special support for cross compilers we read -mtune=native
3461 as -mtune=generic. With native compilers we won't see the
3462 -mtune=native, as it was changed by the driver. */
3463 if (!strcmp (opts->x_ix86_tune_string, "native"))
3465 opts->x_ix86_tune_string = "generic";
3467 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3468 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3469 "%stune=k8%s or %stune=generic%s instead as appropriate",
3470 prefix, suffix, prefix, suffix, prefix, suffix);
3474 if (opts->x_ix86_arch_string)
3475 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3476 if (!opts->x_ix86_tune_string)
3478 opts->x_ix86_tune_string
3479 = processor_target_table[TARGET_CPU_DEFAULT].name;
3480 ix86_tune_defaulted = 1;
3483 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3484 or defaulted. We need to use a sensible tune option. */
3485 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3487 opts->x_ix86_tune_string = "generic";
3491 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3492 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 /* rep; movq isn't available in 32-bit code. */
3495 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3496 opts->x_ix86_stringop_alg = no_stringop;
3499 if (!opts->x_ix86_arch_string)
3500 opts->x_ix86_arch_string
3501 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3502 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3504 ix86_arch_specified = 1;
3506 if (opts_set->x_ix86_pmode)
3508 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3509 && opts->x_ix86_pmode == PMODE_SI)
3510 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3511 && opts->x_ix86_pmode == PMODE_DI))
3512 error ("address mode %qs not supported in the %s bit mode",
3513 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3514 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3517 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3518 ? PMODE_DI : PMODE_SI;
3520 if (!opts_set->x_ix86_abi)
3521 opts->x_ix86_abi = DEFAULT_ABI;
3523 /* For targets using ms ABI enable ms-extensions, if not
3524 explicit turned off. For non-ms ABI we turn off this
3526 if (!opts_set->x_flag_ms_extensions)
3527 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3529 if (opts_set->x_ix86_cmodel)
3531 switch (opts->x_ix86_cmodel)
3535 if (opts->x_flag_pic)
3536 opts->x_ix86_cmodel = CM_SMALL_PIC;
3537 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3538 error ("code model %qs not supported in the %s bit mode",
3544 if (opts->x_flag_pic)
3545 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3546 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3547 error ("code model %qs not supported in the %s bit mode",
3549 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3550 error ("code model %qs not supported in x32 mode",
3556 if (opts->x_flag_pic)
3557 opts->x_ix86_cmodel = CM_LARGE_PIC;
3558 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3559 error ("code model %qs not supported in the %s bit mode",
3561 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3562 error ("code model %qs not supported in x32 mode",
3567 if (opts->x_flag_pic)
3568 error ("code model %s does not support PIC mode", "32");
3569 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3570 error ("code model %qs not supported in the %s bit mode",
3575 if (opts->x_flag_pic)
3577 error ("code model %s does not support PIC mode", "kernel");
3578 opts->x_ix86_cmodel = CM_32;
3580 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3581 error ("code model %qs not supported in the %s bit mode",
3591 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3592 use of rip-relative addressing. This eliminates fixups that
3593 would otherwise be needed if this object is to be placed in a
3594 DLL, and is essentially just as efficient as direct addressing. */
3595 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3596 && (TARGET_RDOS || TARGET_PECOFF))
3597 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3598 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3599 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3601 opts->x_ix86_cmodel = CM_32;
3603 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3605 error ("-masm=intel not supported in this configuration");
3606 opts->x_ix86_asm_dialect = ASM_ATT;
3608 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3609 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3610 sorry ("%i-bit mode not compiled in",
3611 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3613 for (i = 0; i < pta_size; i++)
3614 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3616 ix86_schedule = processor_alias_table[i].schedule;
3617 ix86_arch = processor_alias_table[i].processor;
3618 /* Default cpu tuning to the architecture. */
3619 ix86_tune = ix86_arch;
3621 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3622 && !(processor_alias_table[i].flags & PTA_64BIT))
3623 error ("CPU you selected does not support x86-64 "
3626 if (processor_alias_table[i].flags & PTA_MMX
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3629 if (processor_alias_table[i].flags & PTA_3DNOW
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3632 if (processor_alias_table[i].flags & PTA_3DNOW_A
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3635 if (processor_alias_table[i].flags & PTA_SSE
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3638 if (processor_alias_table[i].flags & PTA_SSE2
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3641 if (processor_alias_table[i].flags & PTA_SSE3
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3644 if (processor_alias_table[i].flags & PTA_SSSE3
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3647 if (processor_alias_table[i].flags & PTA_SSE4_1
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3650 if (processor_alias_table[i].flags & PTA_SSE4_2
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3653 if (processor_alias_table[i].flags & PTA_AVX
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3656 if (processor_alias_table[i].flags & PTA_AVX2
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3659 if (processor_alias_table[i].flags & PTA_FMA
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3662 if (processor_alias_table[i].flags & PTA_SSE4A
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3665 if (processor_alias_table[i].flags & PTA_FMA4
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3668 if (processor_alias_table[i].flags & PTA_XOP
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3671 if (processor_alias_table[i].flags & PTA_LWP
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3674 if (processor_alias_table[i].flags & PTA_ABM
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3677 if (processor_alias_table[i].flags & PTA_BMI
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3680 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3683 if (processor_alias_table[i].flags & PTA_TBM
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3686 if (processor_alias_table[i].flags & PTA_BMI2
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3689 if (processor_alias_table[i].flags & PTA_CX16
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3692 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3695 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3696 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3699 if (processor_alias_table[i].flags & PTA_MOVBE
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3702 if (processor_alias_table[i].flags & PTA_AES
3703 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3704 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3705 if (processor_alias_table[i].flags & PTA_SHA
3706 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3707 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3708 if (processor_alias_table[i].flags & PTA_PCLMUL
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3711 if (processor_alias_table[i].flags & PTA_FSGSBASE
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3714 if (processor_alias_table[i].flags & PTA_RDRND
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3717 if (processor_alias_table[i].flags & PTA_F16C
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3720 if (processor_alias_table[i].flags & PTA_RTM
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3723 if (processor_alias_table[i].flags & PTA_HLE
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3726 if (processor_alias_table[i].flags & PTA_PRFCHW
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3729 if (processor_alias_table[i].flags & PTA_RDSEED
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3732 if (processor_alias_table[i].flags & PTA_ADX
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3735 if (processor_alias_table[i].flags & PTA_FXSR
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3738 if (processor_alias_table[i].flags & PTA_XSAVE
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3741 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3744 if (processor_alias_table[i].flags & PTA_AVX512F
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3747 if (processor_alias_table[i].flags & PTA_AVX512ER
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3750 if (processor_alias_table[i].flags & PTA_AVX512PF
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3753 if (processor_alias_table[i].flags & PTA_AVX512CD
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3756 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3757 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3758 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3759 if (processor_alias_table[i].flags & PTA_PCOMMIT
3760 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3761 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3762 if (processor_alias_table[i].flags & PTA_CLWB
3763 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3765 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3768 if (processor_alias_table[i].flags & PTA_XSAVEC
3769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3771 if (processor_alias_table[i].flags & PTA_XSAVES
3772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3774 if (processor_alias_table[i].flags & PTA_AVX512DQ
3775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3777 if (processor_alias_table[i].flags & PTA_AVX512BW
3778 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3779 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3780 if (processor_alias_table[i].flags & PTA_AVX512VL
3781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3783 if (processor_alias_table[i].flags & PTA_MPX
3784 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3786 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3787 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3788 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3789 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3790 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3791 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3792 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3793 x86_prefetch_sse = true;
3794 if (processor_alias_table[i].flags & PTA_MWAITX
3795 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3796 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3801 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3802 error ("Intel MPX does not support x32");
3804 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3805 error ("Intel MPX does not support x32");
3807 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3808 error ("generic CPU can be used only for %stune=%s %s",
3809 prefix, suffix, sw);
3810 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3811 error ("intel CPU can be used only for %stune=%s %s",
3812 prefix, suffix, sw);
3813 else if (i == pta_size)
3814 error ("bad value (%s) for %sarch=%s %s",
3815 opts->x_ix86_arch_string, prefix, suffix, sw);
3817 ix86_arch_mask = 1u << ix86_arch;
3818 for (i = 0; i < X86_ARCH_LAST; ++i)
3819 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3821 for (i = 0; i < pta_size; i++)
3822 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3824 ix86_schedule = processor_alias_table[i].schedule;
3825 ix86_tune = processor_alias_table[i].processor;
3826 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3828 if (!(processor_alias_table[i].flags & PTA_64BIT))
3830 if (ix86_tune_defaulted)
3832 opts->x_ix86_tune_string = "x86-64";
3833 for (i = 0; i < pta_size; i++)
3834 if (! strcmp (opts->x_ix86_tune_string,
3835 processor_alias_table[i].name))
3837 ix86_schedule = processor_alias_table[i].schedule;
3838 ix86_tune = processor_alias_table[i].processor;
3841 error ("CPU you selected does not support x86-64 "
3845 /* Intel CPUs have always interpreted SSE prefetch instructions as
3846 NOPs; so, we can enable SSE prefetch instructions even when
3847 -mtune (rather than -march) points us to a processor that has them.
3848 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3849 higher processors. */
3851 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3852 x86_prefetch_sse = true;
3856 if (ix86_tune_specified && i == pta_size)
3857 error ("bad value (%s) for %stune=%s %s",
3858 opts->x_ix86_tune_string, prefix, suffix, sw);
3860 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3862 #ifndef USE_IX86_FRAME_POINTER
3863 #define USE_IX86_FRAME_POINTER 0
3866 #ifndef USE_X86_64_FRAME_POINTER
3867 #define USE_X86_64_FRAME_POINTER 0
3870 /* Set the default values for switches whose default depends on TARGET_64BIT
3871 in case they weren't overwritten by command line options. */
3872 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3874 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3875 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3876 if (opts->x_flag_asynchronous_unwind_tables
3877 && !opts_set->x_flag_unwind_tables
3878 && TARGET_64BIT_MS_ABI)
3879 opts->x_flag_unwind_tables = 1;
3880 if (opts->x_flag_asynchronous_unwind_tables == 2)
3881 opts->x_flag_unwind_tables
3882 = opts->x_flag_asynchronous_unwind_tables = 1;
3883 if (opts->x_flag_pcc_struct_return == 2)
3884 opts->x_flag_pcc_struct_return = 0;
3888 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3889 opts->x_flag_omit_frame_pointer
3890 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3891 if (opts->x_flag_asynchronous_unwind_tables == 2)
3892 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3893 if (opts->x_flag_pcc_struct_return == 2)
3894 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3897 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3898 /* TODO: ix86_cost should be chosen at instruction or function granuality
3899 so for cold code we use size_cost even in !optimize_size compilation. */
3900 if (opts->x_optimize_size)
3901 ix86_cost = &ix86_size_cost;
3903 ix86_cost = ix86_tune_cost;
3905 /* Arrange to set up i386_stack_locals for all functions. */
3906 init_machine_status = ix86_init_machine_status;
3908 /* Validate -mregparm= value. */
3909 if (opts_set->x_ix86_regparm)
3911 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3912 warning (0, "-mregparm is ignored in 64-bit mode");
3913 if (opts->x_ix86_regparm > REGPARM_MAX)
3915 error ("-mregparm=%d is not between 0 and %d",
3916 opts->x_ix86_regparm, REGPARM_MAX);
3917 opts->x_ix86_regparm = 0;
3920 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3921 opts->x_ix86_regparm = REGPARM_MAX;
3923 /* Default align_* from the processor table. */
3924 if (opts->x_align_loops == 0)
3926 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3927 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3929 if (opts->x_align_jumps == 0)
3931 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3932 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3934 if (opts->x_align_functions == 0)
3936 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3939 /* Provide default for -mbranch-cost= value. */
3940 if (!opts_set->x_ix86_branch_cost)
3941 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3943 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3945 opts->x_target_flags
3946 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3948 /* Enable by default the SSE and MMX builtins. Do allow the user to
3949 explicitly disable any of these. In particular, disabling SSE and
3950 MMX for kernel code is extremely useful. */
3951 if (!ix86_arch_specified)
3952 opts->x_ix86_isa_flags
3953 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3954 | TARGET_SUBTARGET64_ISA_DEFAULT)
3955 & ~opts->x_ix86_isa_flags_explicit);
3957 if (TARGET_RTD_P (opts->x_target_flags))
3958 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3962 opts->x_target_flags
3963 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3965 if (!ix86_arch_specified)
3966 opts->x_ix86_isa_flags
3967 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3969 /* i386 ABI does not specify red zone. It still makes sense to use it
3970 when programmer takes care to stack from being destroyed. */
3971 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3972 opts->x_target_flags |= MASK_NO_RED_ZONE;
3975 /* Keep nonleaf frame pointers. */
3976 if (opts->x_flag_omit_frame_pointer)
3977 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3978 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3979 opts->x_flag_omit_frame_pointer = 1;
3981 /* If we're doing fast math, we don't care about comparison order
3982 wrt NaNs. This lets us use a shorter comparison sequence. */
3983 if (opts->x_flag_finite_math_only)
3984 opts->x_target_flags &= ~MASK_IEEE_FP;
3986 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3987 since the insns won't need emulation. */
3988 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3989 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3991 /* Likewise, if the target doesn't have a 387, or we've specified
3992 software floating point, don't use 387 inline intrinsics. */
3993 if (!TARGET_80387_P (opts->x_target_flags))
3994 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3996 /* Turn on MMX builtins for -msse. */
3997 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3998 opts->x_ix86_isa_flags
3999 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4001 /* Enable SSE prefetch. */
4002 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4003 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4004 x86_prefetch_sse = true;
4006 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4007 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4008 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4009 opts->x_ix86_isa_flags
4010 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4012 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4013 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4014 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4015 opts->x_ix86_isa_flags
4016 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4018 /* Enable lzcnt instruction for -mabm. */
4019 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4020 opts->x_ix86_isa_flags
4021 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4023 /* Validate -mpreferred-stack-boundary= value or default it to
4024 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4025 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4026 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4028 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4029 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4030 int max = (TARGET_SEH ? 4 : 12);
4032 if (opts->x_ix86_preferred_stack_boundary_arg < min
4033 || opts->x_ix86_preferred_stack_boundary_arg > max)
4036 error ("-mpreferred-stack-boundary is not supported "
4039 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4040 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4043 ix86_preferred_stack_boundary
4044 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4047 /* Set the default value for -mstackrealign. */
4048 if (opts->x_ix86_force_align_arg_pointer == -1)
4049 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4051 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4053 /* Validate -mincoming-stack-boundary= value or default it to
4054 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4055 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4056 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4058 if (opts->x_ix86_incoming_stack_boundary_arg
4059 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2)
4060 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4061 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4062 opts->x_ix86_incoming_stack_boundary_arg,
4063 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2);
4066 ix86_user_incoming_stack_boundary
4067 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4068 ix86_incoming_stack_boundary
4069 = ix86_user_incoming_stack_boundary;
4073 #ifndef NO_PROFILE_COUNTERS
4074 if (flag_nop_mcount)
4075 error ("-mnop-mcount is not compatible with this target");
4077 if (flag_nop_mcount && flag_pic)
4078 error ("-mnop-mcount is not implemented for -fPIC");
4080 /* Accept -msseregparm only if at least SSE support is enabled. */
4081 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4082 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4083 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4085 if (opts_set->x_ix86_fpmath)
4087 if (opts->x_ix86_fpmath & FPMATH_SSE)
4089 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4091 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4092 opts->x_ix86_fpmath = FPMATH_387;
4094 else if ((opts->x_ix86_fpmath & FPMATH_387)
4095 && !TARGET_80387_P (opts->x_target_flags))
4097 warning (0, "387 instruction set disabled, using SSE arithmetics");
4098 opts->x_ix86_fpmath = FPMATH_SSE;
4102 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4103 fpmath=387. The second is however default at many targets since the
4104 extra 80bit precision of temporaries is considered to be part of ABI.
4105 Overwrite the default at least for -ffast-math.
4106 TODO: -mfpmath=both seems to produce same performing code with bit
4107 smaller binaries. It is however not clear if register allocation is
4108 ready for this setting.
4109 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
4110 codegen. We may switch to 387 with -ffast-math for size optimized
4112 else if (fast_math_flags_set_p (&global_options)
4113 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4114 opts->x_ix86_fpmath = FPMATH_SSE;
4116 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4118 /* If the i387 is disabled, then do not return values in it. */
4119 if (!TARGET_80387_P (opts->x_target_flags))
4120 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4122 /* Use external vectorized library in vectorizing intrinsics. */
4123 if (opts_set->x_ix86_veclibabi_type)
4124 switch (opts->x_ix86_veclibabi_type)
4126 case ix86_veclibabi_type_svml:
4127 ix86_veclib_handler = ix86_veclibabi_svml;
4130 case ix86_veclibabi_type_acml:
4131 ix86_veclib_handler = ix86_veclibabi_acml;
4138 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4139 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4140 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4142 /* If stack probes are required, the space used for large function
4143 arguments on the stack must also be probed, so enable
4144 -maccumulate-outgoing-args so this happens in the prologue. */
4145 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4146 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4148 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4149 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4150 "for correctness", prefix, suffix);
4151 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4154 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4155 so enable -maccumulate-outgoing-args when %ebp is fixed. */
4156 if (fixed_regs[BP_REG]
4157 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4159 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4160 warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
4162 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4165 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4168 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4169 p = strchr (internal_label_prefix, 'X');
4170 internal_label_prefix_len = p - internal_label_prefix;
4174 /* When scheduling description is not available, disable scheduler pass
4175 so it won't slow down the compilation and make x87 code slower. */
4176 if (!TARGET_SCHEDULE)
4177 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4179 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4180 ix86_tune_cost->simultaneous_prefetches,
4181 opts->x_param_values,
4182 opts_set->x_param_values);
4183 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4184 ix86_tune_cost->prefetch_block,
4185 opts->x_param_values,
4186 opts_set->x_param_values);
4187 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4188 ix86_tune_cost->l1_cache_size,
4189 opts->x_param_values,
4190 opts_set->x_param_values);
4191 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4192 ix86_tune_cost->l2_cache_size,
4193 opts->x_param_values,
4194 opts_set->x_param_values);
4196 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
4197 if (opts->x_flag_prefetch_loop_arrays < 0
4199 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4200 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4201 opts->x_flag_prefetch_loop_arrays = 1;
4203 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4204 can be opts->x_optimized to ap = __builtin_next_arg (0). */
4205 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4206 targetm.expand_builtin_va_start = NULL;
4208 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4210 ix86_gen_leave = gen_leave_rex64;
4211 if (Pmode == DImode)
4213 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4214 ix86_gen_tls_local_dynamic_base_64
4215 = gen_tls_local_dynamic_base_64_di;
4219 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4220 ix86_gen_tls_local_dynamic_base_64
4221 = gen_tls_local_dynamic_base_64_si;
4225 ix86_gen_leave = gen_leave;
4227 if (Pmode == DImode)
4229 ix86_gen_add3 = gen_adddi3;
4230 ix86_gen_sub3 = gen_subdi3;
4231 ix86_gen_sub3_carry = gen_subdi3_carry;
4232 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4233 ix86_gen_andsp = gen_anddi3;
4234 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4235 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4236 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4237 ix86_gen_monitor = gen_sse3_monitor_di;
4238 ix86_gen_monitorx = gen_monitorx_di;
4242 ix86_gen_add3 = gen_addsi3;
4243 ix86_gen_sub3 = gen_subsi3;
4244 ix86_gen_sub3_carry = gen_subsi3_carry;
4245 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4246 ix86_gen_andsp = gen_andsi3;
4247 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4248 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4249 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4250 ix86_gen_monitor = gen_sse3_monitor_si;
4251 ix86_gen_monitorx = gen_monitorx_si;
4255 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4256 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4257 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4260 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4262 if (opts->x_flag_fentry > 0)
4263 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4265 opts->x_flag_fentry = 0;
4267 else if (TARGET_SEH)
4269 if (opts->x_flag_fentry == 0)
4270 sorry ("-mno-fentry isn%'t compatible with SEH");
4271 opts->x_flag_fentry = 1;
4273 else if (opts->x_flag_fentry < 0)
4275 #if defined(PROFILE_BEFORE_PROLOGUE)
4276 opts->x_flag_fentry = 1;
4278 opts->x_flag_fentry = 0;
4282 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4283 opts->x_target_flags |= MASK_VZEROUPPER;
4284 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4285 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4286 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4287 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4288 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4289 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4290 /* Enable 128-bit AVX instruction generation
4291 for the auto-vectorizer. */
4292 if (TARGET_AVX128_OPTIMAL
4293 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4294 opts->x_target_flags |= MASK_PREFER_AVX128;
4296 if (opts->x_ix86_recip_name)
4298 char *p = ASTRDUP (opts->x_ix86_recip_name);
4300 unsigned int mask, i;
4303 while ((q = strtok (p, ",")) != NULL)
4314 if (!strcmp (q, "default"))
4315 mask = RECIP_MASK_ALL;
4318 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4319 if (!strcmp (q, recip_options[i].string))
4321 mask = recip_options[i].mask;
4325 if (i == ARRAY_SIZE (recip_options))
4327 error ("unknown option for -mrecip=%s", q);
4329 mask = RECIP_MASK_NONE;
4333 opts->x_recip_mask_explicit |= mask;
4335 opts->x_recip_mask &= ~mask;
4337 opts->x_recip_mask |= mask;
4341 if (TARGET_RECIP_P (opts->x_target_flags))
4342 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4343 else if (opts_set->x_target_flags & MASK_RECIP)
4344 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4346 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4347 for 64-bit Bionic. */
4348 if (TARGET_HAS_BIONIC
4349 && !(opts_set->x_target_flags
4350 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4351 opts->x_target_flags |= (TARGET_64BIT
4352 ? MASK_LONG_DOUBLE_128
4353 : MASK_LONG_DOUBLE_64);
4355 /* Only one of them can be active. */
4356 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4357 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4359 /* Save the initial options in case the user does function specific
4362 target_option_default_node = target_option_current_node
4363 = build_target_option_node (opts);
4365 /* Handle stack protector */
4366 if (!opts_set->x_ix86_stack_protector_guard)
4367 opts->x_ix86_stack_protector_guard
4368 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4370 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4371 if (opts->x_ix86_tune_memcpy_strategy)
4373 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4374 ix86_parse_stringop_strategy_string (str, false);
4378 if (opts->x_ix86_tune_memset_strategy)
4380 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4381 ix86_parse_stringop_strategy_string (str, true);
4386 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4389 ix86_option_override (void)
4391 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4392 struct register_pass_info insert_vzeroupper_info
4393 = { pass_insert_vzeroupper, "reload",
4394 1, PASS_POS_INSERT_AFTER
4397 ix86_option_override_internal (true, &global_options, &global_options_set);
4400 /* This needs to be done at start up. It's convenient to do it here. */
4401 register_pass (&insert_vzeroupper_info);
4404 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4406 ix86_offload_options (void)
4409 return xstrdup ("-foffload-abi=lp64");
4410 return xstrdup ("-foffload-abi=ilp32");
4413 /* Update register usage after having seen the compiler flags. */
/* NOTE(review): sampled excerpt -- declarations of I and C_MASK, braces and
   several guard conditions (e.g. the !TARGET_64BIT, !TARGET_MMX, !TARGET_SSE
   and !TARGET_MPX tests) are missing lines here.  Code left byte-identical.
   The recurring idiom
     fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
   "squashes" a register: it becomes fixed, call-used, and nameless so it is
   never allocated or printed.  */
4416 ix86_conditional_register_usage (void)
4420 /* For 32-bit targets, squash the REX registers. */
4423 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4424 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4425 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4426 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4427 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4428 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4431 /* See the definition of CALL_USED_REGISTERS in i386.h. */
/* C_MASK selects which conditional CALL_USED_REGISTERS bit applies to this
   ABI; the 32-bit arm of this conditional is among the missing lines.  */
4432 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4433 : TARGET_64BIT ? (1 << 2)
4436 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4438 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4440 /* Set/reset conditionally defined registers from
4441 CALL_USED_REGISTERS initializer. */
4442 if (call_used_regs[i] > 1)
4443 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4445 /* Calculate registers of CLOBBERED_REGS register set
4446 as call used registers from GENERAL_REGS register set. */
4447 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4448 && call_used_regs[i])
4449 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4452 /* If MMX is disabled, squash the registers. */
4454 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4455 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4456 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4458 /* If SSE is disabled, squash the registers. */
4460 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4461 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4462 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4464 /* If the FPU is disabled, squash the registers. */
4465 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4466 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4467 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4468 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4470 /* If AVX512F is disabled, squash the registers. */
4471 if (! TARGET_AVX512F)
4473 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4474 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4476 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4477 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4480 /* If MPX is disabled, squash the registers. */
4482 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4483 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4487 /* Save the current options */
/* Snapshot the target-specific state (global ix86_* variables plus the
   x_ix86_* fields of OPTS) into the cl_target_option record PTR, for later
   restoration by ix86_function_specific_restore.  NOTE(review): sampled
   excerpt -- return type, braces and blank lines are missing.  */
4490 ix86_function_specific_save (struct cl_target_option *ptr,
4491 struct gcc_options *opts)
4493 ptr->arch = ix86_arch;
4494 ptr->schedule = ix86_schedule;
4495 ptr->prefetch_sse = x86_prefetch_sse;
4496 ptr->tune = ix86_tune;
4497 ptr->branch_cost = ix86_branch_cost;
4498 ptr->tune_defaulted = ix86_tune_defaulted;
4499 ptr->arch_specified = ix86_arch_specified;
4500 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4501 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4502 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4503 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4504 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4505 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4506 ptr->x_ix86_abi = opts->x_ix86_abi;
4507 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4508 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4509 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4510 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4511 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4512 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4513 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4514 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4515 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4516 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4517 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4518 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4519 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4520 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4521 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4522 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4523 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4524 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4525 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4526 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4528 /* The fields are char but the variables are not; make sure the
4529 values fit in the fields. */
/* The asserts catch silent narrowing if a processor/schedule enum ever
   grows past the range of the char-sized save-record fields.  */
4530 gcc_assert (ptr->arch == ix86_arch);
4531 gcc_assert (ptr->schedule == ix86_schedule);
4532 gcc_assert (ptr->tune == ix86_tune);
4533 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4536 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copy the saved record PTR back
   into the global ix86_* variables and OPTS, then recompute derived state
   (cost tables, arch feature bits, tune features) when arch or tune
   changed.  NOTE(review): sampled excerpt -- return type, braces, the
   declaration of I and blank lines are missing.  */
4539 ix86_function_specific_restore (struct gcc_options *opts,
4540 struct cl_target_option *ptr)
4542 enum processor_type old_tune = ix86_tune;
4543 enum processor_type old_arch = ix86_arch;
4544 unsigned int ix86_arch_mask;
4547 /* We don't change -fPIC. */
4548 opts->x_flag_pic = flag_pic;
4550 ix86_arch = (enum processor_type) ptr->arch;
4551 ix86_schedule = (enum attr_cpu) ptr->schedule;
4552 ix86_tune = (enum processor_type) ptr->tune;
4553 x86_prefetch_sse = ptr->prefetch_sse;
4554 opts->x_ix86_branch_cost = ptr->branch_cost;
4555 ix86_tune_defaulted = ptr->tune_defaulted;
4556 ix86_arch_specified = ptr->arch_specified;
4557 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4558 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4559 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4560 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4561 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4562 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4563 opts->x_ix86_abi = ptr->x_ix86_abi;
4564 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4565 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4566 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4567 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4568 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4569 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4570 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4571 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4572 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4573 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4574 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4575 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4576 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4577 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4578 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4579 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4580 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4581 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4582 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4583 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4584 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4585 /* TODO: ix86_cost should be chosen at instruction or function granuality
4586 so for cold code we use size_cost even in !optimize_size compilation. */
4587 if (opts->x_optimize_size)
4588 ix86_cost = &ix86_size_cost;
4590 ix86_cost = ix86_tune_cost;
4592 /* Recreate the arch feature tests if the arch changed */
4593 if (old_arch != ix86_arch)
4595 ix86_arch_mask = 1u << ix86_arch;
4596 for (i = 0; i < X86_ARCH_LAST; ++i)
4597 ix86_arch_features[i]
4598 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4601 /* Recreate the tune optimization tests */
4602 if (old_tune != ix86_tune)
4603 set_ix86_tune_features (ix86_tune, false);
4606 /* Adjust target options after streaming them in. This is mainly about
4607 reconciling them with global options. */
4610 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4612 /* flag_pic is a global option, but ix86_cmodel is target saved option
4613 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4614 for PIC, or error out. */
/* NOTE(review): sampled excerpt -- the `case CM_*:` labels, `break`s and the
   flag_pic guard around the two switches are missing lines; only the
   assignment arms survived.  Presumably the first switch upgrades non-PIC
   code models to their PIC variants when flag_pic is set, and the second
   downgrades PIC models when it is clear -- confirm against the full file.  */
4616 switch (ptr->x_ix86_cmodel)
4619 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4623 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4627 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4631 error ("code model %s does not support PIC mode", "kernel");
4638 switch (ptr->x_ix86_cmodel)
4641 ptr->x_ix86_cmodel = CM_SMALL;
4645 ptr->x_ix86_cmodel = CM_MEDIUM;
4649 ptr->x_ix86_cmodel = CM_LARGE;
4657 /* Print the current options */
/* Dump the saved target options in PTR to FILE, indented by INDENT columns;
   used by -fdump debugging of per-function option nodes.  NOTE(review):
   sampled excerpt -- the declaration of TARGET_STRING and the NULL check
   guarding the final fprintf/free pair are missing lines.  */
4660 ix86_function_specific_print (FILE *file, int indent,
4661 struct cl_target_option *ptr)
4664 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4665 NULL, NULL, ptr->x_ix86_fpmath, false);
4667 gcc_assert (ptr->arch < PROCESSOR_max)
4668 fprintf (file, "%*sarch = %d (%s)\n",
4670 ptr->arch, processor_target_table[ptr->arch].name);
4672 gcc_assert (ptr->tune < PROCESSOR_max);
4673 fprintf (file, "%*stune = %d (%s)\n",
4675 ptr->tune, processor_target_table[ptr->tune].name);
4677 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4681 fprintf (file, "%*s%s\n", indent, "", target_string);
/* ix86_target_string allocates; caller of the hook does not free it.  */
4682 free (target_string);
4687 /* Inner function to process the attribute((target(...))), take an argument and
4688 set the current options from the argument. If we have a list, recursively go
/* Parses one attribute((target("..."))) argument (or recurses over a
   TREE_LIST of them), matching each comma-separated token against the
   ATTRS table and applying it to OPTS/OPTS_SET.  String options
   (arch=/tune=) are stashed in P_STRINGS; enum options record what they
   set in ENUM_OPTS_SET.  Returns false on any parse error.
   NOTE(review): sampled excerpt -- the struct declaration around the
   IX86_ATTR_* table, several locals, braces and `return` statements are
   missing lines.  Code left byte-identical.  */
4692 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4693 struct gcc_options *opts,
4694 struct gcc_options *opts_set,
4695 struct gcc_options *enum_opts_set)
/* Table-entry constructors: S is the attribute token, O the corresponding
   command-line OPT_* enumerator, M a target_flags mask (YES/NO only).  */
4700 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4701 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4702 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4703 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4704 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4720 enum ix86_opt_type type;
4725 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4726 IX86_ATTR_ISA ("abm", OPT_mabm),
4727 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4728 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4729 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4730 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4731 IX86_ATTR_ISA ("aes", OPT_maes),
4732 IX86_ATTR_ISA ("sha", OPT_msha),
4733 IX86_ATTR_ISA ("avx", OPT_mavx),
4734 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4735 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4736 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4737 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4738 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4739 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4740 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4741 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4742 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4743 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4744 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4745 IX86_ATTR_ISA ("sse", OPT_msse),
4746 IX86_ATTR_ISA ("sse2", OPT_msse2),
4747 IX86_ATTR_ISA ("sse3", OPT_msse3),
4748 IX86_ATTR_ISA ("sse4", OPT_msse4),
4749 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4750 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4751 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4752 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4753 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4754 IX86_ATTR_ISA ("fma", OPT_mfma),
4755 IX86_ATTR_ISA ("xop", OPT_mxop),
4756 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4757 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4758 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4759 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4760 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4761 IX86_ATTR_ISA ("hle", OPT_mhle),
4762 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4763 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4764 IX86_ATTR_ISA ("adx", OPT_madx),
4765 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4766 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4767 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4768 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4769 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4770 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4771 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4772 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4773 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4774 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4775 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4776 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
4779 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4781 /* string options */
4782 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4783 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4786 IX86_ATTR_YES ("cld",
4790 IX86_ATTR_NO ("fancy-math-387",
4791 OPT_mfancy_math_387,
4792 MASK_NO_FANCY_MATH_387),
4794 IX86_ATTR_YES ("ieee-fp",
4798 IX86_ATTR_YES ("inline-all-stringops",
4799 OPT_minline_all_stringops,
4800 MASK_INLINE_ALL_STRINGOPS),
4802 IX86_ATTR_YES ("inline-stringops-dynamically",
4803 OPT_minline_stringops_dynamically,
4804 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4806 IX86_ATTR_NO ("align-stringops",
4807 OPT_mno_align_stringops,
4808 MASK_NO_ALIGN_STRINGOPS),
4810 IX86_ATTR_YES ("recip",
4816 /* If this is a list, recurse to get the options. */
4817 if (TREE_CODE (args) == TREE_LIST)
4821 for (; args; args = TREE_CHAIN (args))
4822 if (TREE_VALUE (args)
4823 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4824 p_strings, opts, opts_set,
4831 else if (TREE_CODE (args) != STRING_CST)
4833 error ("attribute %<target%> argument not a string");
4837 /* Handle multiple arguments separated by commas. */
4838 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4840 while (next_optstr && *next_optstr != '\0')
4842 char *p = next_optstr;
4844 char *comma = strchr (next_optstr, ',');
4845 const char *opt_string;
4846 size_t len, opt_len;
4851 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the token before the comma; NEXT_OPTSTR advances past it.  */
4857 len = comma - next_optstr;
4858 next_optstr = comma + 1;
4866 /* Recognize no-xxx. */
4867 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4876 /* Find the option. */
4879 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4881 type = attrs[i].type;
4882 opt_len = attrs[i].len;
4883 if (ch == attrs[i].string[0]
4884 && ((type != ix86_opt_str && type != ix86_opt_enum)
4887 && memcmp (p, attrs[i].string, opt_len) == 0)
4890 mask = attrs[i].mask;
4891 opt_string = attrs[i].string;
4896 /* Process the option. */
4899 error ("attribute(target(\"%s\")) is unknown", orig_p);
4903 else if (type == ix86_opt_isa)
4905 struct cl_decoded_option decoded;
/* ISA tokens are re-dispatched through the regular -m option machinery
   so flag implications (e.g. avx2 -> avx) are honored.  */
4907 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4908 ix86_handle_option (opts, opts_set,
4909 &decoded, input_location);
4912 else if (type == ix86_opt_yes || type == ix86_opt_no)
4914 if (type == ix86_opt_no)
4915 opt_set_p = !opt_set_p;
4918 opts->x_target_flags |= mask;
4920 opts->x_target_flags &= ~mask;
4923 else if (type == ix86_opt_str)
4927 error ("option(\"%s\") was already specified", opt_string);
4931 p_strings[opt] = xstrdup (p + opt_len);
4934 else if (type == ix86_opt_enum)
4939 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4941 set_option (opts, enum_opts_set, opt, value,
4942 p + opt_len, DK_UNSPECIFIED, input_location,
4946 error ("attribute(target(\"%s\")) is unknown", orig_p);
4958 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parses ARGS via ix86_valid_target_attribute_inner_p, reruns the option
   override machinery if anything differs from the defaults, and builds a
   target-option tree node.  Returns error_mark_node on parse failure.
   Restores the caller's arch/tune strings and fpmath-set flag on exit so
   OPTS is not left pointing at attribute-local strings.
   NOTE(review): sampled excerpt -- the declarations of T and I, braces and
   the final `return t;` are missing lines.  */
4961 ix86_valid_target_attribute_tree (tree args,
4962 struct gcc_options *opts,
4963 struct gcc_options *opts_set)
4965 const char *orig_arch_string = opts->x_ix86_arch_string;
4966 const char *orig_tune_string = opts->x_ix86_tune_string;
4967 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4968 int orig_tune_defaulted = ix86_tune_defaulted;
4969 int orig_arch_specified = ix86_arch_specified;
4970 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4973 struct cl_target_option *def
4974 = TREE_TARGET_OPTION (target_option_default_node);
4975 struct gcc_options enum_opts_set;
4977 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4979 /* Process each of the options on the chain. */
4980 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4981 opts_set, &enum_opts_set))
4982 return error_mark_node;
4984 /* If the changed options are different from the default, rerun
4985 ix86_option_override_internal, and then save the options away.
4986 The string options are are attribute options, and will be undone
4987 when we copy the save structure. */
4988 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4989 || opts->x_target_flags != def->x_target_flags
4990 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4991 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4992 || enum_opts_set.x_ix86_fpmath)
4994 /* If we are using the default tune= or arch=, undo the string assigned,
4995 and use the default. */
4996 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4997 opts->x_ix86_arch_string
4998 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4999 else if (!orig_arch_specified)
5000 opts->x_ix86_arch_string = NULL;
5002 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5003 opts->x_ix86_tune_string
5004 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5005 else if (orig_tune_defaulted)
5006 opts->x_ix86_tune_string = NULL;
5008 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5009 if (enum_opts_set.x_ix86_fpmath)
5010 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5011 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5012 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5014 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5015 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5018 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5019 ix86_option_override_internal (false, opts, opts_set);
5021 /* Add any builtin functions with the new isa if any. */
5022 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5024 /* Save the current options unless we are validating options for
5026 t = build_target_option_node (opts);
5028 opts->x_ix86_arch_string = orig_arch_string;
5029 opts->x_ix86_tune_string = orig_tune_string;
5030 opts_set->x_ix86_fpmath = orig_fpmath_set;
5032 /* Free up memory allocated to hold the strings */
5033 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5034 free (option_strings[i]);
5040 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P: validate the attribute on FNDECL by
   building a scratch gcc_options seeded from the current optimization node
   and the default target node, then attaching the resulting target and
   optimization nodes to FNDECL.  NOTE(review): sampled excerpt -- return
   type, the ARGS parameter line, braces and `return` statements are
   missing lines.  */
5043 ix86_valid_target_attribute_p (tree fndecl,
5044 tree ARG_UNUSED (name),
5046 int ARG_UNUSED (flags))
5048 struct gcc_options func_options;
5049 tree new_target, new_optimize;
5052 /* attribute((target("default"))) does nothing, beyond
5053 affecting multi-versioning. */
5054 if (TREE_VALUE (args)
5055 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5056 && TREE_CHAIN (args) == NULL_TREE
5057 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5060 tree old_optimize = build_optimization_node (&global_options);
5062 /* Get the optimization options of the current function. */
5063 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5066 func_optimize = old_optimize;
5068 /* Init func_options. */
5069 memset (&func_options, 0, sizeof (func_options));
5070 init_options_struct (&func_options, NULL);
5071 lang_hooks.init_options_struct (&func_options);
5073 cl_optimization_restore (&func_options,
5074 TREE_OPTIMIZATION (func_optimize));
5076 /* Initialize func_options to the default before its target options can
5078 cl_target_option_restore (&func_options,
5079 TREE_TARGET_OPTION (target_option_default_node));
5081 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5082 &global_options_set);
5084 new_optimize = build_optimization_node (&func_options);
5086 if (new_target == error_mark_node)
5089 else if (fndecl && new_target)
5091 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5093 if (old_optimize != new_optimize)
5094 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5101 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P: CALLEE may be inlined into CALLER only when the
   callee's ISA flags are a subset of the caller's and the non-ISA target
   options (target_flags, arch, tune, fpmath, branch_cost) match.
   NOTE(review): sampled excerpt -- return type, the `ret` local, braces
   and the `return` statements are missing lines; each visible comparison
   presumably sets the result false when it differs.  */
5104 ix86_can_inline_p (tree caller, tree callee)
5107 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5108 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5110 /* If callee has no option attributes, then it is ok to inline. */
5114 /* If caller has no option attributes, but callee does then it is not ok to
5116 else if (!caller_tree)
5121 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5122 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5124 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
5125 can inline a SSE2 function but a SSE2 function can't inline a SSE4
5127 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5128 != callee_opts->x_ix86_isa_flags)
5131 /* See if we have the same non-isa options. */
5132 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5135 /* See if arch, tune, etc. are the same. */
5136 else if (caller_opts->arch != callee_opts->arch)
5139 else if (caller_opts->tune != callee_opts->tune)
5142 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5145 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5156 /* Remember the last target of ix86_set_current_function. */
/* GTY(()) marks the cache as a GC root so the fndecl is not collected.  */
5157 static GTY(()) tree ix86_previous_fndecl;
5159 /* Set targets globals to the default (or current #pragma GCC target
5160 if active). Invalidate ix86_previous_fndecl cache. */
/* NOTE(review): sampled excerpt -- return type, braces and an `else` line
   before the save_target_globals_default_opts call are missing.  Mirrors
   the restore logic in ix86_set_current_function below.  */
5163 ix86_reset_previous_fndecl (void)
5165 tree new_tree = target_option_current_node;
5166 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5167 if (TREE_TARGET_GLOBALS (new_tree))
5168 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5169 else if (new_tree == target_option_default_node)
5170 restore_target_globals (&default_target_globals);
5172 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5173 ix86_previous_fndecl = NULL_TREE;
5176 /* Establish appropriate back-end context for processing the function
5177 FNDECL. The argument might be NULL to indicate processing at top
5178 level, outside of any function scope. */
/* TARGET_SET_CURRENT_FUNCTION: swap the global target state to FNDECL's
   per-function target node, using ix86_previous_fndecl as a cache to avoid
   redundant (expensive) target_reinit work.  NOTE(review): sampled
   excerpt -- return type, the old_tree/new_tree declarations' surrounding
   braces and several `return`s are missing lines.  */
5180 ix86_set_current_function (tree fndecl)
5182 /* Only change the context if the function changes. This hook is called
5183 several times in the course of compiling a function, and we don't want to
5184 slow things down too much or call target_reinit when it isn't safe. */
5185 if (fndecl == ix86_previous_fndecl)
5189 if (ix86_previous_fndecl == NULL_TREE)
5190 old_tree = target_option_current_node;
5191 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5192 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5194 old_tree = target_option_default_node;
5196 if (fndecl == NULL_TREE)
5198 if (old_tree != target_option_current_node)
5199 ix86_reset_previous_fndecl ();
5203 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5204 if (new_tree == NULL_TREE)
5205 new_tree = target_option_default_node;
5207 if (old_tree != new_tree)
5209 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5210 if (TREE_TARGET_GLOBALS (new_tree))
5211 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5212 else if (new_tree == target_option_default_node)
5213 restore_target_globals (&default_target_globals);
5215 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5217 ix86_previous_fndecl = fndecl;
5219 /* 64-bit MS and SYSV ABI have different set of call used registers.
5220 Avoid expensive re-initialization of init_regs each time we switch
5221 function context. */
5223 && (call_used_regs[SI_REG]
5224 == (cfun->machine->call_abi == MS_ABI)))
5229 /* Return true if this goes in large data/bss. */
/* Large data exists only under the x86-64 medium code models; a VAR_DECL
   qualifies when explicitly placed in .ldata/.lbss, or when its size is
   unknown (<= 0 from int_size_in_bytes) or exceeds -mlarge-data-threshold.
   NOTE(review): sampled excerpt -- return type, braces and the explicit
   `return true/false;` lines are missing.  */
5232 ix86_in_large_data_p (tree exp)
5234 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5237 /* Functions are never large data. */
5238 if (TREE_CODE (exp) == FUNCTION_DECL)
5241 /* Automatic variables are never large data. */
5242 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5245 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5247 const char *section = DECL_SECTION_NAME (exp);
5248 if (strcmp (section, ".ldata") == 0
5249 || strcmp (section, ".lbss") == 0)
5255 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5257 /* If this is an incomplete type with size 0, then we can't put it
5258 in data because it might be too big when completed. Also,
5259 int_size_in_bytes returns -1 if size can vary or is larger than
5260 an integer in which case also it is safer to assume that it goes in
5262 if (size <= 0 || size > ix86_section_threshold)
5269 /* Switch to the appropriate section for output of DECL.
5270 DECL is either a `VAR_DECL' node or a constant of some sort.
5271 RELOC indicates whether forming the initial value of DECL requires
5272 link-time relocations. */
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF: large-model data gets ".l"
   prefixed section names; everything else falls through to the generic
   ELF selector.  NOTE(review): sampled excerpt -- the `case` labels for
   SECCAT_DATA/BSS/RODATA, `break`s and braces are missing lines.  */
5274 ATTRIBUTE_UNUSED static section *
5275 x86_64_elf_select_section (tree decl, int reloc,
5276 unsigned HOST_WIDE_INT align)
5278 if (ix86_in_large_data_p (decl))
5280 const char *sname = NULL;
5281 unsigned int flags = SECTION_WRITE;
5282 switch (categorize_decl_for_section (decl, reloc))
5287 case SECCAT_DATA_REL:
5288 sname = ".ldata.rel";
5290 case SECCAT_DATA_REL_LOCAL:
5291 sname = ".ldata.rel.local";
5293 case SECCAT_DATA_REL_RO:
5294 sname = ".ldata.rel.ro";
5296 case SECCAT_DATA_REL_RO_LOCAL:
5297 sname = ".ldata.rel.ro.local";
5301 flags |= SECTION_BSS;
5304 case SECCAT_RODATA_MERGE_STR:
5305 case SECCAT_RODATA_MERGE_STR_INIT:
5306 case SECCAT_RODATA_MERGE_CONST:
5310 case SECCAT_SRODATA:
5317 /* We don't split these for medium model. Place them into
5318 default sections and hope for best. */
5323 /* We might get called with string constants, but get_named_section
5324 doesn't like them as they are not DECLs. Also, we need to set
5325 flags in that case. */
5327 return get_section (sname, flags, NULL);
5328 return get_named_section (decl, sname, reloc);
5331 return default_elf_select_section (decl, reloc, align);
5334 /* Select a set of attributes for section NAME based on the properties
5335 of DECL and whether or not RELOC indicates that DECL's initializer
5336 might contain runtime relocations. */
/* Adds RELRO for the large-model read-only-after-reloc sections and BSS
   for the large-model bss sections on top of the generic flags.
   NOTE(review): the strncmp lengths look off by one for the stated
   prefixes: ".lbss." has 6 chars but 5 are compared, and
   ".gnu.linkonce.lb." has 17 but 16 are compared -- so names like
   ".lbssX" / ".gnu.linkonce.lbX" also match.  Verify against upstream
   intent before changing; the `return flags;` line is sampled out.  */
5338 static unsigned int ATTRIBUTE_UNUSED
5339 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5341 unsigned int flags = default_section_type_flags (decl, name, reloc);
5343 if (decl == NULL_TREE
5344 && (strcmp (name, ".ldata.rel.ro") == 0
5345 || strcmp (name, ".ldata.rel.ro.local") == 0))
5346 flags |= SECTION_RELRO;
5348 if (strcmp (name, ".lbss") == 0
5349 || strncmp (name, ".lbss.", 5) == 0
5350 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5351 flags |= SECTION_BSS;
5356 /* Build up a unique section name, expressed as a
5357 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5358 RELOC indicates whether the initial value of EXP requires
5359 link-time relocations. */
/* TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: large-model decls get a
   per-symbol ".l"-prefixed (optionally .gnu.linkonce-prefixed) section;
   other decls use default_unique_section.  NOTE(review): sampled
   excerpt -- several `case` labels, `break`s and braces are missing.  */
5361 static void ATTRIBUTE_UNUSED
5362 x86_64_elf_unique_section (tree decl, int reloc)
5364 if (ix86_in_large_data_p (decl))
5366 const char *prefix = NULL;
5367 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5368 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5370 switch (categorize_decl_for_section (decl, reloc))
5373 case SECCAT_DATA_REL:
5374 case SECCAT_DATA_REL_LOCAL:
5375 case SECCAT_DATA_REL_RO:
5376 case SECCAT_DATA_REL_RO_LOCAL:
5377 prefix = one_only ? ".ld" : ".ldata";
5380 prefix = one_only ? ".lb" : ".lbss";
5383 case SECCAT_RODATA_MERGE_STR:
5384 case SECCAT_RODATA_MERGE_STR_INIT:
5385 case SECCAT_RODATA_MERGE_CONST:
5386 prefix = one_only ? ".lr" : ".lrodata";
5388 case SECCAT_SRODATA:
5395 /* We don't split these for medium model. Place them into
5396 default sections and hope for best. */
5401 const char *name, *linkonce;
5404 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5405 name = targetm.strip_name_encoding (name);
5407 /* If we're using one_only, then there needs to be a .gnu.linkonce
5408 prefix to the section name. */
5409 linkonce = one_only ? ".gnu.linkonce" : "";
/* ACONCAT builds the final name on the obstack: [linkonce]prefix.name.  */
5411 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5413 set_decl_section_name (decl, string);
5417 default_unique_section (decl, reloc);
5420 #ifdef COMMON_ASM_OP
5421 /* This says how to output assembler code to declare an
5422 uninitialized external linkage data object.
5424 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium-model objects above the section
   threshold) or the platform COMMON_ASM_OP, then "name,size,align-bytes".
   NOTE(review): sampled excerpt -- the return type, the ALIGN parameter
   line and braces are missing; the (unsigned int) cast of
   ix86_section_threshold narrows -- harmless only if the threshold fits
   in 32 bits, confirm against the full file.  */
5427 x86_elf_aligned_common (FILE *file,
5428 const char *name, unsigned HOST_WIDE_INT size,
5431 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5432 && size > (unsigned int)ix86_section_threshold)
5433 fputs ("\t.largecomm\t", file);
5435 fputs (COMMON_ASM_OP, file);
5436 assemble_name (file, name);
5437 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5438 size, align / BITS_PER_UNIT);
5442 /* Utility function for targets to use in implementing
5443 ASM_OUTPUT_ALIGNED_BSS. */
/* Switches to .lbss for medium-model objects above the section threshold
   (bss_section otherwise), aligns, declares the object, and skips SIZE
   bytes (at least 1 so the label refers to real space).  NOTE(review):
   sampled excerpt -- return type, braces and an `#else` line of the
   ASM_DECLARE_OBJECT_NAME conditional are missing.  */
5446 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5447 unsigned HOST_WIDE_INT size, int align)
5449 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5450 && size > (unsigned int)ix86_section_threshold)
5451 switch_to_section (get_named_section (decl, ".lbss", 0));
5453 switch_to_section (bss_section);
5454 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5455 #ifdef ASM_DECLARE_OBJECT_NAME
5456 last_assemble_variable_decl = decl;
5457 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5459 /* Standard thing is just output label for the object. */
5460 ASM_OUTPUT_LABEL (file, name);
5461 #endif /* ASM_DECLARE_OBJECT_NAME */
5462 ASM_OUTPUT_SKIP (file, size ? size : 1);
5465 /* Decide whether we must probe the stack before any space allocation
5466 on this target. It's essentially TARGET_STACK_PROBE except when
5467 -fstack-check causes the stack to be already probed differently. */
/* NOTE(review): sampled excerpt -- the return type and the `return false;`
   under the static-check guard are missing lines.  */
5470 ix86_target_stack_probe (void)
5472 /* Do not probe the stack twice if static stack checking is enabled. */
5473 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5476 return TARGET_STACK_PROBE;
5479 /* Decide whether we can make a sibling call to a function. DECL is the
5480 declaration of the function being targeted by the call and EXP is the
5481 CALL_EXPR representing the call. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL: reject sibcalls that would break PIC/PLT
   %ebx liveness, stack realignment, x87 return-stack adjustment, MS->SYSV
   clobber differences, or register pressure for indirect targets.
   NOTE(review): sampled excerpt -- return type, the declarations of A and
   B, several guard conditions and `return` statements are missing lines.  */
5484 ix86_function_ok_for_sibcall (tree decl, tree exp)
5486 tree type, decl_or_type;
5489 /* If we are generating position-independent code, we cannot sibcall
5490 optimize any indirect call, or a direct call to a global function,
5491 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5495 && (!decl || !targetm.binds_local_p (decl)))
5498 /* If we need to align the outgoing stack, then sibcalling would
5499 unalign the stack, which may break the called function. */
5500 if (ix86_minimum_incoming_stack_boundary (true)
5501 < PREFERRED_STACK_BOUNDARY)
5506 decl_or_type = decl;
5507 type = TREE_TYPE (decl);
5511 /* We're looking at the CALL_EXPR, we need the type of the function. */
5512 type = CALL_EXPR_FN (exp); /* pointer expression */
5513 type = TREE_TYPE (type); /* pointer type */
5514 type = TREE_TYPE (type); /* function type */
5515 decl_or_type = type;
5518 /* Check that the return value locations are the same. Like
5519 if we are returning floats on the 80387 register stack, we cannot
5520 make a sibcall from a function that doesn't return a float to a
5521 function that does or, conversely, from a function that does return
5522 a float to a function that doesn't; the necessary stack adjustment
5523 would not be executed. This is also the place we notice
5524 differences in the return value ABI. Note that it is ok for one
5525 of the functions to have void return type as long as the return
5526 value of the other is passed in a register. */
5527 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5528 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5530 if (STACK_REG_P (a) || STACK_REG_P (b))
5532 if (!rtx_equal_p (a, b))
5535 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5537 else if (!rtx_equal_p (a, b))
5542 /* The SYSV ABI has more call-clobbered registers;
5543 disallow sibcalls from MS to SYSV. */
5544 if (cfun->machine->call_abi == MS_ABI
5545 && ix86_function_type_abi (type) == SYSV_ABI)
5550 /* If this call is indirect, we'll need to be able to use a
5551 call-clobbered register for the address of the target function.
5552 Make sure that all such registers are not used for passing
5553 parameters. Note that DLLIMPORT functions are indirect. */
5555 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5557 if (ix86_function_regparm (type, NULL) >= 3)
5559 /* ??? Need to count the actual number of registers to be used,
5560 not the possible number of registers. Fix later. */
5566 /* Otherwise okay. That also includes certain types of indirect calls. */
5570 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5571 and "sseregparm" calling convention attributes;
5572 arguments as in struct attribute_spec.handler. */
/* NOTE(review): this listing elides interior lines (braces, some
   statements); only comments and the one diagnostic fix below differ
   from the original text.  Each branch rejects attribute combinations
   that imply conflicting argument-passing registers.  */
5575 ix86_handle_cconv_attribute (tree *node, tree name,
5580 if (TREE_CODE (*node) != FUNCTION_TYPE
5581 && TREE_CODE (*node) != METHOD_TYPE
5582 && TREE_CODE (*node) != FIELD_DECL
5583 && TREE_CODE (*node) != TYPE_DECL)
5585 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5587 *no_add_attrs = true;
5591 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5592 if (is_attribute_p ("regparm", name))
5596 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5598 error ("fastcall and regparm attributes are not compatible");
5601 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* Fixed diagnostic typo: "regparam" -> "regparm" (matches the
   attribute's real name and the wording of the sibling messages).  */
5603 error ("regparm and thiscall attributes are not compatible");
5606 cst = TREE_VALUE (args);
5607 if (TREE_CODE (cst) != INTEGER_CST)
5609 warning (OPT_Wattributes,
5610 "%qE attribute requires an integer constant argument",
5612 *no_add_attrs = true;
5614 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5616 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5618 *no_add_attrs = true;
5626 /* Do not warn when emulating the MS ABI. */
5627 if ((TREE_CODE (*node) != FUNCTION_TYPE
5628 && TREE_CODE (*node) != METHOD_TYPE)
5629 || ix86_function_type_abi (*node) != MS_ABI)
5630 warning (OPT_Wattributes, "%qE attribute ignored",
5632 *no_add_attrs = true;
5636 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5637 if (is_attribute_p ("fastcall", name))
5639 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5641 error ("fastcall and cdecl attributes are not compatible");
5643 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5645 error ("fastcall and stdcall attributes are not compatible");
5647 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5649 error ("fastcall and regparm attributes are not compatible");
5651 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5653 error ("fastcall and thiscall attributes are not compatible");
5657 /* Can combine stdcall with fastcall (redundant), regparm and
5659 else if (is_attribute_p ("stdcall", name))
5661 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5663 error ("stdcall and cdecl attributes are not compatible");
5665 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5667 error ("stdcall and fastcall attributes are not compatible");
5669 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5671 error ("stdcall and thiscall attributes are not compatible");
5675 /* Can combine cdecl with regparm and sseregparm. */
5676 else if (is_attribute_p ("cdecl", name))
5678 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5680 error ("stdcall and cdecl attributes are not compatible");
5682 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5684 error ("fastcall and cdecl attributes are not compatible");
5686 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5688 error ("cdecl and thiscall attributes are not compatible");
5691 else if (is_attribute_p ("thiscall", name))
/* thiscall is meaningful only for C++ non-static member functions;
   warn (under -pedantic) when applied elsewhere.  */
5693 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5694 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5696 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5698 error ("stdcall and thiscall attributes are not compatible");
5700 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5702 error ("fastcall and thiscall attributes are not compatible");
5704 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5706 error ("cdecl and thiscall attributes are not compatible");
5710 /* Can combine sseregparm with all attributes. */
5715 /* The transactional memory builtins are implicitly regparm or fastcall
5716 depending on the ABI. Override the generic do-nothing attribute that
5717 these builtins were declared with, and replace it with one of the two
5718 attributes that we expect elsewhere. */
/* NOTE(review): interior lines are elided in this listing.  */
5721 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5722 int flags, bool *no_add_attrs)
5726 /* In no case do we want to add the placeholder attribute. */
5727 *no_add_attrs = true;
5729 /* The 64-bit ABI is unchanged for transactional memory. */
5733 /* ??? Is there a better way to validate 32-bit windows? We have
5734 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
/* CHECK_STACK_LIMIT > 0 is used here as a proxy for a 32-bit Windows
   target: in that case substitute "fastcall", otherwise "regparm(2)".  */
5735 if (CHECK_STACK_LIMIT > 0)
5736 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5739 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5740 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
/* Apply the substituted attribute list to the builtin's type.  */
5742 decl_attributes (node, alt, flags);
5747 /* This function determines from TYPE the calling-convention. */
/* Returns a bitmask of IX86_CALLCVT_* flags; interior lines (the
   64-bit early-out and some braces) are elided in this listing.  */
5750 ix86_get_callcvt (const_tree type)
5752 unsigned int ret = 0;
5757 return IX86_CALLCVT_CDECL;
5759 attrs = TYPE_ATTRIBUTES (type);
5760 if (attrs != NULL_TREE)
/* The base conventions are mutually exclusive; pick the first match.  */
5762 if (lookup_attribute ("cdecl", attrs))
5763 ret |= IX86_CALLCVT_CDECL;
5764 else if (lookup_attribute ("stdcall", attrs))
5765 ret |= IX86_CALLCVT_STDCALL;
5766 else if (lookup_attribute ("fastcall", attrs))
5767 ret |= IX86_CALLCVT_FASTCALL;
5768 else if (lookup_attribute ("thiscall", attrs))
5769 ret |= IX86_CALLCVT_THISCALL;
5771 /* Regparm isn't allowed for thiscall and fastcall. */
5772 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5774 if (lookup_attribute ("regparm", attrs))
5775 ret |= IX86_CALLCVT_REGPARM;
5776 if (lookup_attribute ("sseregparm", attrs))
5777 ret |= IX86_CALLCVT_SSEREGPARM;
/* If an explicit base convention was found, nothing more to infer.  */
5780 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit attribute: -mrtd makes non-varargs functions stdcall.  */
5784 is_stdarg = stdarg_p (type);
5785 if (TARGET_RTD && !is_stdarg)
5786 return IX86_CALLCVT_STDCALL | ret;
5790 || TREE_CODE (type) != METHOD_TYPE
5791 || ix86_function_type_abi (type) != MS_ABI)
5792 return IX86_CALLCVT_CDECL | ret;
/* MS-ABI class methods default to thiscall.  */
5794 return IX86_CALLCVT_THISCALL;
5797 /* Return 0 if the attributes for two types are incompatible, 1 if they
5798 are compatible, and 2 if they are nearly compatible (which causes a
5799 warning to be generated). */
/* NOTE(review): interior lines are elided; the visible checks compare
   calling convention and regparm count of the two function types.  */
5802 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5804 unsigned int ccvt1, ccvt2;
/* Non-function types carry no calling-convention attributes.  */
5806 if (TREE_CODE (type1) != FUNCTION_TYPE
5807 && TREE_CODE (type1) != METHOD_TYPE)
5810 ccvt1 = ix86_get_callcvt (type1);
5811 ccvt2 = ix86_get_callcvt (type2);
/* Differing register-argument counts make the types incompatible.  */
5814 if (ix86_function_regparm (type1, NULL)
5815 != ix86_function_regparm (type2, NULL))
5821 /* Return the regparm value for a function with the indicated TYPE and DECL.
5822 DECL may be NULL when calling function indirectly
5823 or considering a libcall. */
/* NOTE(review): interior lines are elided in this listing (64-bit
   early-out, fastcall/thiscall constants, loop bodies).  */
5826 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: the register count is fixed by the ABI, not by attributes.  */
5833 return (ix86_function_type_abi (type) == SYSV_ABI
5834 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5835 ccvt = ix86_get_callcvt (type);
5836 regparm = ix86_regparm;
5838 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
/* Explicit regparm(N) attribute overrides the command-line default.  */
5840 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type))
5843 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5847 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5849 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5852 /* Use register calling convention for local functions when possible. */
5854 && TREE_CODE (decl) == FUNCTION_DECL)
5856 cgraph_node *target = cgraph_node::get (decl);
5858 target = target->function_symbol ();
5860 /* Caller and callee must agree on the calling convention, so
5861 checking here just optimize means that with
5862 __attribute__((optimize (...))) caller could use regparm convention
5863 and callee not, or vice versa. Instead look at whether the callee
5864 is optimized or not. */
5865 if (target && opt_for_fn (target->decl, optimize)
5866 && !(profile_flag && !flag_fentry))
5868 cgraph_local_info *i = &target->local;
5869 if (i && i->local && i->can_change_signature)
5871 int local_regparm, globals = 0, regno;
5873 /* Make sure no regparm register is taken by a
5874 fixed register variable. */
5875 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5877 if (fixed_regs[local_regparm])
5880 /* We don't want to use regparm(3) for nested functions as
5881 these use a static chain pointer in the third argument. */
5882 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5885 /* Save a register for the split stack. */
5886 if (local_regparm == 3 && flag_split_stack)
5889 /* Each fixed register usage increases register pressure,
5890 so fewer registers should be used for argument passing.
5891 This functionality can be overridden by an explicit
5893 for (regno = AX_REG; regno <= DI_REG; regno++)
5894 if (fixed_regs[regno])
5898 = globals < local_regparm ? local_regparm - globals : 0;
5900 if (local_regparm > regparm)
5901 regparm = local_regparm;
5909 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5910 DFmode (2) arguments in SSE registers for a function with the
5911 indicated TYPE and DECL. DECL may be NULL when calling function
5912 indirectly or considering a libcall. Return -1 if any FP parameter
5913 should be rejected by error. This is used in a situation where we imply SSE
5914 calling convention but the function is called from another function with
5915 SSE disabled. Otherwise return 0. */
/* NOTE(review): interior lines are elided in this listing.  */
5918 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This query only makes sense for the 32-bit ABI.  */
5920 gcc_assert (!TARGET_64BIT);
5922 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5923 by the sseregparm attribute. */
5924 if (TARGET_SSEREGPARM
5925 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE support is a hard error (WARN selects the
   decl- vs. type-flavored diagnostic).  */
5932 error ("calling %qD with attribute sseregparm without "
5933 "SSE/SSE2 enabled", decl);
5935 error ("calling %qT with attribute sseregparm without "
5936 "SSE/SSE2 enabled", type);
5947 cgraph_node *target = cgraph_node::get (decl);
5949 target = target->function_symbol ();
5951 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5952 (and DFmode for SSE2) arguments in SSE registers. */
5954 /* TARGET_SSE_MATH */
5955 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5956 && opt_for_fn (target->decl, optimize)
5957 && !(profile_flag && !flag_fentry))
5959 cgraph_local_info *i = &target->local;
5960 if (i && i->local && i->can_change_signature)
5962 /* Refuse to produce wrong code when local function with SSE enabled
5963 is called from SSE disabled function.
5964 FIXME: We need a way to detect these cases cross-ltrans partition
5965 and avoid using SSE calling conventions on local functions called
5966 from function with SSE disabled. For now at least delay the
5967 warning until we know we are going to produce wrong code.
5969 if (!TARGET_SSE && warn)
/* 2 = SFmode and DFmode in SSE regs (SSE2); 1 = SFmode only.  */
5971 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5972 ->x_ix86_isa_flags) ? 2 : 1;
5979 /* Return true if EAX is live at the start of the function. Used by
5980 ix86_expand_prologue to determine if we need special help before
5981 calling allocate_stack_worker. */
5984 ix86_eax_live_at_start_p (void)
5986 /* Cheat. Don't bother working forward from ix86_function_regparm
5987 to the function type to whether an actual argument is located in
5988 eax. Instead just look at cfg info, which is still close enough
5989 to correct at this point. This gives false positives for broken
5990 functions that might use uninitialized data that happens to be
5991 allocated in eax, but who cares? */
/* Register 0 is AX on x86; test its membership in the entry block's
   live-out set.  */
5992 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Decide whether the callee should leave (rather than pop) the hidden
   aggregate-return pointer on the stack; interior lines are elided in
   this listing.  */
5996 ix86_keep_aggregate_return_pointer (tree fntype)
/* An explicit callee_pop_aggregate_return(N) attribute wins: keep the
   pointer iff its argument is 0.  */
6002 attr = lookup_attribute ("callee_pop_aggregate_return",
6003 TYPE_ATTRIBUTES (fntype));
6005 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6007 /* For 32-bit MS-ABI the default is to keep aggregate
6009 if (ix86_function_type_abi (fntype) == MS_ABI)
6012 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6015 /* Value is the number of bytes of arguments automatically
6016 popped when returning from a subroutine call.
6017 FUNDECL is the declaration node of the function (as a tree),
6018 FUNTYPE is the data type of the function (as a tree),
6019 or for a library call it is an identifier node for the subroutine name.
6020 SIZE is the number of bytes of arguments passed on the stack.
6022 On the 80386, the RTD insn may be used to pop them if the number
6023 of args is fixed, but if the number is variable then the caller
6024 must pop them all. RTD can't be used for library calls now
6025 because the library is compiled with the Unix compiler.
6026 Use of RTD is a selectable option, since it is incompatible with
6027 standard Unix calling sequences. If the option is not selected,
6028 the caller must always pop the args.
6030 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): interior lines are elided in this listing.  */
6033 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6037 /* None of the 64-bit ABIs pop arguments. */
6041 ccvt = ix86_get_callcvt (funtype);
/* stdcall/fastcall/thiscall callees pop their own non-varargs args.  */
6043 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6044 | IX86_CALLCVT_THISCALL)) != 0
6045 && ! stdarg_p (funtype))
6048 /* Lose any fake structure return argument if it is passed on the stack. */
6049 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6050 && !ix86_keep_aggregate_return_pointer (funtype))
6052 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden pointer occupies one word on the stack.  */
6054 return GET_MODE_SIZE (Pmode);
6060 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* NOTE(review): interior lines are elided in this listing.  */
6063 ix86_legitimate_combined_insn (rtx_insn *insn)
6065 /* Check operand constraints in case hard registers were propagated
6066 into insn pattern. This check prevents combine pass from
6067 generating insn patterns with invalid hard register operands.
6068 These invalid insns can eventually confuse reload to error out
6069 with a spill failure. See also PRs 46829 and 46843. */
/* Intentional assignment inside the condition: recog the pattern and
   cache the result in INSN_CODE while testing for a match.  */
6070 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6074 extract_insn (insn);
6075 preprocess_constraints (insn);
6077 int n_operands = recog_data.n_operands;
6078 int n_alternatives = recog_data.n_alternatives;
6079 for (i = 0; i < n_operands; i++)
6081 rtx op = recog_data.operand[i];
6082 machine_mode mode = GET_MODE (op);
6083 const operand_alternative *op_alt;
6088 /* For pre-AVX disallow unaligned loads/stores where the
6089 instructions don't support it. */
6091 && VECTOR_MODE_P (GET_MODE (op))
6092 && misaligned_operand (op, GET_MODE (op)))
6094 int min_align = get_attr_ssememalign (insn);
6099 /* A unary operator may be accepted by the predicate, but it
6100 is irrelevant for matching constraints. */
/* Strip a SUBREG of a hard register, tracking the register offset.  */
6104 if (GET_CODE (op) == SUBREG)
6106 if (REG_P (SUBREG_REG (op))
6107 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6108 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6109 GET_MODE (SUBREG_REG (op)),
6112 op = SUBREG_REG (op);
/* Only propagated hard registers need constraint re-checking.  */
6115 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6118 op_alt = recog_op_alt;
6120 /* Operand has no constraints, anything is OK. */
6121 win = !n_alternatives;
/* Accept the operand if any preferred alternative admits it.  */
6123 alternative_mask preferred = get_preferred_alternatives (insn);
6124 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6126 if (!TEST_BIT (preferred, j))
6128 if (op_alt[i].anything_ok
6129 || (op_alt[i].matches != -1
6131 (recog_data.operand[i],
6132 recog_data.operand[op_alt[i].matches]))
6133 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6148 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6150 static unsigned HOST_WIDE_INT
6151 ix86_asan_shadow_offset (void)
/* AddressSanitizer shadow-memory base: LP64 Mach-O uses 1<<44, other
   LP64 targets 0x7fff8000, and 32-bit (or x32) targets 1<<29.  */
6153 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6154 : HOST_WIDE_INT_C (0x7fff8000))
6155 : (HOST_WIDE_INT_1 << 29);
6158 /* Argument support functions. */
6160 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): interior lines are elided in this listing.  */
6162 ix86_function_arg_regno_p (int regno)
6165 enum calling_abi call_abi;
6166 const int *parm_regs;
/* MPX bound registers can carry pointer-bounds arguments.  */
6168 if (TARGET_MPX && BND_REGNO_P (regno))
6174 return (regno < REGPARM_MAX
6175 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6177 return (regno < REGPARM_MAX
6178 || (TARGET_MMX && MMX_REGNO_P (regno)
6179 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6180 || (TARGET_SSE && SSE_REGNO_P (regno)
6181 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6184 if (TARGET_SSE && SSE_REGNO_P (regno)
6185 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6188 /* TODO: The function should depend on current function ABI but
6189 builtins.c would need updating then. Therefore we use the
6191 call_abi = ix86_cfun_abi ();
6193 /* RAX is used as hidden argument to va_arg functions. */
6194 if (call_abi == SYSV_ABI && regno == AX_REG)
/* Pick the integer parameter-register table for the active ABI.  */
6197 if (call_abi == MS_ABI)
6198 parm_regs = x86_64_ms_abi_int_parameter_registers;
6200 parm_regs = x86_64_int_parameter_registers;
6202 for (i = 0; i < (call_abi == MS_ABI
6203 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6204 if (regno == parm_regs[i])
6209 /* Return if we do not know how to pass TYPE solely in registers. */
6212 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
/* Defer to the generic variable-size / padding check first.  */
6214 if (must_pass_in_stack_var_size_or_pad (mode, type))
6217 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6218 The layout_type routine is crafty and tries to trick us into passing
6219 currently unsupported vector types on the stack by using TImode. */
6220 return (!TARGET_64BIT && mode == TImode
6221 && type && TREE_CODE (type) != VECTOR_TYPE);
6224 /* It returns the size, in bytes, of the area reserved for arguments passed
6225 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): interior lines are elided; FNDECL may be a decl or a
   function type, hence the two ABI lookups below.  */
6228 ix86_reg_parm_stack_space (const_tree fndecl)
6230 enum calling_abi call_abi = SYSV_ABI;
6231 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6232 call_abi = ix86_function_abi (fndecl);
6234 call_abi = ix86_function_type_abi (fndecl);
/* 64-bit MS ABI reserves the 32-byte home/shadow area.  */
6235 if (TARGET_64BIT && call_abi == MS_ABI)
6240 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* NOTE(review): interior lines are elided in this listing.  */
6243 ix86_function_type_abi (const_tree fntype)
6245 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
/* An explicit ms_abi/sysv_abi attribute flips the default ABI.  */
6247 enum calling_abi abi = ix86_abi;
6248 if (abi == SYSV_ABI)
6250 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
/* Warn only once: ms_abi is not supported on x32.  */
6254 static bool warned = false;
6257 error ("X32 does not support ms_abi attribute");
6264 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6271 /* We add this as a workaround in order to use libc_has_function
/* Thin forwarder to the target hook; exists only so other i386 code can
   query libc capabilities through a plain function.  */
6274 ix86_libc_has_function (enum function_class fn_class)
6276 return targetm.libc_has_function (fn_class);
/* Return true if FN carries the ms_hook_prologue attribute (hot-patchable
   prologue); reject it on nested functions.  */
6280 ix86_function_ms_hook_prologue (const_tree fn)
6282 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6284 if (decl_function_context (fn) != NULL_TREE)
6285 error_at (DECL_SOURCE_LOCATION (fn),
6286 "ms_hook_prologue is not compatible with nested function");
/* Return the calling ABI (SYSV_ABI or MS_ABI) of function decl FNDECL,
   derived from its type.  */
6293 static enum calling_abi
6294 ix86_function_abi (const_tree fndecl)
6298 return ix86_function_type_abi (TREE_TYPE (fndecl));
6301 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Reads the ABI cached on the current function's machine record.  */
6304 ix86_cfun_abi (void)
6308 return cfun->machine->call_abi;
6311 /* Write the extra assembler code needed to declare a function properly. */
/* NOTE(review): interior lines are elided in this listing.  */
6314 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6317 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Emit hot-patch filler (0xCC bytes) before the label: 32 bytes for
   64-bit, 16 for 32-bit.  */
6321 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6322 unsigned int filler_cc = 0xcccccccc;
6324 for (i = 0; i < filler_count; i += 4)
6325 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6328 #ifdef SUBTARGET_ASM_UNWIND_INIT
6329 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6332 ASM_OUTPUT_LABEL (asm_out_file, fname);
6334 /* Output magic byte marker, if hot-patch attribute is set. */
6339 /* leaq [%rsp + 0], %rsp */
6340 asm_fprintf (asm_out_file, ASM_BYTE
6341 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6345 /* movl.s %edi, %edi
6347 movl.s %esp, %ebp */
6348 asm_fprintf (asm_out_file, ASM_BYTE
6349 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6355 extern void init_regs (void);
6357 /* Implementation of call abi switching target hook. Specific to FNDECL
6358 the specific call register sets are set. See also
6359 ix86_conditional_register_usage for more details. */
6361 ix86_call_abi_override (const_tree fndecl)
/* NULL decl means a libcall: fall back to the command-line default ABI.  */
6363 if (fndecl == NULL_TREE)
6364 cfun->machine->call_abi = ix86_abi;
6366 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6369 /* Return 1 if pseudo register should be created and used to hold
6370 GOT address for PIC code. */
/* NOTE(review): the surrounding condition is partly elided here.  */
6372 ix86_use_pseudo_pic_reg (void)
6375 && (ix86_cmodel == CM_SMALL_PIC
6382 /* Initialize large model PIC register. */
/* Emits the set_rip/set_got_offset/add sequence that materializes the
   GOT base into pic_offset_table_rtx, using TMP_REGNO as scratch.  */
6385 ix86_init_large_pic_reg (unsigned int tmp_regno)
6387 rtx_code_label *label;
/* Large-model PIC only exists in 64-bit mode.  */
6390 gcc_assert (Pmode == DImode);
6391 label = gen_label_rtx ();
/* Keep the label: the set_rip insn references it.  */
6393 LABEL_PRESERVE_P (label) = 1;
6394 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6395 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6396 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6398 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6399 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6400 pic_offset_table_rtx, tmp_reg));
6403 /* Create and initialize PIC register if required. */
/* NOTE(review): interior lines are elided in this listing.  */
6405 ix86_init_pic_reg (void)
6410 if (!ix86_use_pseudo_pic_reg ())
/* 64-bit: large model needs the multi-insn sequence, small model a
   single set_got.  */
6417 if (ix86_cmodel == CM_LARGE_PIC)
6418 ix86_init_large_pic_reg (R11_REG);
6420 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6424 /* If there is future mcount call in the function it is more profitable
6425 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6426 rtx reg = crtl->profile
6427 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6428 : pic_offset_table_rtx;
6429 rtx insn = emit_insn (gen_set_got (reg));
6430 RTX_FRAME_RELATED_P (insn) = 1;
6432 emit_move_insn (pic_offset_table_rtx, reg);
6433 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Place the initialization on the entry edge so it dominates all uses.  */
6439 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6440 insert_insn_on_edge (seq, entry_edge);
6441 commit_one_edge_insertion (entry_edge);
6444 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6445 for a call to a function whose data type is FNTYPE.
6446 For a library call, FNTYPE is 0. */
/* NOTE(review): interior lines are elided in this listing.  */
6449 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6450 tree fntype, /* tree ptr for function decl */
6451 rtx libname, /* SYMBOL_REF of library name or 0 */
6455 struct cgraph_local_info *i = NULL;
6456 struct cgraph_node *target = NULL;
6458 memset (cum, 0, sizeof (*cum));
/* For a known decl, resolve aliases and use the real target's ABI and
   local info.  */
6462 target = cgraph_node::get (fndecl);
6465 target = target->function_symbol ();
6466 i = cgraph_node::local_info (target->decl);
6467 cum->call_abi = ix86_function_abi (target->decl);
6470 cum->call_abi = ix86_function_abi (fndecl);
6473 cum->call_abi = ix86_function_type_abi (fntype);
6475 cum->caller = caller;
6477 /* Set up the number of registers to use for passing arguments. */
6478 cum->nregs = ix86_regparm;
6481 cum->nregs = (cum->call_abi == SYSV_ABI
6482 ? X86_64_REGPARM_MAX
6483 : X86_64_MS_REGPARM_MAX);
6487 cum->sse_nregs = SSE_REGPARM_MAX;
6490 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6491 ? X86_64_SSE_REGPARM_MAX
6492 : X86_64_MS_SSE_REGPARM_MAX);
6496 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Enable ABI-mismatch warnings by default; cleared below for varargs.  */
6497 cum->warn_avx512f = true;
6498 cum->warn_avx = true;
6499 cum->warn_sse = true;
6500 cum->warn_mmx = true;
6502 /* Because type might mismatch in between caller and callee, we need to
6503 use actual type of function for local calls.
6504 FIXME: cgraph_analyze can be told to actually record if function uses
6505 va_start so for local functions maybe_vaarg can be made aggressive
6507 FIXME: once typesytem is fixed, we won't need this code anymore. */
6508 if (i && i->local && i->can_change_signature)
6509 fntype = TREE_TYPE (target->decl);
6510 cum->stdarg = stdarg_p (fntype);
6511 cum->maybe_vaarg = (fntype
6512 ? (!prototype_p (fntype) || stdarg_p (fntype))
/* MPX pointer-bounds bookkeeping.  */
6515 cum->bnd_regno = FIRST_BND_REG;
6516 cum->bnds_in_bt = 0;
6517 cum->force_bnd_pass = 0;
6522 /* If there are variable arguments, then we won't pass anything
6523 in registers in 32-bit mode. */
6524 if (stdarg_p (fntype))
6529 cum->warn_avx512f = false;
6530 cum->warn_avx = false;
6531 cum->warn_sse = false;
6532 cum->warn_mmx = false;
6536 /* Use ecx and edx registers if function has fastcall attribute,
6537 else look for regparm information. */
6540 unsigned int ccvt = ix86_get_callcvt (fntype);
6541 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6544 cum->fastcall = 1; /* Same first register as in fastcall. */
6546 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6552 cum->nregs = ix86_function_regparm (fntype, fndecl);
6555 /* Set up the number of SSE registers used for passing SFmode
6556 and DFmode arguments. Warn for mismatching ABI. */
6557 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6561 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6562 But in the case of vector types, it is some vector mode.
6564 When we have only some of our vector isa extensions enabled, then there
6565 are some modes for which vector_mode_supported_p is false. For these
6566 modes, the generic vector support in gcc will choose some non-vector mode
6567 in order to implement the type. By computing the natural mode, we'll
6568 select the proper ABI location for the operand and not depend on whatever
6569 the middle-end decides to do with these vector types.
6571 The middle-end can't deal with the vector types > 16 bytes. In this
6572 case, we return the original mode and warn ABI change if CUM isn't
6575 If INT_RETURN is true, warn ABI change if the vector mode isn't
6576 available for function return value. */
/* NOTE(review): interior lines are elided in this listing.  */
6579 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6582 machine_mode mode = TYPE_MODE (type);
6584 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6586 HOST_WIDE_INT size = int_size_in_bytes (type);
6587 if ((size == 8 || size == 16 || size == 32 || size == 64)
6588 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6589 && TYPE_VECTOR_SUBPARTS (type) > 1)
6591 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6593 /* There are no XFmode vector modes. */
6594 if (innermode == XFmode)
6597 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6598 mode = MIN_MODE_VECTOR_FLOAT;
6600 mode = MIN_MODE_VECTOR_INT;
6602 /* Get the mode which has this inner mode and number of units. */
6603 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6604 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6605 && GET_MODE_INNER (mode) == innermode)
/* Each ISA tier below warns once per compilation (static flags),
   separately for argument-passing and return-value mismatches.  */
6607 if (size == 64 && !TARGET_AVX512F)
6609 static bool warnedavx512f;
6610 static bool warnedavx512f_ret;
6612 if (cum && cum->warn_avx512f && !warnedavx512f)
6614 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6615 "without AVX512F enabled changes the ABI"))
6616 warnedavx512f = true;
6618 else if (in_return && !warnedavx512f_ret)
6620 if (warning (OPT_Wpsabi, "AVX512F vector return "
6621 "without AVX512F enabled changes the ABI"))
6622 warnedavx512f_ret = true;
6625 return TYPE_MODE (type);
6627 else if (size == 32 && !TARGET_AVX)
6629 static bool warnedavx;
6630 static bool warnedavx_ret;
6632 if (cum && cum->warn_avx && !warnedavx)
6634 if (warning (OPT_Wpsabi, "AVX vector argument "
6635 "without AVX enabled changes the ABI"))
6638 else if (in_return && !warnedavx_ret)
6640 if (warning (OPT_Wpsabi, "AVX vector return "
6641 "without AVX enabled changes the ABI"))
6642 warnedavx_ret = true;
6645 return TYPE_MODE (type);
6647 else if (((size == 8 && TARGET_64BIT) || size == 16)
6650 static bool warnedsse;
6651 static bool warnedsse_ret;
6653 if (cum && cum->warn_sse && !warnedsse)
6655 if (warning (OPT_Wpsabi, "SSE vector argument "
6656 "without SSE enabled changes the ABI"))
6659 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6661 if (warning (OPT_Wpsabi, "SSE vector return "
6662 "without SSE enabled changes the ABI"))
6663 warnedsse_ret = true;
6666 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6668 static bool warnedmmx;
6669 static bool warnedmmx_ret;
6671 if (cum && cum->warn_mmx && !warnedmmx)
6673 if (warning (OPT_Wpsabi, "MMX vector argument "
6674 "without MMX enabled changes the ABI"))
6677 else if (in_return && !warnedmmx_ret)
6679 if (warning (OPT_Wpsabi, "MMX vector return "
6680 "without MMX enabled changes the ABI"))
6681 warnedmmx_ret = true;
6694 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6695 this may not agree with the mode that the type system has chosen for the
6696 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6697 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6700 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
/* Non-BLKmode: a plain hard register in the type's own mode suffices.  */
6705 if (orig_mode != BLKmode)
6706 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL at
   offset 0 so callers see a location list.  */
6709 tmp = gen_rtx_REG (mode, regno);
6710 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6711 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6717 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6718 of this code is to classify each 8bytes of incoming argument by the register
6719 class and assign registers accordingly. */
6721 /* Return the union class of CLASS1 and CLASS2.
6722 See the x86-64 PS ABI for details. */
/* Implements the psABI's MERGE rules for combining the classes of two
   overlapping eightbytes.  Rule numbers below follow the ABI document.  */
6724 static enum x86_64_reg_class
6725 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6727 /* Rule #1: If both classes are equal, this is the resulting class. */
6728 if (class1 == class2)
6731 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6733 if (class1 == X86_64_NO_CLASS)
6735 if (class2 == X86_64_NO_CLASS)
6738 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6739 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6740 return X86_64_MEMORY_CLASS;
6742 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6743 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6744 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6745 return X86_64_INTEGERSI_CLASS;
6746 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6747 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6748 return X86_64_INTEGER_CLASS;
6750 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6752 if (class1 == X86_64_X87_CLASS
6753 || class1 == X86_64_X87UP_CLASS
6754 || class1 == X86_64_COMPLEX_X87_CLASS
6755 || class2 == X86_64_X87_CLASS
6756 || class2 == X86_64_X87UP_CLASS
6757 || class2 == X86_64_COMPLEX_X87_CLASS)
6758 return X86_64_MEMORY_CLASS;
6760 /* Rule #6: Otherwise class SSE is used. */
6761 return X86_64_SSE_CLASS;
6764 /* Classify the argument of type TYPE and mode MODE.
6765 CLASSES will be filled by the register class used to pass each word
6766 of the operand. The number of words is returned. In case the parameter
6767 should be passed in memory, 0 is returned. As a special case for zero
6768 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6770 BIT_OFFSET is used internally for handling records and specifies offset
6771 of the offset in bits modulo 512 to avoid overflow cases.
6773 See the x86-64 PS ABI for details.
/* NOTE(review): implements the recursive classification algorithm of the
   SysV x86-64 psABI (section on parameter passing).  Listing is elided;
   intervening source lines are not shown here.  */
6777 classify_argument (machine_mode mode, const_tree type,
6778 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6780 HOST_WIDE_INT bytes =
6781 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6783 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6785 /* Variable sized entities are always passed/returned in memory. */
6789 if (mode != VOIDmode
6790 && targetm.calls.must_pass_in_stack (mode, type))
6793 if (type && AGGREGATE_TYPE_P (type))
6797 enum x86_64_reg_class subclasses[MAX_CLASSES];
6799 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6803 for (i = 0; i < words; i++)
6804 classes[i] = X86_64_NO_CLASS;
6806 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6807 signal memory class, so handle it as special case. */
6810 classes[0] = X86_64_NO_CLASS;
6814 /* Classify each field of record and merge classes. */
6815 switch (TREE_CODE (type))
6818 /* And now merge the fields of structure. */
6819 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6821 if (TREE_CODE (field) == FIELD_DECL)
6825 if (TREE_TYPE (field) == error_mark_node)
6828 /* Bitfields are always classified as integer. Handle them
6829 early, since later code would consider them to be
6830 misaligned integers. */
6831 if (DECL_BIT_FIELD (field))
/* The "/ 8 / 8" below converts a bit position to a 64-bit-word index
   (bits -> bytes -> 8-byte words).  */
6833 for (i = (int_bit_position (field)
6834 + (bit_offset % 64)) / 8 / 8;
6835 i < ((int_bit_position (field) + (bit_offset % 64))
6836 + tree_to_shwi (DECL_SIZE (field))
6839 merge_classes (X86_64_INTEGER_CLASS,
6846 type = TREE_TYPE (field);
6848 /* Flexible array member is ignored. */
6849 if (TYPE_MODE (type) == BLKmode
6850 && TREE_CODE (type) == ARRAY_TYPE
6851 && TYPE_SIZE (type) == NULL_TREE
6852 && TYPE_DOMAIN (type) != NULL_TREE
6853 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
/* -Wpsabi note: behavior for flexible array members changed in GCC 4.4;
   warn once per compilation.  */
6858 if (!warned && warn_psabi)
6861 inform (input_location,
6862 "the ABI of passing struct with"
6863 " a flexible array member has"
6864 " changed in GCC 4.4");
/* Recurse into the field; the field's bit offset is folded modulo 512
   to keep BIT_OFFSET bounded (see header comment).  */
6868 num = classify_argument (TYPE_MODE (type), type,
6870 (int_bit_position (field)
6871 + bit_offset) % 512);
6874 pos = (int_bit_position (field)
6875 + (bit_offset % 64)) / 8 / 8;
6876 for (i = 0; i < num && (i + pos) < words; i++)
6878 merge_classes (subclasses[i], classes[i + pos]);
6885 /* Arrays are handled as small records. */
6888 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6889 TREE_TYPE (type), subclasses, bit_offset);
6893 /* The partial classes are now full classes. */
6894 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6895 subclasses[0] = X86_64_SSE_CLASS;
6896 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6897 && !((bit_offset % 64) == 0 && bytes == 4))
6898 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words of the array.  */
6900 for (i = 0; i < words; i++)
6901 classes[i] = subclasses[i % num];
6906 case QUAL_UNION_TYPE:
6907 /* Unions are similar to RECORD_TYPE but offset is always 0.
6909 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6911 if (TREE_CODE (field) == FIELD_DECL)
6915 if (TREE_TYPE (field) == error_mark_node)
6918 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6919 TREE_TYPE (field), subclasses,
6923 for (i = 0; i < num && i < words; i++)
6924 classes[i] = merge_classes (subclasses[i], classes[i])
6935 /* When size > 16 bytes, if the first one isn't
6936 X86_64_SSE_CLASS or any other ones aren't
6937 X86_64_SSEUP_CLASS, everything should be passed in
6939 if (classes[0] != X86_64_SSE_CLASS)
6942 for (i = 1; i < words; i++)
6943 if (classes[i] != X86_64_SSEUP_CLASS)
6947 /* Final merger cleanup. */
6948 for (i = 0; i < words; i++)
6950 /* If one class is MEMORY, everything should be passed in
6952 if (classes[i] == X86_64_MEMORY_CLASS)
6955 /* The X86_64_SSEUP_CLASS should be always preceded by
6956 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6957 if (classes[i] == X86_64_SSEUP_CLASS
6958 && classes[i - 1] != X86_64_SSE_CLASS
6959 && classes[i - 1] != X86_64_SSEUP_CLASS)
6961 /* The first one should never be X86_64_SSEUP_CLASS. */
6962 gcc_assert (i != 0);
6963 classes[i] = X86_64_SSE_CLASS;
6966 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6967 everything should be passed in memory. */
6968 if (classes[i] == X86_64_X87UP_CLASS
6969 && (classes[i - 1] != X86_64_X87_CLASS))
6973 /* The first one should never be X86_64_X87UP_CLASS. */
6974 gcc_assert (i != 0);
6975 if (!warned && warn_psabi)
6978 inform (input_location,
6979 "the ABI of passing union with long double"
6980 " has changed in GCC 4.4");
6988 /* Compute alignment needed. We align all types to natural boundaries with
6989 exception of XFmode that is aligned to 64bits. */
6990 if (mode != VOIDmode && mode != BLKmode)
6992 int mode_alignment = GET_MODE_BITSIZE (mode);
6995 mode_alignment = 128;
6996 else if (mode == XCmode)
6997 mode_alignment = 256;
6998 if (COMPLEX_MODE_P (mode))
6999 mode_alignment /= 2;
7000 /* Misaligned fields are always returned in memory. */
7001 if (bit_offset % mode_alignment)
7005 /* for V1xx modes, just use the base mode */
7006 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7007 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
7008 mode = GET_MODE_INNER (mode);
7010 /* Classification of atomic types. */
7015 classes[0] = X86_64_SSE_CLASS;
7018 classes[0] = X86_64_SSE_CLASS;
7019 classes[1] = X86_64_SSEUP_CLASS;
7029 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7031 /* Analyze last 128 bits only. */
7032 size = (size - 1) & 0x7f;
7036 classes[0] = X86_64_INTEGERSI_CLASS;
7041 classes[0] = X86_64_INTEGER_CLASS;
7044 else if (size < 64+32)
7046 classes[0] = X86_64_INTEGER_CLASS;
7047 classes[1] = X86_64_INTEGERSI_CLASS;
7050 else if (size < 64+64)
7052 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7060 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7064 /* OImode shouldn't be used directly. */
/* SFmode: use SSESF only when 32-bit aligned within the eightbyte.  */
7069 if (!(bit_offset % 64))
7070 classes[0] = X86_64_SSESF_CLASS;
7072 classes[0] = X86_64_SSE_CLASS;
7075 classes[0] = X86_64_SSEDF_CLASS;
7078 classes[0] = X86_64_X87_CLASS;
7079 classes[1] = X86_64_X87UP_CLASS;
7082 classes[0] = X86_64_SSE_CLASS;
7083 classes[1] = X86_64_SSEUP_CLASS;
7086 classes[0] = X86_64_SSE_CLASS;
7087 if (!(bit_offset % 64))
7093 if (!warned && warn_psabi)
7096 inform (input_location,
7097 "the ABI of passing structure with complex float"
7098 " member has changed in GCC 4.4");
7100 classes[1] = X86_64_SSESF_CLASS;
7104 classes[0] = X86_64_SSEDF_CLASS;
7105 classes[1] = X86_64_SSEDF_CLASS;
7108 classes[0] = X86_64_COMPLEX_X87_CLASS;
7111 /* This mode is larger than 16 bytes. */
/* 32-byte (AVX) vectors: one SSE word followed by three SSEUP words.  */
7119 classes[0] = X86_64_SSE_CLASS;
7120 classes[1] = X86_64_SSEUP_CLASS;
7121 classes[2] = X86_64_SSEUP_CLASS;
7122 classes[3] = X86_64_SSEUP_CLASS;
/* 64-byte (AVX-512) vectors: one SSE word followed by seven SSEUP words.  */
7130 classes[0] = X86_64_SSE_CLASS;
7131 classes[1] = X86_64_SSEUP_CLASS;
7132 classes[2] = X86_64_SSEUP_CLASS;
7133 classes[3] = X86_64_SSEUP_CLASS;
7134 classes[4] = X86_64_SSEUP_CLASS;
7135 classes[5] = X86_64_SSEUP_CLASS;
7136 classes[6] = X86_64_SSEUP_CLASS;
7137 classes[7] = X86_64_SSEUP_CLASS;
7145 classes[0] = X86_64_SSE_CLASS;
7146 classes[1] = X86_64_SSEUP_CLASS;
7154 classes[0] = X86_64_SSE_CLASS;
7160 gcc_assert (VECTOR_MODE_P (mode));
7165 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7167 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7168 classes[0] = X86_64_INTEGERSI_CLASS;
7170 classes[0] = X86_64_INTEGER_CLASS;
7171 classes[1] = X86_64_INTEGER_CLASS;
7172 return 1 + (bytes > 8);
7176 /* Examine the argument and set the number of registers required in each
7177 class. Return true iff parameter should be passed in memory. */
/* Drives classify_argument and tallies how many integer (*INT_NREGS) and
   SSE (*SSE_NREGS) registers the classified words consume.  */
7180 examine_argument (machine_mode mode, const_tree type, int in_return,
7181 int *int_nregs, int *sse_nregs)
7183 enum x86_64_reg_class regclass[MAX_CLASSES];
7184 int n = classify_argument (mode, type, regclass, 0);
/* Walk the word classes from last to first, bucketing each into the
   integer or SSE register counters.  */
7191 for (n--; n >= 0; n--)
7192 switch (regclass[n])
7194 case X86_64_INTEGER_CLASS:
7195 case X86_64_INTEGERSI_CLASS:
7198 case X86_64_SSE_CLASS:
7199 case X86_64_SSESF_CLASS:
7200 case X86_64_SSEDF_CLASS:
7203 case X86_64_NO_CLASS:
7204 case X86_64_SSEUP_CLASS:
7206 case X86_64_X87_CLASS:
7207 case X86_64_X87UP_CLASS:
7208 case X86_64_COMPLEX_X87_CLASS:
7212 case X86_64_MEMORY_CLASS:
7219 /* Construct container for the argument used by GCC interface. See
7220 FUNCTION_ARG for the detailed description. */
/* Builds the RTX (single REG or PARALLEL of EXPR_LISTs) describing where an
   argument or return value lives, given its psABI classification.  */
7223 construct_container (machine_mode mode, machine_mode orig_mode,
7224 const_tree type, int in_return, int nintregs, int nsseregs,
7225 const int *intreg, int sse_regno)
7227 /* The following variables hold the static issued_error state. */
/* Static so each diagnostic below is emitted at most once per compilation.  */
7228 static bool issued_sse_arg_error;
7229 static bool issued_sse_ret_error;
7230 static bool issued_x87_ret_error;
7232 machine_mode tmpmode;
7234 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7235 enum x86_64_reg_class regclass[MAX_CLASSES];
7239 int needed_sseregs, needed_intregs;
7240 rtx exp[MAX_CLASSES];
7243 n = classify_argument (mode, type, regclass, 0);
7246 if (examine_argument (mode, type, in_return, &needed_intregs,
7249 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7252 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7253 some less clueful developer tries to use floating-point anyway. */
7254 if (needed_sseregs && !TARGET_SSE)
7258 if (!issued_sse_ret_error)
7260 error ("SSE register return with SSE disabled");
7261 issued_sse_ret_error = true;
7264 else if (!issued_sse_arg_error)
7266 error ("SSE register argument with SSE disabled");
7267 issued_sse_arg_error = true;
7272 /* Likewise, error if the ABI requires us to return values in the
7273 x87 registers and the user specified -mno-80387. */
7274 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7275 for (i = 0; i < n; i++)
7276 if (regclass[i] == X86_64_X87_CLASS
7277 || regclass[i] == X86_64_X87UP_CLASS
7278 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7280 if (!issued_x87_ret_error)
7282 error ("x87 register return with x87 disabled");
7283 issued_x87_ret_error = true;
7288 /* First construct simple cases. Avoid SCmode, since we want to use
7289 single register to pass this type. */
7290 if (n == 1 && mode != SCmode)
7291 switch (regclass[0])
7293 case X86_64_INTEGER_CLASS:
7294 case X86_64_INTEGERSI_CLASS:
7295 return gen_rtx_REG (mode, intreg[0]);
7296 case X86_64_SSE_CLASS:
7297 case X86_64_SSESF_CLASS:
7298 case X86_64_SSEDF_CLASS:
7299 if (mode != BLKmode)
7300 return gen_reg_or_parallel (mode, orig_mode,
7301 SSE_REGNO (sse_regno));
7303 case X86_64_X87_CLASS:
7304 case X86_64_COMPLEX_X87_CLASS:
7305 return gen_rtx_REG (mode, FIRST_STACK_REG);
7306 case X86_64_NO_CLASS:
7307 /* Zero sized array, struct or class. */
/* Whole-value-in-one-SSE-register special cases: 16-, 32- and 64-byte
   values whose classification is SSE followed by SSEUP words.  */
7313 && regclass[0] == X86_64_SSE_CLASS
7314 && regclass[1] == X86_64_SSEUP_CLASS
7316 return gen_reg_or_parallel (mode, orig_mode,
7317 SSE_REGNO (sse_regno));
7319 && regclass[0] == X86_64_SSE_CLASS
7320 && regclass[1] == X86_64_SSEUP_CLASS
7321 && regclass[2] == X86_64_SSEUP_CLASS
7322 && regclass[3] == X86_64_SSEUP_CLASS
7324 return gen_reg_or_parallel (mode, orig_mode,
7325 SSE_REGNO (sse_regno));
7327 && regclass[0] == X86_64_SSE_CLASS
7328 && regclass[1] == X86_64_SSEUP_CLASS
7329 && regclass[2] == X86_64_SSEUP_CLASS
7330 && regclass[3] == X86_64_SSEUP_CLASS
7331 && regclass[4] == X86_64_SSEUP_CLASS
7332 && regclass[5] == X86_64_SSEUP_CLASS
7333 && regclass[6] == X86_64_SSEUP_CLASS
7334 && regclass[7] == X86_64_SSEUP_CLASS
7336 return gen_reg_or_parallel (mode, orig_mode,
7337 SSE_REGNO (sse_regno));
7339 && regclass[0] == X86_64_X87_CLASS
7340 && regclass[1] == X86_64_X87UP_CLASS)
7341 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two consecutive integer registers holding one 128-bit value.  */
7344 && regclass[0] == X86_64_INTEGER_CLASS
7345 && regclass[1] == X86_64_INTEGER_CLASS
7346 && (mode == CDImode || mode == TImode)
7347 && intreg[0] + 1 == intreg[1])
7348 return gen_rtx_REG (mode, intreg[0]);
7350 /* Otherwise figure out the entries of the PARALLEL. */
7351 for (i = 0; i < n; i++)
7355 switch (regclass[i])
7357 case X86_64_NO_CLASS:
7359 case X86_64_INTEGER_CLASS:
7360 case X86_64_INTEGERSI_CLASS:
7361 /* Merge TImodes on aligned occasions here too. */
7362 if (i * 8 + 8 > bytes)
7364 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7365 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7369 /* We've requested 24 bytes we
7370 don't have mode for. Use DImode. */
7371 if (tmpmode == BLKmode)
7374 = gen_rtx_EXPR_LIST (VOIDmode,
7375 gen_rtx_REG (tmpmode, *intreg),
7379 case X86_64_SSESF_CLASS:
7381 = gen_rtx_EXPR_LIST (VOIDmode,
7382 gen_rtx_REG (SFmode,
7383 SSE_REGNO (sse_regno)),
7387 case X86_64_SSEDF_CLASS:
7389 = gen_rtx_EXPR_LIST (VOIDmode,
7390 gen_rtx_REG (DFmode,
7391 SSE_REGNO (sse_regno)),
7395 case X86_64_SSE_CLASS:
/* An SSE word followed by SSEUP words is one wide vector register;
   the asserts check the classification is internally consistent.  */
7403 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7413 && regclass[1] == X86_64_SSEUP_CLASS
7414 && regclass[2] == X86_64_SSEUP_CLASS
7415 && regclass[3] == X86_64_SSEUP_CLASS);
7421 && regclass[1] == X86_64_SSEUP_CLASS
7422 && regclass[2] == X86_64_SSEUP_CLASS
7423 && regclass[3] == X86_64_SSEUP_CLASS
7424 && regclass[4] == X86_64_SSEUP_CLASS
7425 && regclass[5] == X86_64_SSEUP_CLASS
7426 && regclass[6] == X86_64_SSEUP_CLASS
7427 && regclass[7] == X86_64_SSEUP_CLASS);
7435 = gen_rtx_EXPR_LIST (VOIDmode,
7436 gen_rtx_REG (tmpmode,
7437 SSE_REGNO (sse_regno)),
7446 /* Empty aligned struct, union or class. */
7450 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7451 for (i = 0; i < nexps; i++)
7452 XVECEXP (ret, 0, i) = exp [i];
7456 /* Update the data in CUM to advance over an argument of mode MODE
7457 and data type TYPE. (TYPE is null for libcalls where that information
7458 may not be available.)
7460 Return a number of integer registers advanced over. */
7463 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7464 const_tree type, HOST_WIDE_INT bytes,
7465 HOST_WIDE_INT words)
/* NOTE(review): NULL used to initialize a bool; should be `false`
   (cf. function_arg_32, which correctly uses `false`).  */
7468 bool error_p = NULL;
7484 cum->words += words;
7485 cum->nregs -= words;
7486 cum->regno += words;
7487 if (cum->nregs >= 0)
7489 if (cum->nregs <= 0)
7497 /* OImode shouldn't be used directly. */
/* float_in_sse == -1 marks an SSE-convention call without SSE enabled;
   fall through to the error path below.  */
7501 if (cum->float_in_sse == -1)
7503 if (cum->float_in_sse < 2)
7506 if (cum->float_in_sse == -1)
7508 if (cum->float_in_sse < 1)
/* SSE vector argument: consumes one SSE register unless TYPE is an
   aggregate.  */
7531 if (!type || !AGGREGATE_TYPE_P (type))
7533 cum->sse_words += words;
7534 cum->sse_nregs -= 1;
7535 cum->sse_regno += 1;
7536 if (cum->sse_nregs <= 0)
/* MMX vector argument: analogous bookkeeping for MMX registers.  */
7550 if (!type || !AGGREGATE_TYPE_P (type))
7552 cum->mmx_words += words;
7553 cum->mmx_nregs -= 1;
7554 cum->mmx_regno += 1;
7555 if (cum->mmx_nregs <= 0)
7565 cum->float_in_sse = 0;
7566 error ("calling %qD with SSE calling convention without "
7567 "SSE/SSE2 enabled", cum->decl);
7568 sorry ("this is a GCC bug that can be worked around by adding "
7569 "attribute used to function called");
/* Advance CUM over an argument passed per the SysV x86-64 ABI.  Consumes
   integer/SSE registers when the argument fits; otherwise accounts for
   aligned stack words.  */
7576 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7577 const_tree type, HOST_WIDE_INT words, bool named)
7579 int int_nregs, sse_nregs;
7581 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7582 if (!named && (VALID_AVX512F_REG_MODE (mode)
7583 || VALID_AVX256_REG_MODE (mode)))
7586 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7587 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7589 cum->nregs -= int_nregs;
7590 cum->sse_nregs -= sse_nregs;
7591 cum->regno += int_nregs;
7592 cum->sse_regno += sse_nregs;
/* Stack case: round the running word count up to the argument's
   alignment boundary, then add its size in words.  */
7597 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7598 cum->words = (cum->words + align - 1) & ~(align - 1);
7599 cum->words += words;
/* Advance CUM for the Microsoft x64 ABI: only 1/2/4/8-byte values are
   passed directly; anything else must have been passed indirectly.  */
7605 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7606 HOST_WIDE_INT words)
7608 /* Otherwise, this should be passed indirect. */
7609 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7611 cum->words += words;
7621 /* Update the data in CUM to advance over an argument of mode MODE and
7622 data type TYPE. (TYPE is null for libcalls where that information
7623 may not be available.)
   TARGET_FUNCTION_ARG_ADVANCE hook: dispatches to the MS-x64, SysV-x64 or
   32-bit helper, and maintains MPX pointer-bounds bookkeeping.  */
7626 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7627 const_tree type, bool named)
7629 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7630 HOST_WIDE_INT bytes, words;
7633 if (mode == BLKmode)
7634 bytes = int_size_in_bytes (type);
7636 bytes = GET_MODE_SIZE (mode);
7637 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7640 mode = type_natural_mode (type, NULL, false);
/* Pointer-bounds (MPX) arguments are bookkeeping-only here.  */
7642 if ((type && POINTER_BOUNDS_TYPE_P (type))
7643 || POINTER_BOUNDS_MODE_P (mode))
7645 /* If we pass bounds in BT then just update remained bounds count. */
7646 if (cum->bnds_in_bt)
7652 /* Update remained number of bounds to force. */
7653 if (cum->force_bnd_pass)
7654 cum->force_bnd_pass--;
7661 /* The first arg not going to Bounds Tables resets this counter. */
7662 cum->bnds_in_bt = 0;
7663 /* For unnamed args we always pass bounds to avoid bounds mess when
7664 passed and received types do not match. If bounds do not follow
7665 unnamed arg, still pretend required number of bounds were passed. */
7666 if (cum->force_bnd_pass)
7668 cum->bnd_regno += cum->force_bnd_pass;
7669 cum->force_bnd_pass = 0;
7672 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7673 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7674 else if (TARGET_64BIT)
7675 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7677 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7679 /* For stdarg we expect bounds to be passed for each value passed
7682 cum->force_bnd_pass = nregs;
7683 /* For pointers passed in memory we expect bounds passed in Bounds
7686 cum->bnds_in_bt = chkp_type_bounds_count (type);
7689 /* Define where to put the arguments to a function.
7690 Value is zero to push the argument on the stack,
7691 or a hard register in which to store the argument.
7693 MODE is the argument's machine mode.
7694 TYPE is the data type of the argument (as a tree).
7695 This is null for libcalls where that information may
7697 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7698 the preceding args and about the function being called.
7699 NAMED is nonzero if this argument is a named parameter
7700 (otherwise it is an extra parameter matching an ellipsis). */
7703 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7704 machine_mode orig_mode, const_tree type,
7705 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7707 bool error_p = false;
7708 /* Avoid the AL settings for the Unix64 ABI. */
7709 if (mode == VOIDmode)
/* Integer-register case: only if the whole argument fits in the
   remaining regparm registers.  */
7725 if (words <= cum->nregs)
7727 int regno = cum->regno;
7729 /* Fastcall allocates the first two DWORD (SImode) or
7730 smaller arguments to ECX and EDX if it isn't an
7736 || (type && AGGREGATE_TYPE_P (type)))
7739 /* ECX not EAX is the first allocated register. */
7740 if (regno == AX_REG)
7743 return gen_rtx_REG (mode, regno);
7748 if (cum->float_in_sse == -1)
7750 if (cum->float_in_sse < 2)
7753 if (cum->float_in_sse == -1)
7755 if (cum->float_in_sse < 1)
7759 /* In 32bit, we pass TImode in xmm registers. */
7766 if (!type || !AGGREGATE_TYPE_P (type))
7769 return gen_reg_or_parallel (mode, orig_mode,
7770 cum->sse_regno + FIRST_SSE_REG);
7776 /* OImode and XImode shouldn't be used directly. */
/* SSE vector arguments go in the next available XMM register.  */
7791 if (!type || !AGGREGATE_TYPE_P (type))
7794 return gen_reg_or_parallel (mode, orig_mode,
7795 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector arguments go in the next available MMX register.  */
7805 if (!type || !AGGREGATE_TYPE_P (type))
7808 return gen_reg_or_parallel (mode, orig_mode,
7809 cum->mmx_regno + FIRST_MMX_REG);
7815 cum->float_in_sse = 0;
7816 error ("calling %qD with SSE calling convention without "
7817 "SSE/SSE2 enabled", cum->decl);
7818 sorry ("this is a GCC bug that can be worked around by adding "
7819 "attribute used to function called");
/* Return the register (or PARALLEL) for an argument under the SysV x86-64
   ABI, or NULL to pass it on the stack.  */
7826 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7827 machine_mode orig_mode, const_tree type, bool named)
7829 /* Handle a hidden AL argument containing number of registers
7830 for varargs x86-64 functions. */
7831 if (mode == VOIDmode)
7832 return GEN_INT (cum->maybe_vaarg
7833 ? (cum->sse_nregs < 0
7834 ? X86_64_SSE_REGPARM_MAX
7855 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7861 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7863 &x86_64_int_parameter_registers [cum->regno],
/* Return the register for an argument under the Microsoft x64 ABI, or NULL
   for a stack argument.  Each parameter slot maps to one fixed register.  */
7868 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7869 machine_mode orig_mode, bool named,
7870 HOST_WIDE_INT bytes)
7874 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7875 We use value of -2 to specify that current function call is MSABI. */
7876 if (mode == VOIDmode)
7877 return GEN_INT (-2);
7879 /* If we've run out of registers, it goes on the stack. */
7880 if (cum->nregs == 0)
7883 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7885 /* Only floating point modes are passed in anything but integer regs. */
7886 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7889 regno = cum->regno + FIRST_SSE_REG;
7894 /* Unnamed floating parameters are passed in both the
7895 SSE and integer registers. */
7896 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7897 t2 = gen_rtx_REG (mode, regno);
7898 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7899 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7900 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7903 /* Handle aggregate types passed in register. */
7904 if (orig_mode == BLKmode)
7906 if (bytes > 0 && bytes <= 8)
7907 mode = (bytes > 4 ? DImode : SImode);
7908 if (mode == BLKmode)
7912 return gen_reg_or_parallel (mode, orig_mode, regno);
7915 /* Return where to put the arguments to a function.
7916 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7918 MODE is the argument's machine mode. TYPE is the data type of the
7919 argument. It is null for libcalls where that information may not be
7920 available. CUM gives information about the preceding args and about
7921 the function being called. NAMED is nonzero if this argument is a
7922 named parameter (otherwise it is an extra parameter matching an
   TARGET_FUNCTION_ARG hook: handles MPX bounds, then dispatches to the
   per-ABI helpers.  */
7926 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7927 const_tree type, bool named)
7929 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7930 machine_mode mode = omode;
7931 HOST_WIDE_INT bytes, words;
7934 /* All pointer bounds arguments are handled separately here. */
7935 if ((type && POINTER_BOUNDS_TYPE_P (type))
7936 || POINTER_BOUNDS_MODE_P (mode))
7938 /* Return NULL if bounds are forced to go in Bounds Table. */
7939 if (cum->bnds_in_bt)
7941 /* Return the next available bound reg if any. */
7942 else if (cum->bnd_regno <= LAST_BND_REG)
7943 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7944 /* Return the next special slot number otherwise. */
7946 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7951 if (mode == BLKmode)
7952 bytes = int_size_in_bytes (type);
7954 bytes = GET_MODE_SIZE (mode);
7955 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7957 /* To simplify the code below, represent vector types with a vector mode
7958 even if MMX/SSE are not active. */
7959 if (type && TREE_CODE (type) == VECTOR_TYPE)
7960 mode = type_natural_mode (type, cum, false);
7962 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7963 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7964 else if (TARGET_64BIT)
7965 arg = function_arg_64 (cum, mode, omode, type, named);
7967 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7972 /* A C expression that indicates when an argument must be passed by
7973 reference. If nonzero for an argument, a copy of that argument is
7974 made in memory and a pointer to the argument is passed instead of
7975 the argument itself. The pointer is passed in whatever way is
7976 appropriate for passing a pointer to that type. */
7979 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7980 const_tree type, bool)
7982 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7984 /* Bounds are never passed by reference. */
7985 if ((type && POINTER_BOUNDS_TYPE_P (type))
7986 || POINTER_BOUNDS_MODE_P (mode))
7989 /* See Windows x64 Software Convention. */
7990 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7992 int msize = (int) GET_MODE_SIZE (mode);
7995 /* Arrays are passed by reference. */
7996 if (TREE_CODE (type) == ARRAY_TYPE)
7999 if (AGGREGATE_TYPE_P (type))
8001 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8002 are passed by reference. */
8003 msize = int_size_in_bytes (type);
8007 /* __m128 is passed by reference. */
/* MS x64: only power-of-two sizes up to 8 bytes go by value.  */
8009 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types are passed by reference.  */
8015 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
8021 /* Return true when TYPE should be 128bit aligned for 32bit argument
8022 passing ABI. XXX: This function is obsolete and is only used for
8023 checking psABI compatibility with previous versions of GCC. */
8026 ix86_compat_aligned_value_p (const_tree type)
8028 machine_mode mode = TYPE_MODE (type);
8029 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8033 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8035 if (TYPE_ALIGN (type) < 128)
8038 if (AGGREGATE_TYPE_P (type))
8040 /* Walk the aggregates recursively. */
8041 switch (TREE_CODE (type))
8045 case QUAL_UNION_TYPE:
/* An aggregate is 128-bit aligned if any of its fields is.  */
8049 /* Walk all the structure fields. */
8050 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8052 if (TREE_CODE (field) == FIELD_DECL
8053 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8060 /* Just for use if some languages pass arrays by value. */
8061 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8072 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8073 XXX: This function is obsolete and is only used for checking psABI
8074 compatibility with previous versions of GCC. */
8077 ix86_compat_function_arg_boundary (machine_mode mode,
8078 const_tree type, unsigned int align)
8080 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8081 natural boundaries. */
8082 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8084 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8085 make an exception for SSE modes since these require 128bit
8088 The handling here differs from field_alignment. ICC aligns MMX
8089 arguments to 4 byte boundaries, while structure fields are aligned
8090 to 8 byte boundaries. */
8093 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8094 align = PARM_BOUNDARY;
8098 if (!ix86_compat_aligned_value_p (type))
8099 align = PARM_BOUNDARY;
/* Never exceed the maximum alignment the target supports.  */
8102 if (align > BIGGEST_ALIGNMENT)
8103 align = BIGGEST_ALIGNMENT;
8107 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* Current (non-compat) recursive check; cf. ix86_compat_aligned_value_p
   above, which mirrors it for the obsolete -Wpsabi comparison.  */
8111 ix86_contains_aligned_value_p (const_tree type)
8113 machine_mode mode = TYPE_MODE (type);
8115 if (mode == XFmode || mode == XCmode)
8118 if (TYPE_ALIGN (type) < 128)
8121 if (AGGREGATE_TYPE_P (type))
8123 /* Walk the aggregates recursively. */
8124 switch (TREE_CODE (type))
8128 case QUAL_UNION_TYPE:
8132 /* Walk all the structure fields. */
8133 for (field = TYPE_FIELDS (type);
8135 field = DECL_CHAIN (field))
8137 if (TREE_CODE (field) == FIELD_DECL
8138 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8145 /* Just for use if some languages pass arrays by value. */
8146 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8155 return TYPE_ALIGN (type) >= 128;
8160 /* Gives the alignment boundary, in bits, of an argument with the
8161 specified mode and type. */
8164 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8169 /* Since the main variant type is used for call, we convert it to
8170 the main variant type. */
8171 type = TYPE_MAIN_VARIANT (type);
8172 align = TYPE_ALIGN (type);
8175 align = GET_MODE_ALIGNMENT (mode);
8176 if (align < PARM_BOUNDARY)
8177 align = PARM_BOUNDARY;
/* Remember the pre-adjustment alignment for the -Wpsabi comparison
   against the obsolete compat computation below.  */
8181 unsigned int saved_align = align;
8185 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8188 if (mode == XFmode || mode == XCmode)
8189 align = PARM_BOUNDARY;
8191 else if (!ix86_contains_aligned_value_p (type))
8192 align = PARM_BOUNDARY;
8195 align = PARM_BOUNDARY;
8200 && align != ix86_compat_function_arg_boundary (mode, type,
8204 inform (input_location,
8205 "The ABI for passing parameters with %d-byte"
8206 " alignment has changed in GCC 4.6",
8207 align / BITS_PER_UNIT);
8214 /* Return true if N is a possible register number of function value. */
8217 ix86_function_value_regno_p (const unsigned int regno)
/* Integer return registers differ between the SysV and MS 64-bit ABIs.  */
8224 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8227 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
/* Bound registers are return registers only in instrumented (MPX)
   functions.  */
8231 return chkp_function_instrumented_p (current_function_decl);
8233 /* Complex values are returned in %st(0)/%st(1) pair. */
8236 /* TODO: The function should depend on current function ABI but
8237 builtins.c would need updating then. Therefore we use the
8239 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8241 return TARGET_FLOAT_RETURNS_IN_80387;
8243 /* Complex values are returned in %xmm0/%xmm1 pair. */
8249 if (TARGET_MACHO || TARGET_64BIT)
8257 /* Define how to find the value returned by a function.
8258 VALTYPE is the data type of the value (as a tree).
8259 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8260 otherwise, FUNC is 0. */
/* 32-bit return-value register selection.  */
8263 function_value_32 (machine_mode orig_mode, machine_mode mode,
8264 const_tree fntype, const_tree fn)
8268 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8269 we normally prevent this case when mmx is not available. However
8270 some ABIs may require the result to be returned like DImode. */
8271 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8272 regno = FIRST_MMX_REG;
8274 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8275 we prevent this case when sse is not available. However some ABIs
8276 may require the result to be returned like integer TImode. */
8277 else if (mode == TImode
8278 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8279 regno = FIRST_SSE_REG;
8281 /* 32-byte vector modes in %ymm0. */
8282 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8283 regno = FIRST_SSE_REG;
8285 /* 64-byte vector modes in %zmm0. */
8286 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8287 regno = FIRST_SSE_REG;
8289 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8290 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8291 regno = FIRST_FLOAT_REG;
8293 /* Most things go in %eax. */
8296 /* Override FP return register with %xmm0 for local functions when
8297 SSE math is enabled or for functions with sseregparm attribute. */
8298 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8300 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8301 if (sse_level == -1)
/* NOTE(review): "caling" in the diagnostic below is a typo for
   "calling" in a user-visible string; fixing it is a code change and
   is out of scope for this documentation-only pass.  */
8303 error ("calling %qD with SSE caling convention without "
8304 "SSE/SSE2 enabled", fn);
8305 sorry ("this is a GCC bug that can be worked around by adding "
8306 "attribute used to function called");
8308 else if ((sse_level >= 1 && mode == SFmode)
8309 || (sse_level == 2 && mode == DFmode))
8310 regno = FIRST_SSE_REG;
8313 /* OImode shouldn't be used directly. */
8314 gcc_assert (mode != OImode);
8316 return gen_rtx_REG (orig_mode, regno);
/* SysV x86-64 return-value selection: libcalls get a register directly,
   everything else goes through construct_container.  */
8320 function_value_64 (machine_mode orig_mode, machine_mode mode,
8325 /* Handle libcalls, which don't provide a type node. */
8326 if (valtype == NULL)
8340 regno = FIRST_SSE_REG;
8344 regno = FIRST_FLOAT_REG;
8352 return gen_rtx_REG (mode, regno);
8354 else if (POINTER_TYPE_P (valtype))
8356 /* Pointers are always returned in word_mode. */
8360 ret = construct_container (mode, orig_mode, valtype, 1,
8361 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8362 x86_64_int_return_registers, 0);
8364 /* For zero sized structures, construct_container returns NULL, but we
8365 need to keep rest of compiler happy by returning meaningful value. */
8367 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Microsoft x64 return-value selection: %rax by default, %xmm0 for
   floating-point and vector cases.  */
8373 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8376 unsigned int regno = AX_REG;
8380 switch (GET_MODE_SIZE (mode))
8383 if (valtype != NULL_TREE
/* NOTE(review): the next condition is duplicated on two consecutive
   lines -- the second !VECTOR_INTEGER_TYPE_P test is redundant.
   Harmless, but one of them can be removed.  */
8384 && !VECTOR_INTEGER_TYPE_P (valtype)
8385 && !VECTOR_INTEGER_TYPE_P (valtype)
8386 && !INTEGRAL_TYPE_P (valtype)
8387 && !VECTOR_FLOAT_TYPE_P (valtype))
8389 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8390 && !COMPLEX_MODE_P (mode))
8391 regno = FIRST_SSE_REG;
8395 if (mode == SFmode || mode == DFmode)
8396 regno = FIRST_SSE_REG;
8402 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value / libcall value hooks: normalizes
   FNTYPE_OR_DECL into FN and FNTYPE, then dispatches per ABI.  */
8406 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8407 machine_mode orig_mode, machine_mode mode)
8409 const_tree fn, fntype;
8412 if (fntype_or_decl && DECL_P (fntype_or_decl))
8413 fn = fntype_or_decl;
8414 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
/* Pointer-bounds (MPX) values are returned in the first bound register.  */
8416 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8417 || POINTER_BOUNDS_MODE_P (mode))
8418 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8419 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8420 return function_value_ms_64 (orig_mode, mode, valtype);
8421 else if (TARGET_64BIT)
8422 return function_value_64 (orig_mode, mode, valtype);
8424 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: compute the natural mode for VALTYPE and
   delegate to ix86_function_value_1.  */
8428 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8430 machine_mode mode, orig_mode;
8432 orig_mode = TYPE_MODE (valtype);
8433 mode = type_natural_mode (valtype, NULL, true);
8434 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8437 /* Return an RTX representing a place where a function returns
8438 or receives pointer bounds or NULL if no bounds are returned.
8440 VALTYPE is a data type of a value returned by the function.
8442 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8443 or FUNCTION_TYPE of the function.
8445 If OUTGOING is false, return a place in which the caller will
8446 see the return value. Otherwise, return a place where a
8447 function returns a value. */
8450 ix86_function_value_bounds (const_tree valtype,
8451 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8452 bool outgoing ATTRIBUTE_UNUSED)
/* A directly-bounded type uses the first bounds register outright.  */
8456 if (BOUNDED_TYPE_P (valtype))
8457 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8458 else if (chkp_type_has_pointer (valtype))
/* Aggregate containing pointers: build a PARALLEL of one bounds
   register per pointer slot found by chkp_find_bound_slots.  */
8463 unsigned i, bnd_no = 0;
8465 bitmap_obstack_initialize (NULL);
8466 slots = BITMAP_ALLOC (NULL);
8467 chkp_find_bound_slots (valtype, slots);
8469 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8471 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8472 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
/* Only two bounds registers are usable for return values.  */
8473 gcc_assert (bnd_no < 2);
8474 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8477 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8479 BITMAP_FREE (slots);
8480 bitmap_obstack_release (NULL);
8488 /* Pointer function arguments and return values are promoted to
8492 ix86_promote_function_mode (const_tree type, machine_mode mode,
8493 int *punsignedp, const_tree fntype,
/* Pointers extend per POINTERS_EXTEND_UNSIGNED; all other types use
   the default promotion rule.  */
8496 if (type != NULL_TREE && POINTER_TYPE_P (type))
8498 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8501 return default_promote_function_mode (type, mode, punsignedp, fntype,
8505 /* Return true if a structure, union or array with MODE containing FIELD
8506 should be accessed using BLKmode. */
8509 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8511 /* Union with XFmode must be in BLKmode. */
8512 return (mode == XFmode
8513 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8514 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* Implement TARGET_LIBCALL_VALUE: libcalls carry no tree type, so
   reuse ix86_function_value_1 with MODE for both modes.  */
8518 ix86_libcall_value (machine_mode mode)
8520 return ix86_function_value_1 (NULL, NULL, mode, mode);
8523 /* Return true iff type is returned in memory. */
8526 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* A subtarget may override the whole decision.  */
8528 #ifdef SUBTARGET_RETURN_IN_MEMORY
8529 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8531 const machine_mode mode = type_natural_mode (type, NULL, true);
/* Pointer bounds are never returned in memory.  */
8534 if (POINTER_BOUNDS_TYPE_P (type))
8539 if (ix86_function_type_abi (fntype) == MS_ABI)
8541 size = int_size_in_bytes (type);
8543 /* __m128 is returned in xmm0. */
8544 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8545 || INTEGRAL_TYPE_P (type)
8546 || VECTOR_FLOAT_TYPE_P (type))
8547 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8548 && !COMPLEX_MODE_P (mode)
8549 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8552 /* Otherwise, the size must be exactly in [1248]. */
8553 return size != 1 && size != 2 && size != 4 && size != 8;
/* 64-bit SysV: in memory iff examine_argument says the value does
   not fit in the return registers.  */
8557 int needed_intregs, needed_sseregs;
8559 return examine_argument (mode, type, 1,
8560 &needed_intregs, &needed_sseregs);
8565 if (mode == BLKmode)
8568 size = int_size_in_bytes (type);
8570 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8573 if (VECTOR_MODE_P (mode) || mode == TImode)
8575 /* User-created vectors small enough to fit in EAX. */
8579 /* Unless ABI prescribes otherwise,
8580 MMX/3dNow values are returned in MM0 if available. */
8583 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8585 /* SSE values are returned in XMM0 if available. */
8589 /* AVX values are returned in YMM0 if available. */
8593 /* AVX512F values are returned in ZMM0 if available. */
8595 return !TARGET_AVX512F;
8604 /* OImode shouldn't be used directly. */
8605 gcc_assert (mode != OImode);
8613 /* Create the va_list data type. */
8615 /* Returns the calling convention specific va_list date type.
8616 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8619 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8621 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8623 /* For i386 we use plain pointer to argument area. */
8624 if (!TARGET_64BIT || abi == MS_ABI)
8625 return build_pointer_type (char_type_node);
/* 64-bit SysV: build the four-field __va_list_tag record.  */
8627 record = lang_hooks.types.make_type (RECORD_TYPE);
8628 type_decl = build_decl (BUILTINS_LOCATION,
8629 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8631 f_gpr = build_decl (BUILTINS_LOCATION,
8632 FIELD_DECL, get_identifier ("gp_offset"),
8633 unsigned_type_node);
8634 f_fpr = build_decl (BUILTINS_LOCATION,
8635 FIELD_DECL, get_identifier ("fp_offset"),
8636 unsigned_type_node);
8637 f_ovf = build_decl (BUILTINS_LOCATION,
8638 FIELD_DECL, get_identifier ("overflow_arg_area"),
8640 f_sav = build_decl (BUILTINS_LOCATION,
8641 FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the stdarg pass can find them.  */
8644 va_list_gpr_counter_field = f_gpr;
8645 va_list_fpr_counter_field = f_fpr;
8647 DECL_FIELD_CONTEXT (f_gpr) = record;
8648 DECL_FIELD_CONTEXT (f_fpr) = record;
8649 DECL_FIELD_CONTEXT (f_ovf) = record;
8650 DECL_FIELD_CONTEXT (f_sav) = record;
8652 TYPE_STUB_DECL (record) = type_decl;
8653 TYPE_NAME (record) = type_decl;
8654 TYPE_FIELDS (record) = f_gpr;
8655 DECL_CHAIN (f_gpr) = f_fpr;
8656 DECL_CHAIN (f_fpr) = f_ovf;
8657 DECL_CHAIN (f_ovf) = f_sav;
8659 layout_type (record);
8661 /* The correct type is an array type of one element. */
8662 return build_array_type (record, build_index_type (size_zero_node));
8665 /* Setup the builtin va_list data type and for 64-bit the additional
8666 calling convention specific va_list data types. */
8669 ix86_build_builtin_va_list (void)
8671 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8673 /* Initialize abi specific va_list builtin types. */
/* Build the SysV variant: either a fresh record (when the default
   ABI is MS) or a variant copy of the default type.  */
8677 if (ix86_abi == MS_ABI)
8679 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8680 if (TREE_CODE (t) != RECORD_TYPE)
8681 t = build_variant_type_copy (t);
8682 sysv_va_list_type_node = t;
8687 if (TREE_CODE (t) != RECORD_TYPE)
8688 t = build_variant_type_copy (t);
8689 sysv_va_list_type_node = t;
/* Likewise for the MS variant.  */
8691 if (ix86_abi != MS_ABI)
8693 t = ix86_build_builtin_va_list_abi (MS_ABI);
8694 if (TREE_CODE (t) != RECORD_TYPE)
8695 t = build_variant_type_copy (t);
8696 ms_va_list_type_node = t;
8701 if (TREE_CODE (t) != RECORD_TYPE)
8702 t = build_variant_type_copy (t);
8703 ms_va_list_type_node = t;
8710 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8713 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8719 /* GPR size of varargs save area. */
8720 if (cfun->va_list_gpr_size)
8721 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8723 ix86_varargs_gpr_size = 0;
8725 /* FPR size of varargs save area. We don't need it if we don't pass
8726 anything in SSE registers. */
8727 if (TARGET_SSE && cfun->va_list_fpr_size)
8728 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8730 ix86_varargs_fpr_size = 0;
/* Nothing to save at all: done.  */
8732 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8735 save_area = frame_pointer_rtx;
8736 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer argument registers into the
   save area, one word each.  */
8738 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8739 if (max > X86_64_REGPARM_MAX)
8740 max = X86_64_REGPARM_MAX;
8742 for (i = cum->regno; i < max; i++)
8744 mem = gen_rtx_MEM (word_mode,
8745 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8746 MEM_NOTRAP_P (mem) = 1;
8747 set_mem_alias_set (mem, set);
8748 emit_move_insn (mem,
8749 gen_rtx_REG (word_mode,
8750 x86_64_int_parameter_registers[i]));
8753 if (ix86_varargs_fpr_size)
8756 rtx_code_label *label;
8759 /* Now emit code to save SSE registers. The AX parameter contains number
8760 of SSE parameter registers used to call this function, though all we
8761 actually check here is the zero/non-zero status. */
8763 label = gen_label_rtx ();
8764 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8765 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8768 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8769 we used movdqa (i.e. TImode) instead? Perhaps even better would
8770 be if we could determine the real mode of the data, via a hook
8771 into pass_stdarg. Ignore all that for now. */
8773 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8774 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8776 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8777 if (max > X86_64_SSE_REGPARM_MAX)
8778 max = X86_64_SSE_REGPARM_MAX;
/* 16 bytes per SSE register, placed after the GPR save area.  */
8780 for (i = cum->sse_regno; i < max; ++i)
8782 mem = plus_constant (Pmode, save_area,
8783 i * 16 + ix86_varargs_gpr_size);
8784 mem = gen_rtx_MEM (smode, mem);
8785 MEM_NOTRAP_P (mem) = 1;
8786 set_mem_alias_set (mem, set);
8787 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8789 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
/* MS-ABI worker for TARGET_SETUP_INCOMING_VARARGS: spill the
   remaining unnamed integer argument registers into their caller
   allocated home slots above the incoming args pointer.  */
8797 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8799 alias_set_type set = get_varargs_alias_set ();
8802 /* Reset to zero, as there might be a sysv vaarg used
8804 ix86_varargs_gpr_size = 0;
8805 ix86_varargs_fpr_size = 0;
8807 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8811 mem = gen_rtx_MEM (Pmode,
8812 plus_constant (Pmode, virtual_incoming_args_rtx,
8813 i * UNITS_PER_WORD));
8814 MEM_NOTRAP_P (mem) = 1;
8815 set_mem_alias_set (mem, set);
8817 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8818 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: advance past the last
   named argument, then dispatch to the MS-ABI or SysV worker.  */
8823 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8824 tree type, int *, int no_rtl)
8826 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8827 CUMULATIVE_ARGS next_cum;
8830 /* This argument doesn't appear to be used anymore. Which is good,
8831 because the old code here didn't suppress rtl generation. */
8832 gcc_assert (!no_rtl);
8837 fntype = TREE_TYPE (current_function_decl);
8839 /* For varargs, we do not want to skip the dummy va_dcl argument.
8840 For stdargs, we do want to skip the last named argument. */
8842 if (stdarg_p (fntype))
8843 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8846 if (cum->call_abi == MS_ABI)
8847 setup_incoming_varargs_ms_64 (&next_cum);
8849 setup_incoming_varargs_64 (&next_cum);
/* MPX counterpart of ix86_setup_incoming_varargs: for instrumented
   functions, store the bounds of each spilled unnamed integer
   register next to its slot in the register save area.  */
8853 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8854 enum machine_mode mode,
8856 int *pretend_size ATTRIBUTE_UNUSED,
8859 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8860 CUMULATIVE_ARGS next_cum;
8863 int bnd_reg, i, max;
8865 gcc_assert (!no_rtl);
8867 /* Do nothing if we use plain pointer to argument area. */
8868 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8871 fntype = TREE_TYPE (current_function_decl);
8873 /* For varargs, we do not want to skip the dummy va_dcl argument.
8874 For stdargs, we do want to skip the last named argument. */
8876 if (stdarg_p (fntype))
8877 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8879 save_area = frame_pointer_rtx;
8881 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8882 if (max > X86_64_REGPARM_MAX)
8883 max = X86_64_REGPARM_MAX;
8885 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8886 if (chkp_function_instrumented_p (current_function_decl))
8887 for (i = cum->regno; i < max; i++)
8889 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8890 rtx reg = gen_rtx_REG (DImode,
8891 x86_64_int_parameter_registers[i]);
/* Bounds either arrive in a bounds register or must be loaded from
   the Bounds Table for stack-passed pointers.  */
8895 if (bnd_reg <= LAST_BND_REG)
8896 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8900 plus_constant (Pmode, arg_pointer_rtx,
8901 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8902 bounds = gen_reg_rtx (BNDmode);
8903 emit_insn (BNDmode == BND64mode
8904 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8905 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8908 emit_insn (BNDmode == BND64mode
8909 ? gen_bnd64_stx (addr, ptr, bounds)
8910 : gen_bnd32_stx (addr, ptr, bounds));
8917 /* Checks if TYPE is of kind va_list char *. */
8920 is_va_list_char_pointer (tree type)
8924 /* For 32-bit it is always true. */
8927 canonic = ix86_canonical_va_list_type (type);
8928 return (canonic == ms_va_list_type_node
8929 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8932 /* Implement va_start. */
8935 ix86_va_start (tree valist, rtx nextarg)
8937 HOST_WIDE_INT words, n_gpr, n_fpr;
8938 tree f_gpr, f_fpr, f_ovf, f_sav;
8939 tree gpr, fpr, ovf, sav, t;
8943 if (flag_split_stack
8944 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8946 unsigned int scratch_regno;
8948 /* When we are splitting the stack, we can't refer to the stack
8949 arguments using internal_arg_pointer, because they may be on
8950 the old stack. The split stack prologue will arrange to
8951 leave a pointer to the old stack arguments in a scratch
8952 register, which we here copy to a pseudo-register. The split
8953 stack prologue can't set the pseudo-register directly because
8954 it (the prologue) runs before any registers have been saved. */
8956 scratch_regno = split_stack_prologue_scratch_regno ();
8957 if (scratch_regno != INVALID_REGNUM)
8962 reg = gen_reg_rtx (Pmode);
8963 cfun->machine->split_stack_varargs_pointer = reg;
8966 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
/* Splice the copy insn in right after the function entry.  */
8970 push_topmost_sequence ();
8971 emit_insn_after (seq, entry_of_function ());
8972 pop_topmost_sequence ();
8976 /* Only 64bit target needs something special. */
8977 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8979 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8980 std_expand_builtin_va_start (valist, nextarg);
/* Split-stack case: compute the arg pointer by hand.  */
8985 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8986 next = expand_binop (ptr_mode, add_optab,
8987 cfun->machine->split_stack_varargs_pointer,
8988 crtl->args.arg_offset_rtx,
8989 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8990 convert_move (va_r, next, 0);
8992 /* Store zero bounds for va_list. */
8993 if (chkp_function_instrumented_p (current_function_decl))
8994 chkp_expand_bounds_reset_for_mem (valist,
8995 make_tree (TREE_TYPE (valist),
/* 64-bit SysV: fill in the four __va_list_tag fields.  */
9002 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9003 f_fpr = DECL_CHAIN (f_gpr);
9004 f_ovf = DECL_CHAIN (f_fpr);
9005 f_sav = DECL_CHAIN (f_ovf);
9007 valist = build_simple_mem_ref (valist);
9008 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9009 /* The following should be folded into the MEM_REF offset. */
9010 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9012 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9014 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9016 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9019 /* Count number of gp and fp argument registers used. */
9020 words = crtl->args.info.words;
9021 n_gpr = crtl->args.info.regno;
9022 n_fpr = crtl->args.info.sse_regno;
9024 if (cfun->va_list_gpr_size)
/* gp_offset = n_gpr * 8 (bytes into the register save area).  */
9026 type = TREE_TYPE (gpr);
9027 t = build2 (MODIFY_EXPR, type,
9028 gpr, build_int_cst (type, n_gpr * 8));
9029 TREE_SIDE_EFFECTS (t) = 1;
9030 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9033 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts after the full GPR area (8 * REGPARM_MAX).  */
9035 type = TREE_TYPE (fpr);
9036 t = build2 (MODIFY_EXPR, type, fpr,
9037 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9038 TREE_SIDE_EFFECTS (t) = 1;
9039 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9042 /* Find the overflow area. */
9043 type = TREE_TYPE (ovf);
9044 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9045 ovf_rtx = crtl->args.internal_arg_pointer;
9047 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9048 t = make_tree (type, ovf_rtx);
9050 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9052 /* Store zero bounds for overflow area pointer. */
9053 if (chkp_function_instrumented_p (current_function_decl))
9054 chkp_expand_bounds_reset_for_mem (ovf, t);
9056 t = build2 (MODIFY_EXPR, type, ovf, t);
9057 TREE_SIDE_EFFECTS (t) = 1;
9058 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9060 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9062 /* Find the register save area.
9063 Prologue of the function save it right above stack frame. */
9064 type = TREE_TYPE (sav);
9065 t = make_tree (type, frame_pointer_rtx);
9066 if (!ix86_varargs_gpr_size)
9067 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9069 /* Store zero bounds for save area pointer. */
9070 if (chkp_function_instrumented_p (current_function_decl))
9071 chkp_expand_bounds_reset_for_mem (sav, t);
9073 t = build2 (MODIFY_EXPR, type, sav, t);
9074 TREE_SIDE_EFFECTS (t) = 1;
9075 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9079 /* Implement va_arg. */
9082 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9085 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9086 tree f_gpr, f_fpr, f_ovf, f_sav;
9087 tree gpr, fpr, ovf, sav, t;
9089 tree lab_false, lab_over = NULL_TREE;
9094 machine_mode nat_mode;
9095 unsigned int arg_boundary;
9097 /* Only 64bit target needs something special. */
9098 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9099 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Build COMPONENT_REFs for the four __va_list_tag fields.  */
9101 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9102 f_fpr = DECL_CHAIN (f_gpr);
9103 f_ovf = DECL_CHAIN (f_fpr);
9104 f_sav = DECL_CHAIN (f_ovf);
9106 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9107 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9108 valist = build_va_arg_indirect_ref (valist);
9109 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9110 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9111 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments fetch a pointer instead.  */
9113 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9115 type = build_pointer_type (type);
9116 size = int_size_in_bytes (type);
9117 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9119 nat_mode = type_natural_mode (type, NULL, false);
9134 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9135 if (!TARGET_64BIT_MS_ABI)
9142 container = construct_container (nat_mode, TYPE_MODE (type),
9143 type, 0, X86_64_REGPARM_MAX,
9144 X86_64_SSE_REGPARM_MAX, intreg,
9149 /* Pull the value out of the saved registers. */
9151 addr = create_tmp_var (ptr_type_node, "addr");
9155 int needed_intregs, needed_sseregs;
9157 tree int_addr, sse_addr;
9159 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9160 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9162 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9164 need_temp = (!REG_P (container)
9165 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9166 || TYPE_ALIGN (type) > 128));
9168 /* In case we are passing structure, verify that it is consecutive block
9169 on the register save area. If not we need to do moves. */
9170 if (!need_temp && !REG_P (container))
9172 /* Verify that all registers are strictly consecutive */
9173 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9177 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9179 rtx slot = XVECEXP (container, 0, i);
9180 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9181 || INTVAL (XEXP (slot, 1)) != i * 16)
9189 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9191 rtx slot = XVECEXP (container, 0, i);
9192 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9193 || INTVAL (XEXP (slot, 1)) != i * 8)
9205 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9206 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9209 /* First ensure that we fit completely in registers. */
9212 t = build_int_cst (TREE_TYPE (gpr),
9213 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9214 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9215 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9216 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9217 gimplify_and_add (t, pre_p);
9221 t = build_int_cst (TREE_TYPE (fpr),
9222 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9223 + X86_64_REGPARM_MAX * 8);
9224 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9225 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9226 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9227 gimplify_and_add (t, pre_p);
9230 /* Compute index to start of area used for integer regs. */
9233 /* int_addr = gpr + sav; */
9234 t = fold_build_pointer_plus (sav, gpr);
9235 gimplify_assign (int_addr, t, pre_p);
9239 /* sse_addr = fpr + sav; */
9240 t = fold_build_pointer_plus (sav, fpr);
9241 gimplify_assign (sse_addr, t, pre_p);
/* Non-contiguous container: copy piece by piece into a temp.  */
9245 int i, prev_size = 0;
9246 tree temp = create_tmp_var (type, "va_arg_tmp");
9249 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9250 gimplify_assign (addr, t, pre_p);
9252 for (i = 0; i < XVECLEN (container, 0); i++)
9254 rtx slot = XVECEXP (container, 0, i);
9255 rtx reg = XEXP (slot, 0);
9256 machine_mode mode = GET_MODE (reg);
9262 tree dest_addr, dest;
9263 int cur_size = GET_MODE_SIZE (mode);
9265 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9266 prev_size = INTVAL (XEXP (slot, 1));
9267 if (prev_size + cur_size > size)
/* Clamp the final piece to the remaining bytes.  */
9269 cur_size = size - prev_size;
9270 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9271 if (mode == BLKmode)
9274 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9275 if (mode == GET_MODE (reg))
9276 addr_type = build_pointer_type (piece_type);
9278 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9280 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9283 if (SSE_REGNO_P (REGNO (reg)))
9285 src_addr = sse_addr;
9286 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9290 src_addr = int_addr;
9291 src_offset = REGNO (reg) * 8;
9293 src_addr = fold_convert (addr_type, src_addr);
9294 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9296 dest_addr = fold_convert (daddr_type, addr);
9297 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9298 if (cur_size == GET_MODE_SIZE (mode))
9300 src = build_va_arg_indirect_ref (src_addr);
9301 dest = build_va_arg_indirect_ref (dest_addr);
9303 gimplify_assign (dest, src, pre_p);
/* Partial piece: fall back to memcpy.  */
9308 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9309 3, dest_addr, src_addr,
9310 size_int (cur_size));
9311 gimplify_and_add (copy, pre_p);
9313 prev_size += cur_size;
/* Bump the gp/fp offsets past the registers we consumed.  */
9319 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9320 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9321 gimplify_assign (gpr, t, pre_p);
9326 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9327 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9328 gimplify_assign (fpr, t, pre_p);
9331 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9333 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9336 /* ... otherwise out of the overflow area. */
9338 /* When we align parameter on stack for caller, if the parameter
9339 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9340 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
9341 here with caller. */
9342 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9343 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9344 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9346 /* Care for on-stack alignment if needed. */
9347 if (arg_boundary <= 64 || size == 0)
/* Round the overflow pointer up to the argument alignment.  */
9351 HOST_WIDE_INT align = arg_boundary / 8;
9352 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9353 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9354 build_int_cst (TREE_TYPE (t), -align));
9357 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9358 gimplify_assign (addr, t, pre_p);
9360 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9361 gimplify_assign (unshare_expr (ovf), t, pre_p);
9364 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9366 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9367 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference, dereference one extra level.  */
9370 addr = build_va_arg_indirect_ref (addr);
9371 return build_va_arg_indirect_ref (addr);
9374 /* Return true if OPNUM's MEM should be matched
9375 in movabs* patterns. */
9378 ix86_check_movabs (rtx insn, int opnum)
9382 set = PATTERN (insn);
9383 if (GET_CODE (set) == PARALLEL)
9384 set = XVECEXP (set, 0, 0);
9385 gcc_assert (GET_CODE (set) == SET);
9386 mem = XEXP (set, opnum);
9387 while (GET_CODE (mem) == SUBREG)
9388 mem = SUBREG_REG (mem);
9389 gcc_assert (MEM_P (mem));
9390 return volatile_ok || !MEM_VOLATILE_P (mem);
9393 /* Initialize the table of extra 80387 mathematical constants. */
9396 init_ext_80387_constants (void)
9398 static const char * cst[5] =
9400 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9401 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9402 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9403 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9404 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9408 for (i = 0; i < 5; i++)
9410 real_from_string (&ext_80387_constants_table[i], cst[i]);
9411 /* Ensure each constant is rounded to XFmode precision. */
9412 real_convert (&ext_80387_constants_table[i],
9413 XFmode, &ext_80387_constants_table[i]);
9416 ext_80387_constants_init = 1;
9419 /* Return non-zero if the constant is something that
9420 can be loaded with a special instruction. */
9423 standard_80387_constant_p (rtx x)
9425 machine_mode mode = GET_MODE (x);
/* Only x87 float CONST_DOUBLEs qualify.  */
9429 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9432 if (x == CONST0_RTX (mode))
9434 if (x == CONST1_RTX (mode))
9437 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9439 /* For XFmode constants, try to find a special 80387 instruction when
9440 optimizing for size or on those CPUs that benefit from them. */
9442 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9446 if (! ext_80387_constants_init)
9447 init_ext_80387_constants ();
/* Scan the table of fldlg2/fldln2/fldl2e/fldl2t/fldpi constants.  */
9449 for (i = 0; i < 5; i++)
9450 if (real_identical (&r, &ext_80387_constants_table[i]))
9454 /* Load of the constant -0.0 or -1.0 will be split as
9455 fldz;fchs or fld1;fchs sequence. */
9456 if (real_isnegzero (&r))
9458 if (real_identical (&r, &dconstm1))
9464 /* Return the opcode of the special instruction to be used to load
/* Map standard_80387_constant_p's classification to the x87 opcode
   string (fldz, fld1, fldlg2, ...); switch cases elided here.  */
9468 standard_80387_constant_opcode (rtx x)
9470 switch (standard_80387_constant_p (x))
9494 /* Return the CONST_DOUBLE representing the 80387 constant that is
9495 loaded by the specified special instruction. The argument IDX
9496 matches the return value from standard_80387_constant_p. */
9499 standard_80387_constant_rtx (int idx)
/* The table is lazily initialized on first use.  */
9503 if (! ext_80387_constants_init)
9504 init_ext_80387_constants ();
9520 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9524 /* Return 1 if X is all 0s and 2 if x is all 1s
9525 in supported SSE/AVX vector mode. */
9528 standard_sse_constant_p (rtx x)
9530 machine_mode mode = GET_MODE (x);
9532 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
/* All-ones requires an ISA check (cases elided here).  */
9534 if (vector_all_ones_operand (x, mode))
9562 /* Return the opcode of the special instruction to be used to load
/* Pick the xor/pcmpeq/vpternlog idiom that materializes the all-0s
   or all-1s constant X, depending on INSN's mode attribute and the
   enabled AVX-512 features.  */
9566 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9568 switch (standard_sse_constant_p (x))
/* All-zeros: an xor of the destination with itself.  */
9571 switch (get_attr_mode (insn))
9574 return "vpxord\t%g0, %g0, %g0";
9576 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9577 : "vpxord\t%g0, %g0, %g0";
9579 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9580 : "vpxorq\t%g0, %g0, %g0";
9582 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9583 : "%vpxor\t%0, %d0";
9585 return "%vxorpd\t%0, %d0";
9587 return "%vxorps\t%0, %d0";
9590 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9591 : "vpxor\t%x0, %x0, %x0";
9593 return "vxorpd\t%x0, %x0, %x0";
9595 return "vxorps\t%x0, %x0, %x0";
/* All-ones: compare-equal with itself (or vpternlog for 512-bit).  */
9603 || get_attr_mode (insn) == MODE_XI
9604 || get_attr_mode (insn) == MODE_V8DF
9605 || get_attr_mode (insn) == MODE_V16SF)
9606 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9608 return "vpcmpeqd\t%0, %0, %0";
9610 return "pcmpeqd\t%0, %0";
9618 /* Returns true if OP contains a symbol reference */
9621 symbolic_reference_mentioned_p (rtx op)
9626 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9629 fmt = GET_RTX_FORMAT (GET_CODE (op));
9630 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9636 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9637 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9641 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9648 /* Return true if it is appropriate to emit `ret' instructions in the
9649 body of a function. Do this only if the epilogue is simple, needing a
9650 couple of insns. Prior to reloading, we can't tell how many registers
9651 must be saved, so return false then. Return false if there is no frame
9652 marker to de-allocate. */
9655 ix86_can_use_return_insn_p (void)
9657 struct ix86_frame frame;
9659 if (! reload_completed || frame_pointer_needed)
9662 /* Don't allow more than 32k pop, since that's all we can do
9663 with one instruction. */
9664 if (crtl->args.pops_args && crtl->args.size >= 32768)
9667 ix86_compute_frame_layout (&frame);
9668 return (frame.stack_pointer_offset == UNITS_PER_WORD
9669 && (frame.nregs + frame.nsseregs) == 0);
9672 /* Value should be nonzero if functions must have frame pointers.
9673 Zero means the frame pointer need not be set up (and parms may
9674 be accessed via the stack pointer) in functions that seem suitable. */
9677 ix86_frame_pointer_required (void)
9679 /* If we accessed previous frames, then the generated code expects
9680 to be able to access the saved ebp value in our frame. */
9681 if (cfun->machine->accesses_prev_frame)
9684 /* Several x86 os'es need a frame pointer for other reasons,
9685 usually pertaining to setjmp. */
9686 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9689 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9690 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9693 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
9694 allocation is 4GB. */
9695 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9698 /* SSE saves require frame-pointer when stack is misaligned. */
9699 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
9702 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9703 turns off the frame pointer by default. Turn it back on now if
9704 we've not got a leaf function. */
9705 if (TARGET_OMIT_LEAF_FRAME_POINTER
9707 || ix86_current_function_calls_tls_descriptor))
/* Profiling without -mfentry also needs the frame pointer.  */
9710 if (crtl->profile && !flag_fentry)
9716 /* Record that the current function accesses previous call frames. */
9719 ix86_setup_frame_addresses (void)
9721 cfun->machine->accesses_prev_frame = 1;
/* Decide whether pc thunks can be emitted as hidden link-once
   functions (requires .hidden support and one-only sections).  */
9724 #ifndef USE_HIDDEN_LINKONCE
9725 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9726 # define USE_HIDDEN_LINKONCE 1
9728 # define USE_HIDDEN_LINKONCE 0
/* Bitmask indexed by register number; a set bit means a pc thunk
   for that register is needed (see ix86_code_end).  */
9732 static int pic_labels_used;
9734 /* Fills in the label name that should be used for a pc thunk for
9735 the given register. */
9738 get_pc_thunk_name (char name[32], unsigned int regno)
9740 gcc_assert (!TARGET_64BIT);
9742 if (USE_HIDDEN_LINKONCE)
9743 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9745 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9749 /* This function generates code for -fpic that loads %ebx with
9750 the return address of the caller and then returns. */
9753 ix86_code_end (void)
/* Emit one get_pc thunk per register flagged in pic_labels_used.  */
9758 for (regno = AX_REG; regno <= SP_REG; regno++)
9763 if (!(pic_labels_used & (1 << regno)))
9766 get_pc_thunk_name (name, regno);
9768 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9769 get_identifier (name),
9770 build_function_type_list (void_type_node, NULL_TREE));
9771 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9772 NULL_TREE, void_type_node);
9773 TREE_PUBLIC (decl) = 1;
9774 TREE_STATIC (decl) = 1;
9775 DECL_IGNORED_P (decl) = 1;
/* Darwin path: weak definition in the coalesced text section.  */
9780 switch_to_section (darwin_sections[text_coal_section]);
9781 fputs ("\t.weak_definition\t", asm_out_file);
9782 assemble_name (asm_out_file, name);
9783 fputs ("\n\t.private_extern\t", asm_out_file);
9784 assemble_name (asm_out_file, name);
9785 putc ('\n', asm_out_file);
9786 ASM_OUTPUT_LABEL (asm_out_file, name);
9787 DECL_WEAK (decl) = 1;
/* ELF path: hidden link-once comdat when supported.  */
9791 if (USE_HIDDEN_LINKONCE)
9793 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9795 targetm.asm_out.unique_section (decl, 0);
9796 switch_to_section (get_named_section (decl, NULL, 0));
9798 targetm.asm_out.globalize_label (asm_out_file, name);
9799 fputs ("\t.hidden\t", asm_out_file);
9800 assemble_name (asm_out_file, name);
9801 putc ('\n', asm_out_file);
9802 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9806 switch_to_section (text_section);
9807 ASM_OUTPUT_LABEL (asm_out_file, name);
9810 DECL_INITIAL (decl) = make_node (BLOCK);
9811 current_function_decl = decl;
9812 init_function_start (decl);
9813 first_function_block_is_cold = false;
9814 /* Make sure unwind info is emitted for the thunk if needed. */
9815 final_start_function (emit_barrier (), asm_out_file, 1);
9817 /* Pad stack IP move with 4 instructions (two NOPs count
9818 as one instruction). */
9819 if (TARGET_PAD_SHORT_FUNCTION)
9824 fputs ("\tnop\n", asm_out_file);
/* The thunk body: load the return address into REGNO, then ret.  */
9827 xops[0] = gen_rtx_REG (Pmode, regno);
9828 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9829 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9830 output_asm_insn ("%!ret", NULL);
9831 final_end_function ();
9832 init_insn_lengths ();
9833 free_after_compilation (cfun);
9835 current_function_decl = NULL;
9838 if (flag_split_stack)
9839 file_end_indicate_split_stack ();
9842 /* Emit code for the SET_GOT patterns. */
/* NOTE(review): listing elides some lines; annotations cover visible code.
   DEST is the register to receive the PIC/GOT base; LABEL, when non-NULL,
   is an existing label to reuse for the pc-relative reference.  */
9845 output_set_got (rtx dest, rtx label)
9851 if (TARGET_VXWORKS_RTP && flag_pic)
9853 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9854 xops[2] = gen_rtx_MEM (Pmode,
9855 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9856 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9858 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9859 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9860 an unadorned address. */
9861 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9862 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9863 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9867 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9872 /* We don't need a pic base, we're not producing pic. */
9875 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ())
9876 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9877 targetm.asm_out.internal_label (asm_out_file, "L",
9878 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* PIC path: call the per-register get_pc thunk.  Recording the register
   in pic_labels_used is what later causes ix86_code_end to emit the
   thunk body.  */
9883 get_pc_thunk_name (name, REGNO (dest));
9884 pic_labels_used |= 1 << REGNO (dest);
9886 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9887 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9888 output_asm_insn ("%!call\t%X2", xops);
9891 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9892 This is what will be referenced by the Mach-O PIC subsystem. */
9893 if (machopic_should_output_picbase_label () || !label)
9894 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9896 /* When we are restoring the pic base at the site of a nonlocal label,
9897 and we decided to emit the pic base above, we will still output a
9898 local label used for calculating the correction offset (even though
9899 the offset will be 0 in that case). */
9901 targetm.asm_out.internal_label (asm_out_file, "L",
9902 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol's offset to the loaded PC value.  */
9907 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9912 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): listing elides the function signatures and part of the
   returned SET; annotations cover visible code only.  */
9917 struct machine_function *m = cfun->machine;
/* Keep the prologue's frame-state bookkeeping in sync: a push moves the
   stack pointer (and the CFA, while the CFA is still tracked via SP)
   down by one word.  */
9919 if (m->fs.cfa_reg == stack_pointer_rtx)
9920 m->fs.cfa_offset += UNITS_PER_WORD;
9921 m->fs.sp_offset += UNITS_PER_WORD;
/* Pushes are always word-sized; widen a sub-word register operand.  */
9923 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9924 arg = gen_rtx_REG (word_mode, REGNO (arg));
9926 return gen_rtx_SET (VOIDmode,
9927 gen_rtx_MEM (word_mode,
9928 gen_rtx_PRE_DEC (Pmode,
9929 stack_pointer_rtx)),
9933 /* Generate an "pop" pattern for input ARG. */
/* Same word-mode normalization as gen_push; note no frame-state update
   is visible here (callers adjust m->fs themselves, e.g.
   release_scratch_register_on_entry below).  */
9938 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9939 arg = gen_rtx_REG (word_mode, REGNO (arg));
9941 return gen_rtx_SET (VOIDmode,
9943 gen_rtx_MEM (word_mode,
9944 gen_rtx_POST_INC (Pmode,
9945 stack_pointer_rtx)));
9948 /* Return >= 0 if there is an unused call-clobbered register available
9949 for the entire function. */
9952 ix86_select_alt_pic_regnum (void)
9954 if (ix86_use_pseudo_pic_reg ())
9955 return INVALID_REGNUM;
9959 && !ix86_current_function_calls_tls_descriptor)
9962 /* Can't use the same register for both PIC and DRAP. */
9964 drap = REGNO (crtl->drap_reg);
/* Scan eax/ecx/edx (regnos 2..0) for one that is never live.  */
9967 for (i = 2; i >= 0; --i)
9968 if (i != drap && !df_regs_ever_live_p (i))
9972 return INVALID_REGNUM;
9975 /* Return TRUE if we need to save REGNO. */
9978 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
/* Special-case the PIC register: it needs saving only when a real hard
   register (not a pseudo) holds the GOT pointer and something in the
   function actually requires the GOT.  */
9980 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9981 && pic_offset_table_rtx)
9983 if (ix86_use_pseudo_pic_reg ())
9985 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9986 _mcount in prologue. */
9987 if (!TARGET_64BIT && flag_pic && crtl->profile)
9990 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9992 || crtl->calls_eh_return
9993 || crtl->uses_const_pool
9994 || cfun->has_nonlocal_label)
9995 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
/* EH return: the EH data registers must be preserved when requested.  */
9998 if (crtl->calls_eh_return && maybe_eh_return)
10003 unsigned test = EH_RETURN_DATA_REGNO (i);
10004 if (test == INVALID_REGNUM)
10012 && regno == REGNO (crtl->drap_reg)
10013 && !cfun->machine->no_drap_save_restore)
/* General case: live, callee-saved, not fixed, and not the frame
   pointer when one is in use (that is saved separately).  */
10016 return (df_regs_ever_live_p (regno)
10017 && !call_used_regs[regno]
10018 && !fixed_regs[regno]
10019 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10022 /* Return number of saved general purpose registers. */
10025 ix86_nsaved_regs (void)
10030 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10031 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10036 /* Return number of saved SSE registers. */
10039 ix86_nsaved_sseregs (void)
/* Only the 64-bit MS ABI has callee-saved SSE registers.  */
10044 if (!TARGET_64BIT_MS_ABI)
10046 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10047 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10052 /* Given FROM and TO register numbers, say whether this elimination is
10053 allowed. If stack alignment is needed, we can only replace argument
10054 pointer with hard frame pointer, or replace frame pointer with stack
10055 pointer. Otherwise, frame pointer elimination is automatically
10056 handled and all other eliminations are valid. */
10059 ix86_can_eliminate (const int from, const int to)
10061 if (stack_realign_fp)
10062 return ((from == ARG_POINTER_REGNUM
10063 && to == HARD_FRAME_POINTER_REGNUM)
10064 || (from == FRAME_POINTER_REGNUM
10065 && to == STACK_POINTER_REGNUM));
10067 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10070 /* Return the offset between two registers, one to be eliminated, and the other
10071 its replacement, at the start of a routine. */
10074 ix86_initial_elimination_offset (int from, int to)
/* All offsets come straight out of the computed frame layout.  */
10076 struct ix86_frame frame;
10077 ix86_compute_frame_layout (&frame);
10079 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10080 return frame.hard_frame_pointer_offset;
10081 else if (from == FRAME_POINTER_REGNUM
10082 && to == HARD_FRAME_POINTER_REGNUM)
10083 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate into the stack pointer.  */
10086 gcc_assert (to == STACK_POINTER_REGNUM);
10088 if (from == ARG_POINTER_REGNUM)
10089 return frame.stack_pointer_offset;
10091 gcc_assert (from == FRAME_POINTER_REGNUM);
10092 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10096 /* In a dynamically-aligned function, we can't know the offset from
10097 stack pointer to frame pointer, so we must ensure that setjmp
10098 eliminates fp against the hard fp (%ebp) rather than trying to
10099 index from %esp up to the top of the frame across a gap that is
10100 of unknown (at compile-time) size. */
10102 ix86_builtin_setjmp_frame_value (void)
10104 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10107 /* When using -fsplit-stack, the allocation routines set a field in
10108 the TCB to the bottom of the stack plus this much space, measured
10111 #define SPLIT_STACK_AVAILABLE 256
10113 /* Fill structure ix86_frame about frame of currently computed function. */
/* NOTE(review): listing elides some lines (braces, else-arms, a few
   conditions); comments below annotate only visible code.  Offsets are
   accumulated top-down from the return address (CFA side) toward the
   final stack pointer position.  */
10116 ix86_compute_frame_layout (struct ix86_frame *frame)
10118 unsigned HOST_WIDE_INT stack_alignment_needed;
10119 HOST_WIDE_INT offset;
10120 unsigned HOST_WIDE_INT preferred_alignment;
10121 HOST_WIDE_INT size = get_frame_size ();
10122 HOST_WIDE_INT to_allocate;
10124 frame->nregs = ix86_nsaved_regs ();
10125 frame->nsseregs = ix86_nsaved_sseregs ();
10127 /* 64-bit MS ABI seem to require stack alignment to be always 16,
10128 except for function prologues, leaf functions and when the default
10129 incoming stack boundary is overridden at command line or via
10130 force_align_arg_pointer attribute. */
10131 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10132 && (!crtl->is_leaf || cfun->calls_alloca != 0
10133 || ix86_current_function_calls_tls_descriptor
10134 || ix86_incoming_stack_boundary < 128))
10136 crtl->preferred_stack_boundary = 128;
10137 crtl->stack_alignment_needed = 128;
/* Convert bit alignments to bytes for the offset arithmetic below.  */
10140 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10141 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10143 gcc_assert (!size || stack_alignment_needed);
10144 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10145 gcc_assert (preferred_alignment <= stack_alignment_needed);
10147 /* For SEH we have to limit the amount of code movement into the prologue.
10148 At present we do this via a BLOCKAGE, at which point there's very little
10149 scheduling that can be done, which means that there's very little point
10150 in doing anything except PUSHs. */
10152 cfun->machine->use_fast_prologue_epilogue = false;
10154 /* During reload iteration the amount of registers saved can change.
10155 Recompute the value as needed. Do not recompute when amount of registers
10156 didn't change as reload does multiple calls to the function and does not
10157 expect the decision to change within single iteration. */
10158 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10159 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10161 int count = frame->nregs;
10162 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10164 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10166 /* The fast prologue uses move instead of push to save registers. This
10167 is significantly longer, but also executes faster as modern hardware
10168 can execute the moves in parallel, but can't do that for push/pop.
10170 Be careful about choosing what prologue to emit: When function takes
10171 many instructions to execute we may use slow version as well as in
10172 case function is known to be outside hot spot (this is known with
10173 feedback only). Weight the size of function by number of registers
10174 to save as it is cheap to use one or two push instructions but very
10175 slow to use many of them. */
10177 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10178 if (node->frequency < NODE_FREQUENCY_NORMAL
10179 || (flag_branch_probabilities
10180 && node->frequency < NODE_FREQUENCY_HOT))
10181 cfun->machine->use_fast_prologue_epilogue = false;
10183 cfun->machine->use_fast_prologue_epilogue
10184 = !expensive_function_p (count);
10187 frame->save_regs_using_mov
10188 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10189 /* If static stack checking is enabled and done with probes,
10190 the registers need to be saved before allocating the frame. */
10191 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10193 /* Skip return address. */
10194 offset = UNITS_PER_WORD;
10196 /* Skip pushed static chain. */
10197 if (ix86_static_chain_on_stack)
10198 offset += UNITS_PER_WORD;
10200 /* Skip saved base pointer. */
10201 if (frame_pointer_needed)
10202 offset += UNITS_PER_WORD;
10203 frame->hfp_save_offset = offset;
10205 /* The traditional frame pointer location is at the top of the frame. */
10206 frame->hard_frame_pointer_offset = offset;
10208 /* Register save area */
10209 offset += frame->nregs * UNITS_PER_WORD;
10210 frame->reg_save_offset = offset;
10212 /* On SEH target, registers are pushed just before the frame pointer
10215 frame->hard_frame_pointer_offset = offset;
10217 /* Align and set SSE register save area. */
10218 if (frame->nsseregs)
10220 /* The only ABI that has saved SSE registers (Win64) also has a
10221 16-byte aligned default stack, and thus we don't need to be
10222 within the re-aligned local stack frame to save them. In case
10223 incoming stack boundary is aligned to less than 16 bytes,
10224 unaligned move of SSE register will be emitted, so there is
10225 no point to round up the SSE register save area outside the
10226 re-aligned local stack frame to 16 bytes. */
10227 if (ix86_incoming_stack_boundary >= 128)
10228 offset = (offset + 16 - 1) & -16;
10229 offset += frame->nsseregs * 16;
10231 frame->sse_reg_save_offset = offset;
10233 /* The re-aligned stack starts here. Values before this point are not
10234 directly comparable with values below this point. In order to make
10235 sure that no value happens to be the same before and after, force
10236 the alignment computation below to add a non-zero value. */
10237 if (stack_realign_fp)
10238 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
/* Register-save area for varargs (GPR + FPR save sizes).  */
10241 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10242 offset += frame->va_arg_size;
10244 /* Align start of frame for local function. */
10245 if (stack_realign_fp
10246 || offset != frame->sse_reg_save_offset
10249 || cfun->calls_alloca
10250 || ix86_current_function_calls_tls_descriptor)
10251 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10253 /* Frame pointer points here. */
10254 frame->frame_pointer_offset = offset;
10258 /* Add outgoing arguments area. Can be skipped if we eliminated
10259 all the function calls as dead code.
10260 Skipping is however impossible when function calls alloca. Alloca
10261 expander assumes that last crtl->outgoing_args_size
10262 of stack frame are unused. */
10263 if (ACCUMULATE_OUTGOING_ARGS
10264 && (!crtl->is_leaf || cfun->calls_alloca
10265 || ix86_current_function_calls_tls_descriptor))
10267 offset += crtl->outgoing_args_size;
10268 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10271 frame->outgoing_arguments_size = 0;
10273 /* Align stack boundary. Only needed if we're calling another function
10274 or using alloca. */
10275 if (!crtl->is_leaf || cfun->calls_alloca
10276 || ix86_current_function_calls_tls_descriptor)
10277 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10279 /* We've reached end of stack frame. */
10280 frame->stack_pointer_offset = offset;
10282 /* Size prologue needs to allocate. */
10283 to_allocate = offset - frame->sse_reg_save_offset;
/* A >=2GB allocation cannot use mov-based saves on 64-bit: the
   displacement would not fit in a 32-bit immediate.  */
10285 if ((!to_allocate && frame->nregs <= 1)
10286 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10287 frame->save_regs_using_mov = false;
10289 if (ix86_using_red_zone ()
10290 && crtl->sp_is_unchanging
10292 && !ix86_current_function_calls_tls_descriptor)
10294 frame->red_zone_size = to_allocate;
10295 if (frame->save_regs_using_mov)
10296 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10297 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10298 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10301 frame->red_zone_size = 0;
10302 frame->stack_pointer_offset -= frame->red_zone_size;
10304 /* The SEH frame pointer location is near the bottom of the frame.
10305 This is enforced by the fact that the difference between the
10306 stack pointer and the frame pointer is limited to 240 bytes in
10307 the unwind data structure. */
10310 HOST_WIDE_INT diff;
10312 /* If we can leave the frame pointer where it is, do so. Also, returns
10313 the establisher frame for __builtin_frame_address (0). */
10314 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10315 if (diff <= SEH_MAX_FRAME_SIZE
10316 && (diff > 240 || (diff & 15) != 0)
10317 && !crtl->accesses_prior_frames)
10319 /* Ideally we'd determine what portion of the local stack frame
10320 (within the constraint of the lowest 240) is most heavily used.
10321 But without that complication, simply bias the frame pointer
10322 by 128 bytes so as to maximize the amount of the local stack
10323 frame that is addressable with 8-bit offsets. */
10324 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10329 /* This is semi-inlined memory_address_length, but simplified
10330 since we know that we're always dealing with reg+offset, and
10331 to avoid having to create and discard all that rtl. */
/* Returns the extra encoding length (displacement/SIB bytes) of a
   reg+offset address using base REGNO with displacement OFFSET.  */
10334 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10340 /* EBP and R13 cannot be encoded without an offset. */
10341 len = (regno == BP_REG || regno == R13_REG);
/* 8-bit vs. 32-bit displacement.  */
10343 else if (IN_RANGE (offset, -128, 127))
10346 /* ESP and R12 must be encoded with a SIB byte. */
10347 if (regno == SP_REG || regno == R12_REG)
10353 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10354 The valid base registers are taken from CFUN->MACHINE->FS. */
10357 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10359 const struct machine_function *m = cfun->machine;
10360 rtx base_reg = NULL;
10361 HOST_WIDE_INT base_offset = 0;
/* Fast prologue/epilogue: optimize for scheduling freedom rather than
   code size.  */
10363 if (m->use_fast_prologue_epilogue)
10365 /* Choose the base register most likely to allow the most scheduling
10366 opportunities. Generally FP is valid throughout the function,
10367 while DRAP must be reloaded within the epilogue. But choose either
10368 over the SP due to increased encoding size. */
10370 if (m->fs.fp_valid)
10372 base_reg = hard_frame_pointer_rtx;
10373 base_offset = m->fs.fp_offset - cfa_offset;
10375 else if (m->fs.drap_valid)
10377 base_reg = crtl->drap_reg;
10378 base_offset = 0 - cfa_offset;
10380 else if (m->fs.sp_valid)
10382 base_reg = stack_pointer_rtx;
10383 base_offset = m->fs.sp_offset - cfa_offset;
/* Otherwise: optimize for size.  "len" starts above any real encoding
   length so the first valid candidate always wins.  */
10388 HOST_WIDE_INT toffset;
10389 int len = 16, tlen;
10391 /* Choose the base register with the smallest address encoding.
10392 With a tie, choose FP > DRAP > SP. */
10393 if (m->fs.sp_valid)
10395 base_reg = stack_pointer_rtx;
10396 base_offset = m->fs.sp_offset - cfa_offset;
10397 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10399 if (m->fs.drap_valid)
10401 toffset = 0 - cfa_offset;
10402 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10405 base_reg = crtl->drap_reg;
10406 base_offset = toffset;
10410 if (m->fs.fp_valid)
10412 toffset = m->fs.fp_offset - cfa_offset;
10413 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10416 base_reg = hard_frame_pointer_rtx;
10417 base_offset = toffset;
10422 gcc_assert (base_reg != NULL);
10424 return plus_constant (Pmode, base_reg, base_offset);
10427 /* Emit code to save registers in the prologue. */
10430 ix86_emit_save_regs (void)
10432 unsigned int regno;
/* Push callee-saved GPRs in descending regno order; each push is marked
   frame-related for the unwinder.  */
10435 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10436 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10438 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10439 RTX_FRAME_RELATED_P (insn) = 1;
10443 /* Emit a single register save at CFA - CFA_OFFSET. */
10446 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10447 HOST_WIDE_INT cfa_offset)
10449 struct machine_function *m = cfun->machine;
10450 rtx reg = gen_rtx_REG (mode, regno);
10451 rtx unspec = NULL_RTX;
10452 rtx mem, addr, base, insn;
10453 unsigned int align;
10455 addr = choose_baseaddr (cfa_offset);
10456 mem = gen_frame_mem (mode, addr);
10458 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
10459 align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
10460 set_mem_align (mem, align);
10462 /* SSE saves are not within re-aligned local stack frame.
10463 In case INCOMING_STACK_BOUNDARY is misaligned, we have
10464 to emit unaligned store. */
10465 if (mode == V4SFmode && align < 128)
10466 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
10468 insn = emit_insn (gen_rtx_SET (VOIDmode, mem, unspec ? unspec : reg));
10469 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip a PLUS to recover the bare base register of the address.  */
10472 if (GET_CODE (base) == PLUS)
10473 base = XEXP (base, 0);
10474 gcc_checking_assert (REG_P (base));
10476 /* When saving registers into a re-aligned local stack frame, avoid
10477 any tricky guessing by dwarf2out. */
10478 if (m->fs.realigned)
10480 gcc_checking_assert (stack_realign_drap);
10482 if (regno == REGNO (crtl->drap_reg))
10484 /* A bit of a hack. We force the DRAP register to be saved in
10485 the re-aligned stack frame, which provides us with a copy
10486 of the CFA that will last past the prologue. Install it. */
10487 gcc_checking_assert (cfun->machine->fs.fp_valid);
10488 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10489 cfun->machine->fs.fp_offset - cfa_offset);
10490 mem = gen_rtx_MEM (mode, addr);
10491 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10495 /* The frame pointer is a stable reference within the
10496 aligned frame. Use it. */
10497 gcc_checking_assert (cfun->machine->fs.fp_valid);
10498 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10499 cfun->machine->fs.fp_offset - cfa_offset);
10500 mem = gen_rtx_MEM (mode, addr);
10501 add_reg_note (insn, REG_CFA_EXPRESSION,
10502 gen_rtx_SET (VOIDmode, mem, reg));
10506 /* The memory may not be relative to the current CFA register,
10507 which means that we may need to generate a new pattern for
10508 use by the unwind info. */
10509 else if (base != m->fs.cfa_reg)
10511 addr = plus_constant (Pmode, m->fs.cfa_reg,
10512 m->fs.cfa_offset - cfa_offset);
10513 mem = gen_rtx_MEM (mode, addr);
10514 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10517 add_reg_note (insn, REG_CFA_EXPRESSION,
10518 gen_rtx_SET (VOIDmode, mem, reg));
10521 /* Emit code to save registers using MOV insns.
10522 First register is stored at CFA - CFA_OFFSET. */
10524 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10526 unsigned int regno;
/* Word-sized saves for GPRs; successive registers go at successively
   lower CFA offsets.  */
10528 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10529 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10531 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10532 cfa_offset -= UNITS_PER_WORD;
10536 /* Emit code to save SSE registers using MOV insns.
10537 First register is stored at CFA - CFA_OFFSET. */
10539 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10541 unsigned int regno;
10543 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10544 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10546 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
/* List of REG_CFA_RESTORE notes waiting to be attached to the next stack
   manipulation insn (GC-rooted via GTY).  */
10551 static GTY(()) rtx queued_cfa_restores;
10553 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10554 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10555 Don't add the note if the previously saved value will be left untouched
10556 within stack red-zone till return, as unwinders can find the same value
10557 in the register and on the stack. */
10560 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10562 if (!crtl->shrink_wrapped
10563 && cfa_offset <= cfun->machine->fs.red_zone_offset)
/* INSN given: attach the note immediately.  */
10568 add_reg_note (insn, REG_CFA_RESTORE, reg);
10569 RTX_FRAME_RELATED_P (insn) = 1;
/* No INSN: queue for ix86_add_queued_cfa_restore_notes.  */
10572 queued_cfa_restores
10573 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores)
10576 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10579 ix86_add_queued_cfa_restore_notes (rtx insn)
10582 if (!queued_cfa_restores)
/* Splice the whole queued chain onto the front of INSN's notes.  */
10584 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10586 XEXP (last, 1) = REG_NOTES (insn);
10587 REG_NOTES (insn) = queued_cfa_restores;
10588 queued_cfa_restores = NULL_RTX;
10589 RTX_FRAME_RELATED_P (insn) = 1;
10592 /* Expand prologue or epilogue stack adjustment.
10593 The pattern exist to put a dependency on all ebp-based memory accesses.
10594 STYLE should be negative if instructions should be marked as frame related,
10595 zero if %r11 register is live and cannot be freely used and positive
/* NOTE(review): listing elides some lines here (part of the STYLE comment
   and several branches); annotations cover visible code only.  */
10599 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10600 int style, bool set_cfa)
10602 struct machine_function *m = cfun->machine;
10604 bool add_frame_related_expr = false;
/* Choose a pattern; a 64-bit offset too big for an immediate must first
   be loaded into a scratch register.  */
10606 if (Pmode == SImode)
10607 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10608 else if (x86_64_immediate_operand (offset, DImode))
10609 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10613 /* r11 is used by indirect sibcall return as well, set before the
10614 epilogue and used after the epilogue. */
10616 tmp = gen_rtx_REG (DImode, R11_REG);
10619 gcc_assert (src != hard_frame_pointer_rtx
10620 && dest != hard_frame_pointer_rtx);
10621 tmp = hard_frame_pointer_rtx;
10623 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10625 add_frame_related_expr = true;
10627 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10630 insn = emit_insn (insn);
/* Any restores queued by ix86_add_cfa_restore_note land on this insn.  */
10632 ix86_add_queued_cfa_restore_notes (insn);
/* set_cfa: this adjustment moves the CFA from SRC to DEST; record it in
   both the tracked frame state and a REG_CFA_ADJUST_CFA note.  */
10638 gcc_assert (m->fs.cfa_reg == src);
10639 m->fs.cfa_offset += INTVAL (offset);
10640 m->fs.cfa_reg = dest;
10642 r = gen_rtx_PLUS (Pmode, src, offset);
10643 r = gen_rtx_SET (VOIDmode, dest, r);
10644 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10645 RTX_FRAME_RELATED_P (insn) = 1;
10647 else if (style < 0)
10649 RTX_FRAME_RELATED_P (insn) = 1;
10650 if (add_frame_related_expr)
10652 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10653 r = gen_rtx_SET (VOIDmode, dest, r);
10654 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Keep m->fs in sync when the stack pointer itself was changed.  */
10658 if (dest == stack_pointer_rtx)
10660 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10661 bool valid = m->fs.sp_valid;
10663 if (src == hard_frame_pointer_rtx)
10665 valid = m->fs.fp_valid;
10666 ooffset = m->fs.fp_offset;
10668 else if (src == crtl->drap_reg)
10670 valid = m->fs.drap_valid;
10675 /* Else there are two possibilities: SP itself, which we set
10676 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10677 taken care of this by hand along the eh_return path. */
10678 gcc_checking_assert (src == stack_pointer_rtx
10679 || offset == const0_rtx);
10682 m->fs.sp_offset = ooffset - INTVAL (offset);
10683 m->fs.sp_valid = valid;
10687 /* Find an available register to be used as dynamic realign argument
10688 pointer register. Such a register will be written in prologue and
10689 used in begin of body, so it must not be
10690 1. parameter passing register.
10692 We reuse static-chain register if it is available. Otherwise, we
10693 use DI for i386 and R13 for x86-64. We chose R13 since it has
10696 Return: the regno of chosen register. */
10698 static unsigned int
10699 find_drap_reg (void)
10701 tree decl = cfun->decl;
/* 64-bit branch (listing elides the TARGET_64BIT guard).  */
10705 /* Use R13 for nested function or function need static chain.
10706 Since function with tail call may use any caller-saved
10707 registers in epilogue, DRAP must not use caller-saved
10708 register in such case. */
10709 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* 32-bit branch.  */
10716 /* Use DI for nested function or function need static chain.
10717 Since function with tail call may use any caller-saved
10718 registers in epilogue, DRAP must not use caller-saved
10719 register in such case. */
10720 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10723 /* Reuse static chain register if it isn't used for parameter
10725 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
/* fastcall/thiscall repurpose ecx, so the static-chain register is
   only reusable for other calling conventions.  */
10727 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10728 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10735 /* Handle a "force_align_arg_pointer" attribute. */
10738 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
10739 tree, int, bool *no_add_attrs)
/* Reject the attribute (with a warning, not an error) on anything other
   than function/method types and field/type declarations.  */
10741 if (TREE_CODE (*node) != FUNCTION_TYPE
10742 && TREE_CODE (*node) != METHOD_TYPE
10743 && TREE_CODE (*node) != FIELD_DECL
10744 && TREE_CODE (*node) != TYPE_DECL)
10746 warning (OPT_Wattributes, "%qE attribute only applies to functions",
10748 *no_add_attrs = true;
10754 /* Return minimum incoming stack alignment. */
10756 static unsigned int
10757 ix86_minimum_incoming_stack_boundary (bool sibcall)
10759 unsigned int incoming_stack_boundary;
10761 /* Prefer the one specified at command line. */
10762 if (ix86_user_incoming_stack_boundary)
10763 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10764 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
10765 if -mstackrealign is used, it isn't used for sibcall check and
10766 estimated stack alignment is 128bit. */
10768 && ix86_force_align_arg_pointer
10769 && crtl->stack_alignment_estimated == 128)
10770 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10772 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10774 /* Incoming stack alignment can be changed on individual functions
10775 via force_align_arg_pointer attribute. We use the smallest
10776 incoming stack boundary. */
10777 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10778 && lookup_attribute (ix86_force_align_arg_pointer_string,
10779 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10780 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10782 /* The incoming stack frame has to be aligned at least at
10783 parm_stack_boundary. */
10784 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10785 incoming_stack_boundary = crtl->parm_stack_boundary;
10787 /* Stack at entrance of main is aligned by runtime. We use the
10788 smallest incoming stack boundary. */
10789 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10790 && DECL_NAME (current_function_decl)
10791 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10792 && DECL_FILE_SCOPE_P (current_function_decl))
10793 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10795 return incoming_stack_boundary;
10798 /* Update incoming stack boundary and estimated stack alignment. */
10801 ix86_update_stack_boundary (void)
10803 ix86_incoming_stack_boundary
10804 = ix86_minimum_incoming_stack_boundary (false);
10806 /* x86_64 vararg needs 16byte stack alignment for register save
10810 && crtl->stack_alignment_estimated < 128)
10811 crtl->stack_alignment_estimated = 128;
10813 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
10814 if (ix86_tls_descriptor_calls_expanded_in_cfun
10815 && crtl->preferred_stack_boundary < 128)
10816 crtl->preferred_stack_boundary = 128;
10819 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10820 needed or an rtx for DRAP otherwise. */
10823 ix86_get_drap_rtx (void)
10825 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10826 crtl->need_drap = true;
10828 if (stack_realign_drap)
10830 /* Assign DRAP to vDRAP and returns vDRAP */
10831 unsigned int regno = find_drap_reg ();
10834 rtx_insn *seq, *insn;
10836 arg_ptr = gen_rtx_REG (Pmode, regno);
10837 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo so the body can use it freely.  */
10840 drap_vreg = copy_to_reg (arg_ptr);
10841 seq = get_insns ();
/* Place the copy right after the function-entry insn.  */
10844 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10847 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10848 RTX_FRAME_RELATED_P (insn) = 1;
10856 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10859 ix86_internal_arg_pointer (void)
10861 return virtual_incoming_args_rtx;
10864 struct scratch_reg {
/* NOTE(review): non-contiguous listing — the function's return type,
   braces, the regno assignments inside each branch of the if/else
   chain, and the 64-bit R11 branch body are missing between the visible
   original line numbers.  */
10869 /* Return a short-lived scratch register for use on function entry.
10870 In 32-bit mode, it is valid only after the registers are saved
10871 in the prologue. This register must be released by means of
10872 release_scratch_register_on_entry once it is dead. */
10875 get_scratch_register_on_entry (struct scratch_reg *sr)
10883 /* We always use R11 in 64-bit mode. */
/* 32-bit case: pick a register not used for argument passing, the
   static chain, or DRAP, preferring a call-clobbered one; otherwise
   fall back to a callee-saved register that is saved anyway (and must
   then be pushed/popped around its use).  */
10888 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10890 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10892 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10893 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10894 int regparm = ix86_function_regparm (fntype, decl);
10896 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10898 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10899 for the static chain register. */
10900 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10901 && drap_regno != AX_REG)
10903 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10904 for the static chain register. */
10905 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10907 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10909 /* ecx is the static chain register. */
10910 else if (regparm < 3 && !fastcall_p && !thiscall_p
10912 && drap_regno != CX_REG)
10914 else if (ix86_save_reg (BX_REG, true))
10916 /* esi is the static chain register. */
10917 else if (!(regparm == 3 && static_chain_p)
10918 && ix86_save_reg (SI_REG, true))
10920 else if (ix86_save_reg (DI_REG, true))
/* Last resort: a caller-saved register that must be spilled first.  */
10924 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10929 sr->reg = gen_rtx_REG (Pmode, regno);
/* Spill the chosen register with a frame-related push so the unwinder
   sees the stack adjustment.  */
10932 rtx insn = emit_insn (gen_push (sr->reg));
10933 RTX_FRAME_RELATED_P (insn) = 1;
/* NOTE(review): non-contiguous listing — return type, braces, and the
   guard checking whether the register was actually saved are missing.  */
10937 /* Release a scratch register obtained from the preceding function. */
10940 release_scratch_register_on_entry (struct scratch_reg *sr)
10944 struct machine_function *m = cfun->machine;
10945 rtx x, insn = emit_insn (gen_pop (sr->reg));
10947 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10948 RTX_FRAME_RELATED_P (insn) = 1;
/* Attach an explicit SP += UNITS_PER_WORD expression so the dwarf2
   machinery records the stack adjustment the pop performs.  */
10949 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10950 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10951 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
/* Keep the machine-specific frame-state tracking in sync.  */
10952 m->fs.sp_offset -= UNITS_PER_WORD;
/* Byte distance between consecutive stack probes; the exponent comes
   from the stack-checking target macros.  */
10956 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
/* NOTE(review): non-contiguous listing — return type, braces, several
   loop/else lines, and parts of the plus_constant arguments are missing
   between the visible original line numbers.  */
10958 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10961 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10963 /* We skip the probe for the first interval + a small dope of 4 words and
10964 probe that many bytes past the specified size to maintain a protection
10965 area at the bottom of the stack. */
10966 const int dope = 4 * UNITS_PER_WORD;
10967 rtx size_rtx = GEN_INT (size), last;
10969 /* See if we have a constant small number of probes to generate. If so,
10970 that's the easy case. The run-time loop is made up of 11 insns in the
10971 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10972 for n # of intervals. */
10973 if (size <= 5 * PROBE_INTERVAL)
10975 HOST_WIDE_INT i, adjust;
10976 bool first_probe = true;
10978 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10979 values of N from 1 until it exceeds SIZE. If only one probe is
10980 needed, this will not generate any code. Then adjust and probe
10981 to PROBE_INTERVAL + SIZE. */
10982 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* The first adjustment also covers the skipped initial interval plus
   the dope area.  */
10986 adjust = 2 * PROBE_INTERVAL + dope;
10987 first_probe = false;
10990 adjust = PROBE_INTERVAL;
10992 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10993 plus_constant (Pmode, stack_pointer_rtx,
10995 emit_stack_probe (stack_pointer_rtx);
/* Final adjust-and-probe for the residual amount past the last full
   interval.  */
10999 adjust = size + PROBE_INTERVAL + dope;
11001 adjust = size + PROBE_INTERVAL - i;
11003 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11004 plus_constant (Pmode, stack_pointer_rtx,
11006 emit_stack_probe (stack_pointer_rtx);
11008 /* Adjust back to account for the additional first interval. */
11009 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11010 plus_constant (Pmode, stack_pointer_rtx,
11011 PROBE_INTERVAL + dope)));
11014 /* Otherwise, do the same as above, but in a loop. Note that we must be
11015 extra careful with variables wrapping around because we might be at
11016 the very top (or the very bottom) of the address space and we have
11017 to be able to handle this case properly; in particular, we use an
11018 equality test for the loop condition. */
11021 HOST_WIDE_INT rounded_size;
11022 struct scratch_reg sr;
11024 get_scratch_register_on_entry (&sr);
11027 /* Step 1: round SIZE to the previous multiple of the interval. */
11029 rounded_size = size & -PROBE_INTERVAL;
11032 /* Step 2: compute initial and final value of the loop counter. */
11034 /* SP = SP_0 + PROBE_INTERVAL. */
11035 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11036 plus_constant (Pmode, stack_pointer_rtx,
11037 - (PROBE_INTERVAL + dope))));
11039 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
11040 emit_move_insn (sr.reg, GEN_INT (-rounded_size))
11041 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
11042 gen_rtx_PLUS (Pmode, sr.reg,
11043 stack_pointer_rtx)));
11046 /* Step 3: the loop
11048 while (SP != LAST_ADDR)
11050 SP = SP + PROBE_INTERVAL
11054 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11055 values of N from 1 until it is equal to ROUNDED_SIZE. */
11057 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11060 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11061 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11063 if (size != rounded_size)
11065 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11066 plus_constant (Pmode, stack_pointer_rtx,
11067 rounded_size - size)));
11068 emit_stack_probe (stack_pointer_rtx);
11071 /* Adjust back to account for the additional first interval. */
11072 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11073 plus_constant (Pmode, stack_pointer_rtx,
11074 PROBE_INTERVAL + dope)));
11076 release_scratch_register_on_entry (&sr);
11079 /* Even if the stack pointer isn't the CFA register, we need to correctly
11080 describe the adjustments made to it, in particular differentiate the
11081 frame-related ones from the frame-unrelated ones. */
/* Attach a two-element SEQUENCE note: the frame-related SP -= size,
   and the frame-unrelated undo of the first-interval+dope skip.  */
11084 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11085 XVECEXP (expr, 0, 0)
11086 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11087 plus_constant (Pmode, stack_pointer_rtx, -size));
11088 XVECEXP (expr, 0, 1)
11089 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11090 plus_constant (Pmode, stack_pointer_rtx,
11091 PROBE_INTERVAL + dope + size));
11092 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11093 RTX_FRAME_RELATED_P (last) = 1;
11095 cfun->machine->fs.sp_offset += size;
11098 /* Make sure nothing is scheduled before we are done. */
11099 emit_insn (gen_blockage ());
/* NOTE(review): non-contiguous listing — return type, braces, the xops
   declaration, and the xops[1] = reg assignment are missing.  Emits the
   textual assembly for the runtime probing loop generated by
   ix86_adjust_stack_and_probe.  */
11102 /* Adjust the stack pointer up to REG while probing it. */
11105 output_adjust_stack_and_probe (rtx reg)
11107 static int labelno = 0;
11108 char loop_lab[32], end_lab[32];
/* Fresh internal labels for the loop head and exit.  */
11111 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11112 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11114 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11116 /* Jump to END_LAB if SP == LAST_ADDR. */
11117 xops[0] = stack_pointer_rtx;
11119 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11120 fputs ("\tje\t", asm_out_file);
11121 assemble_name_raw (asm_out_file, end_lab);
11122 fputc ('\n', asm_out_file);
11124 /* SP = SP + PROBE_INTERVAL. */
11125 xops[1] = GEN_INT (PROBE_INTERVAL);
11126 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe by OR-ing zero into the word at the new SP: touches the page
   without changing its contents.  */
11129 xops[1] = const0_rtx;
11130 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11132 fprintf (asm_out_file, "\tjmp\t");
11133 assemble_name_raw (asm_out_file, loop_lab);
11134 fputc ('\n', asm_out_file);
11136 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
/* NOTE(review): non-contiguous listing — return type, braces, loop
   variable declaration, and parts of the plus_constant argument lists
   are missing between the visible original line numbers.  */
11141 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11142 inclusive. These are offsets from the current stack pointer. */
11145 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11147 /* See if we have a constant small number of probes to generate. If so,
11148 that's the easy case. The run-time loop is made up of 7 insns in the
11149 generic case while the compile-time loop is made up of n insns for n #
11151 if (size <= 7 * PROBE_INTERVAL)
11155 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11156 it exceeds SIZE. If only one probe is needed, this will not
11157 generate any code. Then probe at FIRST + SIZE. */
11158 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11159 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11162 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11166 /* Otherwise, do the same as above, but in a loop. Note that we must be
11167 extra careful with variables wrapping around because we might be at
11168 the very top (or the very bottom) of the address space and we have
11169 to be able to handle this case properly; in particular, we use an
11170 equality test for the loop condition. */
11173 HOST_WIDE_INT rounded_size, last;
11174 struct scratch_reg sr;
11176 get_scratch_register_on_entry (&sr);
11179 /* Step 1: round SIZE to the previous multiple of the interval. */
11181 rounded_size = size & -PROBE_INTERVAL;
11184 /* Step 2: compute initial and final value of the loop counter. */
11186 /* TEST_OFFSET = FIRST. */
/* Offsets are kept negated so the loop can count toward zero.  */
11187 emit_move_insn (sr.reg, GEN_INT (-first));
11189 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11190 last = first + rounded_size;
11193 /* Step 3: the loop
11195 while (TEST_ADDR != LAST_ADDR)
11197 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11201 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11202 until it is equal to ROUNDED_SIZE. */
11204 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11207 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11208 that SIZE is equal to ROUNDED_SIZE. */
11210 if (size != rounded_size)
11211 emit_stack_probe (plus_constant (Pmode,
11212 gen_rtx_PLUS (Pmode,
11215 rounded_size - size));
11217 release_scratch_register_on_entry (&sr);
11220 /* Make sure nothing is scheduled before we are done. */
11221 emit_insn (gen_blockage ());
/* NOTE(review): non-contiguous listing — return type, braces, the xops
   declaration, and the assignments of xops[0]/xops[1] before the compare
   are missing.  Emits the textual assembly for the runtime loop generated
   by ix86_emit_probe_stack_range (probes without moving SP).  */
11224 /* Probe a range of stack addresses from REG to END, inclusive. These are
11225 offsets from the current stack pointer. */
11228 output_probe_stack_range (rtx reg, rtx end)
11230 static int labelno = 0;
11231 char loop_lab[32], end_lab[32];
11234 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11235 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11237 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11239 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11242 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11243 fputs ("\tje\t", asm_out_file);
11244 assemble_name_raw (asm_out_file, end_lab);
11245 fputc ('\n', asm_out_file);
11247 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11248 xops[1] = GEN_INT (PROBE_INTERVAL);
11249 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11251 /* Probe at TEST_ADDR. */
/* OR zero into the word at SP+offset — touches the page, value intact.  */
11252 xops[0] = stack_pointer_rtx;
11254 xops[2] = const0_rtx;
11255 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11257 fprintf (asm_out_file, "\tjmp\t");
11258 assemble_name_raw (asm_out_file, loop_lab);
11259 fputc ('\n', asm_out_file);
11261 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
/* NOTE(review): non-contiguous listing — return type, braces, several
   condition lines of the large if (e.g. the stack_realign test itself),
   the basic-block/insn declarations, and the early-return bodies are
   missing between the visible original line numbers.  */
11266 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11267 to be generated in correct form. */
11269 ix86_finalize_stack_realign_flags (void)
11271 /* Check if stack realign is really needed after reload, and
11272 stores result in cfun */
11273 unsigned int incoming_stack_boundary
11274 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11275 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11276 unsigned int stack_realign
11277 = (incoming_stack_boundary
11278 < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
11279 ? crtl->max_used_stack_slot_alignment
11280 : crtl->stack_alignment_needed));
/* Once finalized, the decision must not change — assert and return.  */
11282 if (crtl->stack_realign_finalized)
11284 /* After stack_realign_needed is finalized, we can no longer
11286 gcc_assert (crtl->stack_realign_needed == stack_realign);
11290 /* If the only reason for frame_pointer_needed is that we conservatively
11291 assumed stack realignment might be needed, but in the end nothing that
11292 needed the stack alignment had been spilled, clear frame_pointer_needed
11293 and say we don't need stack realignment. */
11295 && frame_pointer_needed
11297 && flag_omit_frame_pointer
11298 && crtl->sp_is_unchanging
11299 && !ix86_current_function_calls_tls_descriptor
11300 && !crtl->accesses_prior_frames
11301 && !cfun->calls_alloca
11302 && !crtl->calls_eh_return
11303 /* See ira_setup_eliminable_regset for the rationale. */
11304 && !(STACK_CHECK_MOVING_SP
11305 && flag_stack_check
11307 && cfun->can_throw_non_call_exceptions)
11308 && !ix86_frame_pointer_required ()
11309 && get_frame_size () == 0
11310 && ix86_nsaved_sseregs () == 0
11311 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11313 HARD_REG_SET set_up_by_prologue, prologue_used;
/* Scan all insns: if any insn genuinely requires a stack frame, keep
   the conservative realignment decision and bail out of this path.  */
11316 CLEAR_HARD_REG_SET (prologue_used);
11317 CLEAR_HARD_REG_SET (set_up_by_prologue);
11318 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11319 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11320 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11321 HARD_FRAME_POINTER_REGNUM);
11322 FOR_EACH_BB_FN (bb, cfun)
11325 FOR_BB_INSNS (bb, insn)
11326 if (NONDEBUG_INSN_P (insn)
11327 && requires_stack_frame_p (insn, prologue_used,
11328 set_up_by_prologue))
11330 crtl->stack_realign_needed = stack_realign;
11331 crtl->stack_realign_finalized = true;
11336 /* If drap has been set, but it actually isn't live at the start
11337 of the function, there is no reason to set it up. */
11338 if (crtl->drap_reg)
11340 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11341 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11343 crtl->drap_reg = NULL_RTX;
11344 crtl->need_drap = false;
11348 cfun->machine->no_drap_save_restore = true;
/* No insn needs a frame: drop the frame pointer and realignment, shrink
   all the recorded alignments to the incoming boundary, and rerun the
   dataflow scan so register liveness reflects the new decision.  */
11350 frame_pointer_needed = false;
11351 stack_realign = false;
11352 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11353 crtl->stack_alignment_needed = incoming_stack_boundary;
11354 crtl->stack_alignment_estimated = incoming_stack_boundary;
11355 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11356 crtl->preferred_stack_boundary = incoming_stack_boundary;
11357 df_finish_pass (true);
11358 df_scan_alloc (NULL);
11360 df_compute_regs_ever_live (true);
11364 crtl->stack_realign_needed = stack_realign;
11365 crtl->stack_realign_finalized = true;
/* NOTE(review): non-contiguous listing — return type and braces are
   missing.  */
11368 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11371 ix86_elim_entry_set_got (rtx reg)
11373 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11374 rtx_insn *c_insn = BB_HEAD (bb);
/* BB_HEAD may be a note/label; advance to the first real insn.  */
11375 if (!NONDEBUG_INSN_P (c_insn))
11376 c_insn = next_nonnote_nondebug_insn (c_insn);
11377 if (c_insn && NONJUMP_INSN_P (c_insn))
11379 rtx pat = PATTERN (c_insn);
11380 if (GET_CODE (pat) == PARALLEL)
/* A set_got insn is a PARALLEL whose first element sets REG from an
   UNSPEC_SET_GOT; delete it if it targets the same hard register.  */
11382 rtx vec = XVECEXP (pat, 0, 0);
11383 if (GET_CODE (vec) == SET
11384 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11385 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11386 delete_insn (c_insn);
/* NOTE(review): non-contiguous listing — braces, else lines, some rtx
   declarations (insn, t, push, mov, r10, xlogue vars), and fragments of
   several argument lists are missing between the visible original line
   numbers.  The visible text is not a complete definition.  */
11391 /* Expand the prologue into a bunch of separate insns. */
11394 ix86_expand_prologue (void)
11396 struct machine_function *m = cfun->machine;
11398 struct ix86_frame frame;
11399 HOST_WIDE_INT allocate;
11400 bool int_registers_saved;
11401 bool sse_registers_saved;
11403 ix86_finalize_stack_realign_flags ();
11405 /* DRAP should not coexist with stack_realign_fp */
11406 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
/* Reset the frame-state tracking used throughout prologue emission.  */
11408 memset (&m->fs, 0, sizeof (m->fs));
11410 /* Initialize CFA state for before the prologue. */
11411 m->fs.cfa_reg = stack_pointer_rtx;
11412 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11414 /* Track SP offset to the CFA. We continue tracking this after we've
11415 swapped the CFA register away from SP. In the case of re-alignment
11416 this is fudged; we're interested to offsets within the local frame. */
11417 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11418 m->fs.sp_valid = true;
11420 ix86_compute_frame_layout (&frame);
11422 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11424 /* We should have already generated an error for any use of
11425 ms_hook on a nested function. */
11426 gcc_checking_assert (!ix86_static_chain_on_stack);
11428 /* Check if profiling is active and we shall use profiling before
11429 prologue variant. If so sorry. */
11430 if (crtl->profile && flag_fentry != 0)
11431 sorry ("ms_hook_prologue attribute isn%'t compatible "
11432 "with -mfentry for 32-bit");
11434 /* In ix86_asm_output_function_label we emitted:
11435 8b ff movl.s %edi,%edi
11437 8b ec movl.s %esp,%ebp
11439 This matches the hookable function prologue in Win32 API
11440 functions in Microsoft Windows XP Service Pack 2 and newer.
11441 Wine uses this to enable Windows apps to hook the Win32 API
11442 functions provided by Wine.
11444 What that means is that we've already set up the frame pointer. */
11446 if (frame_pointer_needed
11447 && !(crtl->drap_reg && crtl->stack_realign_needed))
11451 /* We've decided to use the frame pointer already set up.
11452 Describe this to the unwinder by pretending that both
11453 push and mov insns happen right here.
11455 Putting the unwind info here at the end of the ms_hook
11456 is done so that we can make absolutely certain we get
11457 the required byte sequence at the start of the function,
11458 rather than relying on an assembler that can produce
11459 the exact encoding required.
11461 However it does mean (in the unpatched case) that we have
11462 a 1 insn window where the asynchronous unwind info is
11463 incorrect. However, if we placed the unwind info at
11464 its correct location we would have incorrect unwind info
11465 in the patched case. Which is probably all moot since
11466 I don't expect Wine generates dwarf2 unwind info for the
11467 system libraries that use this feature. */
11469 insn = emit_insn (gen_blockage ());
11471 push = gen_push (hard_frame_pointer_rtx);
11472 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11473 stack_pointer_rtx);
11474 RTX_FRAME_RELATED_P (push) = 1;
11475 RTX_FRAME_RELATED_P (mov) = 1;
11477 RTX_FRAME_RELATED_P (insn) = 1;
11478 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11479 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11481 /* Note that gen_push incremented m->fs.cfa_offset, even
11482 though we didn't emit the push insn here. */
11483 m->fs.cfa_reg = hard_frame_pointer_rtx;
11484 m->fs.fp_offset = m->fs.cfa_offset;
11485 m->fs.fp_valid = true;
11489 /* The frame pointer is not needed so pop %ebp again.
11490 This leaves us with a pristine state. */
11491 emit_insn (gen_pop (hard_frame_pointer_rtx));
11495 /* The first insn of a function that accepts its static chain on the
11496 stack is to push the register that would be filled in by a direct
11497 call. This insn will be skipped by the trampoline. */
11498 else if (ix86_static_chain_on_stack)
11500 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11501 emit_insn (gen_blockage ());
11503 /* We don't want to interpret this push insn as a register save,
11504 only as a stack adjustment. The real copy of the register as
11505 a save will be done later, if needed. */
11506 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11507 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11508 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11509 RTX_FRAME_RELATED_P (insn) = 1;
11512 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
11513 of DRAP is needed and stack realignment is really needed after reload */
11514 if (stack_realign_drap)
11516 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11518 /* Only need to push parameter pointer reg if it is caller saved. */
11519 if (!call_used_regs[REGNO (crtl->drap_reg)])
11521 /* Push arg pointer reg */
11522 insn = emit_insn (gen_push (crtl->drap_reg));
11523 RTX_FRAME_RELATED_P (insn) = 1;
11526 /* Grab the argument pointer. */
11527 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11528 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11529 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the CFA is expressed in terms of the DRAP register.  */
11530 m->fs.cfa_reg = crtl->drap_reg;
11531 m->fs.cfa_offset = 0;
11533 /* Align the stack. */
11534 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11536 GEN_INT (-align_bytes)));
11537 RTX_FRAME_RELATED_P (insn) = 1;
11539 /* Replicate the return address on the stack so that return
11540 address can be reached via (argp - 1) slot. This is needed
11541 to implement macro RETURN_ADDR_RTX and intrinsic function
11542 expand_builtin_return_addr etc. */
11543 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11544 t = gen_frame_mem (word_mode, t);
11545 insn = emit_insn (gen_push (t));
11546 RTX_FRAME_RELATED_P (insn) = 1;
11548 /* For the purposes of frame and register save area addressing,
11549 we've started over with a new frame. */
11550 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11551 m->fs.realigned = true;
11554 int_registers_saved = (frame.nregs == 0);
11555 sse_registers_saved = (frame.nsseregs == 0);
11557 if (frame_pointer_needed && !m->fs.fp_valid)
11559 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11560 slower on all targets. Also sdb doesn't like it. */
11561 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11562 RTX_FRAME_RELATED_P (insn) = 1;
11564 /* Push registers now, before setting the frame pointer
11566 if (!int_registers_saved
11568 && !frame.save_regs_using_mov)
11570 ix86_emit_save_regs ();
11571 int_registers_saved = true;
11572 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11575 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11577 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11578 RTX_FRAME_RELATED_P (insn) = 1;
11580 if (m->fs.cfa_reg == stack_pointer_rtx)
11581 m->fs.cfa_reg = hard_frame_pointer_rtx;
11582 m->fs.fp_offset = m->fs.sp_offset;
11583 m->fs.fp_valid = true;
11587 if (!int_registers_saved)
11589 /* If saving registers via PUSH, do so now. */
11590 if (!frame.save_regs_using_mov)
11592 ix86_emit_save_regs ();
11593 int_registers_saved = true;
11594 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11597 /* When using red zone we may start register saving before allocating
11598 the stack frame saving one cycle of the prologue. However, avoid
11599 doing this if we have to probe the stack; at least on x86_64 the
11600 stack probe can turn into a call that clobbers a red zone location. */
11601 else if (ix86_using_red_zone ()
11602 && (! TARGET_STACK_PROBE
11603 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11605 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11606 int_registers_saved = true;
11610 if (stack_realign_fp)
11612 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11613 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11615 /* The computation of the size of the re-aligned stack frame means
11616 that we must allocate the size of the register save area before
11617 performing the actual alignment. Otherwise we cannot guarantee
11618 that there's enough storage above the realignment point. */
11619 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11620 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11621 GEN_INT (m->fs.sp_offset
11622 - frame.sse_reg_save_offset),
11625 /* Align the stack. */
11626 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11628 GEN_INT (-align_bytes)));
11630 /* For the purposes of register save area addressing, the stack
11631 pointer is no longer valid. As for the value of sp_offset,
11632 see ix86_compute_frame_layout, which we need to match in order
11633 to pass verification of stack_pointer_offset at the end. */
11634 m->fs.sp_offset = (m->fs.sp_offset + align_bytes - 1) & -align_bytes;
11635 m->fs.sp_valid = false;
11638 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11640 if (flag_stack_usage_info)
11642 /* We start to count from ARG_POINTER. */
11643 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11645 /* If it was realigned, take into account the fake frame. */
11646 if (stack_realign_drap)
11648 if (ix86_static_chain_on_stack)
11649 stack_size += UNITS_PER_WORD;
11651 if (!call_used_regs[REGNO (crtl->drap_reg)])
11652 stack_size += UNITS_PER_WORD;
11654 /* This over-estimates by 1 minimal-stack-alignment-unit but
11655 mitigates that by counting in the new return address slot. */
11656 current_function_dynamic_stack_size
11657 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11660 current_function_static_stack_size = stack_size;
11663 /* On SEH target with very large frame size, allocate an area to save
11664 SSE registers (as the very large allocation won't be described). */
11666 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11667 && !sse_registers_saved
11669 HOST_WIDE_INT sse_size =
11670 frame.sse_reg_save_offset - frame.reg_save_offset;
11672 gcc_assert (int_registers_saved);
11674 /* No need to do stack checking as the area will be immediately
11676 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11677 GEN_INT (-sse_size), -1,
11678 m->fs.cfa_reg == stack_pointer_rtx);
11679 allocate -= sse_size;
11680 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11681 sse_registers_saved = true;
11684 /* The stack has already been decremented by the instruction calling us
11685 so probe if the size is non-negative to preserve the protection area. */
11686 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11688 /* We expect the registers to be saved when probes are used. */
11689 gcc_assert (int_registers_saved);
11691 if (STACK_CHECK_MOVING_SP)
11693 if (!(crtl->is_leaf && !cfun->calls_alloca
11694 && allocate <= PROBE_INTERVAL))
11696 ix86_adjust_stack_and_probe (allocate);
11702 HOST_WIDE_INT size = allocate;
/* Clamp huge 64-bit frames: probing beyond 2GB-ish offsets cannot be
   encoded; STACK_CHECK_PROTECT keeps a guard margin below the clamp.  */
11704 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11705 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11707 if (TARGET_STACK_PROBE)
11709 if (crtl->is_leaf && !cfun->calls_alloca)
11711 if (size > PROBE_INTERVAL)
11712 ix86_emit_probe_stack_range (0, size);
11715 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11719 if (crtl->is_leaf && !cfun->calls_alloca)
11721 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11722 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11723 size - STACK_CHECK_PROTECT);
11726 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11733 else if (!ix86_target_stack_probe ()
11734 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11736 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11737 GEN_INT (-allocate), -1,
11738 m->fs.cfa_reg == stack_pointer_rtx);
/* Large allocation on a probing target: call the allocate_stack worker
   with the size in %eax/%rax, preserving live eax/r10 around it.  */
11742 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11744 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11745 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11746 bool eax_live = ix86_eax_live_at_start_p ();
11747 bool r10_live = false;
11750 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11754 insn = emit_insn (gen_push (eax));
11755 allocate -= UNITS_PER_WORD;
11756 /* Note that SEH directives need to continue tracking the stack
11757 pointer even after the frame pointer has been set up. */
11758 if (sp_is_cfa_reg || TARGET_SEH)
11761 m->fs.cfa_offset += UNITS_PER_WORD;
11762 RTX_FRAME_RELATED_P (insn) = 1;
11763 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11764 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11765 plus_constant (Pmode, stack_pointer_rtx,
11766 -UNITS_PER_WORD)));
11772 r10 = gen_rtx_REG (Pmode, R10_REG);
11773 insn = emit_insn (gen_push (r10));
11774 allocate -= UNITS_PER_WORD;
11775 if (sp_is_cfa_reg || TARGET_SEH)
11778 m->fs.cfa_offset += UNITS_PER_WORD;
11779 RTX_FRAME_RELATED_P (insn) = 1;
11780 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11781 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11782 plus_constant (Pmode, stack_pointer_rtx,
11783 -UNITS_PER_WORD)));
11787 emit_move_insn (eax, GEN_INT (allocate));
11788 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11790 /* Use the fact that AX still contains ALLOCATE. */
11791 adjust_stack_insn = (Pmode == DImode
11792 ? gen_pro_epilogue_adjust_stack_di_sub
11793 : gen_pro_epilogue_adjust_stack_si_sub);
11795 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11796 stack_pointer_rtx, eax));
11798 if (sp_is_cfa_reg || TARGET_SEH)
11801 m->fs.cfa_offset += allocate;
11802 RTX_FRAME_RELATED_P (insn) = 1;
11803 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11804 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11805 plus_constant (Pmode, stack_pointer_rtx,
11808 m->fs.sp_offset += allocate;
11810 /* Use stack_pointer_rtx for relative addressing so that code
11811 works for realigned stack, too. */
11812 if (r10_live && eax_live)
11814 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11815 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11816 gen_frame_mem (word_mode, t));
11817 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11818 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11819 gen_frame_mem (word_mode, t));
11821 else if (eax_live || r10_live)
11823 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11824 emit_move_insn (gen_rtx_REG (word_mode,
11825 (eax_live ? AX_REG : R10_REG)),
11826 gen_frame_mem (word_mode, t));
11829 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11831 /* If we haven't already set up the frame pointer, do so now. */
11832 if (frame_pointer_needed && !m->fs.fp_valid)
11834 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11835 GEN_INT (frame.stack_pointer_offset
11836 - frame.hard_frame_pointer_offset));
11837 insn = emit_insn (insn);
11838 RTX_FRAME_RELATED_P (insn) = 1;
11839 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11841 if (m->fs.cfa_reg == stack_pointer_rtx)
11842 m->fs.cfa_reg = hard_frame_pointer_rtx;
11843 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11844 m->fs.fp_valid = true;
11847 if (!int_registers_saved)
11848 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11849 if (!sse_registers_saved)
11850 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11852 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
11854 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11856 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11857 insn = emit_insn (gen_set_got (pic));
11858 RTX_FRAME_RELATED_P (insn) = 1;
11859 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11860 emit_insn (gen_prologue_use (pic));
11861 /* Deleting already emitted SET_GOT if exist and allocated to
11862 REAL_PIC_OFFSET_TABLE_REGNUM. */
11863 ix86_elim_entry_set_got (pic);
11866 if (crtl->drap_reg && !crtl->stack_realign_needed)
11868 /* vDRAP is setup but after reload it turns out stack realign
11869 isn't necessary, here we will emit prologue to setup DRAP
11870 without stack realign adjustment */
11871 t = choose_baseaddr (0);
11872 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11875 /* Prevent instructions from being scheduled into register save push
11876 sequence when access to the redzone area is done through frame pointer.
11877 The offset between the frame pointer and the stack pointer is calculated
11878 relative to the value of the stack pointer at the end of the function
11879 prologue, and moving instructions that access redzone area via frame
11880 pointer inside push sequence violates this assumption. */
11881 if (frame_pointer_needed && frame.red_zone_size)
11882 emit_insn (gen_memory_blockage ());
11884 /* Emit cld instruction if stringops are used in the function. */
11885 if (TARGET_CLD && ix86_current_function_needs_cld)
11886 emit_insn (gen_cld ());
11888 /* SEH requires that the prologue end within 256 bytes of the start of
11889 the function. Prevent instruction schedules that would extend that.
11890 Further, prevent alloca modifications to the stack pointer from being
11891 combined with prologue modifications. */
11893 emit_insn (gen_prologue_use (stack_pointer_rtx));
/* NOTE(review): non-contiguous listing — return type, braces, and the
   early return after the DRAP case are missing between the visible
   original line numbers.  */
11896 /* Emit code to restore REG using a POP insn. */
11899 ix86_emit_restore_reg_using_pop (rtx reg)
11901 struct machine_function *m = cfun->machine;
11902 rtx insn = emit_insn (gen_pop (reg));
11904 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11905 m->fs.sp_offset -= UNITS_PER_WORD;
11907 if (m->fs.cfa_reg == crtl->drap_reg
11908 && REGNO (reg) == REGNO (crtl->drap_reg))
11910 /* Previously we'd represented the CFA as an expression
11911 like *(%ebp - 8). We've just popped that value from
11912 the stack, which means we need to reset the CFA to
11913 the drap register. This will remain until we restore
11914 the stack pointer. */
11915 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11916 RTX_FRAME_RELATED_P (insn) = 1;
11918 /* This means that the DRAP register is valid for addressing too. */
11919 m->fs.drap_valid = true;
11923 if (m->fs.cfa_reg == stack_pointer_rtx)
/* SP is the CFA: record the implicit SP += word adjustment the pop
   performs so the CFA offset stays correct.  */
11925 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11926 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11927 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11928 RTX_FRAME_RELATED_P (insn) = 1;
11930 m->fs.cfa_offset -= UNITS_PER_WORD;
11933 /* When the frame pointer is the CFA, and we pop it, we are
11934 swapping back to the stack pointer as the CFA. This happens
11935 for stack frames that don't allocate other data, so we assume
11936 the stack pointer is now pointing at the return address, i.e.
11937 the function entry state, which makes the offset be 1 word. */
11938 if (reg == hard_frame_pointer_rtx)
11940 m->fs.fp_valid = false;
11941 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11943 m->fs.cfa_reg = stack_pointer_rtx;
11944 m->fs.cfa_offset -= UNITS_PER_WORD;
11946 add_reg_note (insn, REG_CFA_DEF_CFA,
11947 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11948 GEN_INT (m->fs.cfa_offset)));
11949 RTX_FRAME_RELATED_P (insn) = 1;
11954 /* Emit code to restore saved registers using POP insns. */
/* Walks all hard registers and pops each saved general-purpose one.
   SSE registers are deliberately excluded: there is no POP for them,
   so they are restored via moves elsewhere.  */
11957 ix86_emit_restore_regs_using_pop (void)
11959 unsigned int regno;
11961 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11962 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11963 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11966 /* Emit code and notes for the LEAVE instruction. */
/* After LEAVE the stack pointer is derived from the (formerly valid)
   frame pointer, so sp becomes valid and fp invalid.  When the CFA was
   based on the frame pointer, switch it back to the stack pointer and
   emit the matching REG_CFA_DEF_CFA note for the unwinder.  */
11969 ix86_emit_leave (void)
11971 struct machine_function *m = cfun->machine;
11972 rtx insn = emit_insn (ix86_gen_leave ());
11974 ix86_add_queued_cfa_restore_notes (insn);
11976 gcc_assert (m->fs.fp_valid);
11977 m->fs.sp_valid = true;
/* LEAVE pops the saved frame pointer, hence the one-word adjustment.  */
11978 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11979 m->fs.fp_valid = false;
11981 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11983 m->fs.cfa_reg = stack_pointer_rtx;
11984 m->fs.cfa_offset = m->fs.sp_offset;
11986 add_reg_note (insn, REG_CFA_DEF_CFA,
11987 plus_constant (Pmode, stack_pointer_rtx,
11989 RTX_FRAME_RELATED_P (insn) = 1;
11991 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11995 /* Emit code to restore saved registers using MOV insns.
11996 First register is restored from CFA - CFA_OFFSET. */
/* Loads each saved general register from its frame slot via
   choose_baseaddr; CFA_OFFSET is decremented by one word per register.
   When MAYBE_EH_RETURN is true, registers saved only for eh_return are
   restored as well.  Emits a REG_CFA_DEF_CFA note when the restored
   register is the DRAP (same reasoning as in the pop-based restore).  */
11998 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11999 bool maybe_eh_return)
12001 struct machine_function *m = cfun->machine;
12002 unsigned int regno;
12004 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12005 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12007 rtx reg = gen_rtx_REG (word_mode, regno);
12010 mem = choose_baseaddr (cfa_offset);
12011 mem = gen_frame_mem (word_mode, mem);
12012 insn = emit_move_insn (reg, mem);
12014 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
12016 /* Previously we'd represented the CFA as an expression
12017 like *(%ebp - 8). We've just popped that value from
12018 the stack, which means we need to reset the CFA to
12019 the drap register. This will remain until we restore
12020 the stack pointer. */
12021 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12022 RTX_FRAME_RELATED_P (insn) = 1;
12024 /* This means that the DRAP register is valid for addressing. */
12025 m->fs.drap_valid = true;
12028 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
12030 cfa_offset -= UNITS_PER_WORD;
12034 /* Emit code to restore saved registers using MOV insns.
12035 First register is restored from CFA - CFA_OFFSET. */
/* SSE counterpart of the above: restores each saved SSE register as a
   V4SFmode load.  If the slot's provable alignment (bounded by
   INCOMING_STACK_BOUNDARY) is insufficient for an aligned vector move,
   the load is wrapped in an UNSPEC to force an unaligned access.  */
12037 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12038 bool maybe_eh_return)
12040 unsigned int regno;
12042 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12043 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12045 rtx reg = gen_rtx_REG (V4SFmode, regno);
12047 unsigned int align;
12049 mem = choose_baseaddr (cfa_offset);
12050 mem = gen_rtx_MEM (V4SFmode, mem);
12052 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
12053 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
12054 set_mem_align (mem, align);
12056 /* SSE saves are not within re-aligned local stack frame.
12057 In case INCOMING_STACK_BOUNDARY is misaligned, we have
12058 to emit unaligned load. */
12061 rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
12063 emit_insn (gen_rtx_SET (VOIDmode, reg, unspec));
12066 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
12068 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
12074 /* Restore function stack, frame, and registers. */
/* STYLE selects the epilogue flavor: the "style == 2" tests below show
   it marks an eh_return epilogue; the sibcall path restores the saved
   frame state and returns without emitting a return insn.
   NOTE(review): exact meaning of other STYLE values is not visible in
   this excerpt — confirm against the callers.  */
12077 ix86_expand_epilogue (int style)
12079 struct machine_function *m = cfun->machine;
/* Saved so the frame state can be reset for a subsequent epilogue
   expansion (see the restores of frame_state_save below).  */
12080 struct machine_frame_state frame_state_save = m->fs;
12081 struct ix86_frame frame;
12082 bool restore_regs_via_mov;
12085 ix86_finalize_stack_realign_flags ();
12086 ix86_compute_frame_layout (&frame);
/* Sanity-check the frame state left behind by the prologue against the
   freshly computed layout before emitting anything.  */
12088 m->fs.sp_valid = (!frame_pointer_needed
12089 || (crtl->sp_is_unchanging
12090 && !stack_realign_fp));
12091 gcc_assert (!m->fs.sp_valid
12092 || m->fs.sp_offset == frame.stack_pointer_offset);
12094 /* The FP must be valid if the frame pointer is present. */
12095 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12096 gcc_assert (!m->fs.fp_valid
12097 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12099 /* We must have *some* valid pointer to the stack frame. */
12100 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12102 /* The DRAP is never valid at this point. */
12103 gcc_assert (!m->fs.drap_valid);
12105 /* See the comment about red zone and frame
12106 pointer usage in ix86_expand_prologue. */
12107 if (frame_pointer_needed && frame.red_zone_size)
12108 emit_insn (gen_memory_blockage ());
12110 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12111 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12113 /* Determine the CFA offset of the end of the red-zone. */
12114 m->fs.red_zone_offset = 0;
12115 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12117 /* The red-zone begins below the return address. */
12118 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12120 /* When the register save area is in the aligned portion of
12121 the stack, determine the maximum runtime displacement that
12122 matches up with the aligned frame. */
12123 if (stack_realign_drap)
12124 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12128 /* Special care must be taken for the normal return case of a function
12129 using eh_return: the eax and edx registers are marked as saved, but
12130 not restored along this path. Adjust the save location to match. */
12131 if (crtl->calls_eh_return && style != 2)
12132 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between MOV-based and POP-based register restores.  The chain
   below encodes both hard constraints (eh_return, SEH) and cost
   heuristics; order matters.  */
12134 /* EH_RETURN requires the use of moves to function properly. */
12135 if (crtl->calls_eh_return)
12136 restore_regs_via_mov = true;
12137 /* SEH requires the use of pops to identify the epilogue. */
12138 else if (TARGET_SEH)
12139 restore_regs_via_mov = false;
12140 /* If we're only restoring one register and sp is not valid then
12141 using a move instruction to restore the register since it's
12142 less work than reloading sp and popping the register. */
12143 else if (!m->fs.sp_valid && frame.nregs <= 1)
12144 restore_regs_via_mov = true;
12145 else if (TARGET_EPILOGUE_USING_MOVE
12146 && cfun->machine->use_fast_prologue_epilogue
12147 && (frame.nregs > 1
12148 || m->fs.sp_offset != frame.reg_save_offset))
12149 restore_regs_via_mov = true;
12150 else if (frame_pointer_needed
12152 && m->fs.sp_offset != frame.reg_save_offset)
12153 restore_regs_via_mov = true;
12154 else if (frame_pointer_needed
12155 && TARGET_USE_LEAVE
12156 && cfun->machine->use_fast_prologue_epilogue
12157 && frame.nregs == 1)
12158 restore_regs_via_mov = true;
12160 restore_regs_via_mov = false;
12162 if (restore_regs_via_mov || frame.nsseregs)
12164 /* Ensure that the entire register save area is addressable via
12165 the stack pointer, if we will restore via sp. */
12167 && m->fs.sp_offset > 0x7fffffff
12168 && !(m->fs.fp_valid || m->fs.drap_valid)
12169 && (frame.nsseregs + frame.nregs) != 0)
12171 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12172 GEN_INT (m->fs.sp_offset
12173 - frame.sse_reg_save_offset),
12175 m->fs.cfa_reg == stack_pointer_rtx);
12179 /* If there are any SSE registers to restore, then we have to do it
12180 via moves, since there's obviously no pop for SSE regs. */
12181 if (frame.nsseregs)
12182 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12185 if (restore_regs_via_mov)
12190 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12192 /* eh_return epilogues need %ecx added to the stack pointer. */
12195 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12197 /* Stack align doesn't work with eh_return. */
12198 gcc_assert (!stack_realign_drap);
12199 /* Neither does regparm nested functions. */
12200 gcc_assert (!ix86_static_chain_on_stack);
12202 if (frame_pointer_needed)
12204 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12205 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12206 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12208 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12209 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12211 /* Note that we use SA as a temporary CFA, as the return
12212 address is at the proper place relative to it. We
12213 pretend this happens at the FP restore insn because
12214 prior to this insn the FP would be stored at the wrong
12215 offset relative to SA, and after this insn we have no
12216 other reasonable register to use for the CFA. We don't
12217 bother resetting the CFA to the SP for the duration of
12218 the return insn. */
12219 add_reg_note (insn, REG_CFA_DEF_CFA,
12220 plus_constant (Pmode, sa, UNITS_PER_WORD))
12221 ix86_add_queued_cfa_restore_notes (insn);
12222 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12223 RTX_FRAME_RELATED_P (insn) = 1;
12225 m->fs.cfa_reg = sa;
12226 m->fs.cfa_offset = UNITS_PER_WORD;
12227 m->fs.fp_valid = false;
12229 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12230 const0_rtx, style, false);
12234 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12235 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12236 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12237 ix86_add_queued_cfa_restore_notes (insn);
12239 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12240 if (m->fs.cfa_offset != UNITS_PER_WORD)
12242 m->fs.cfa_offset = UNITS_PER_WORD;
12243 add_reg_note (insn, REG_CFA_DEF_CFA,
12244 plus_constant (Pmode, stack_pointer_rtx,
12246 RTX_FRAME_RELATED_P (insn) = 1;
12249 m->fs.sp_offset = UNITS_PER_WORD;
12250 m->fs.sp_valid = true;
12255 /* SEH requires that the function end with (1) a stack adjustment
12256 if necessary, (2) a sequence of pops, and (3) a return or
12257 jump instruction. Prevent insns from the function body from
12258 being scheduled into this sequence. */
12261 /* Prevent a catch region from being adjacent to the standard
12262 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
12263 several other flags that would be interesting to test are
12265 if (flag_non_call_exceptions)
12266 emit_insn (gen_nops (const1_rtx));
12268 emit_insn (gen_blockage ());
12271 /* First step is to deallocate the stack frame so that we can
12272 pop the registers. Also do it on SEH target for very large
12273 frame as the emitted instructions aren't allowed by the ABI in
12275 if (!m->fs.sp_valid
12277 && (m->fs.sp_offset - frame.reg_save_offset
12278 >= SEH_MAX_FRAME_SIZE)))
12280 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12281 GEN_INT (m->fs.fp_offset
12282 - frame.reg_save_offset),
12285 else if (m->fs.sp_offset != frame.reg_save_offset)
12287 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12288 GEN_INT (m->fs.sp_offset
12289 - frame.reg_save_offset),
12291 m->fs.cfa_reg == stack_pointer_rtx);
12294 ix86_emit_restore_regs_using_pop ();
12297 /* If we used a stack pointer and haven't already got rid of it,
12299 if (m->fs.fp_valid)
12301 /* If the stack pointer is valid and pointing at the frame
12302 pointer store address, then we only need a pop. */
12303 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12304 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12305 /* Leave results in shorter dependency chains on CPUs that are
12306 able to grok it fast. */
12307 else if (TARGET_USE_LEAVE
12308 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12309 || !cfun->machine->use_fast_prologue_epilogue)
12310 ix86_emit_leave ();
12313 pro_epilogue_adjust_stack (stack_pointer_rtx,
12314 hard_frame_pointer_rtx,
12315 const0_rtx, style, !using_drap);
12316 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown: recompute the stack pointer from the DRAP, accounting
   for the extra slots used by the static chain and the saved DRAP
   register itself.  */
12322 int param_ptr_offset = UNITS_PER_WORD;
12325 gcc_assert (stack_realign_drap);
12327 if (ix86_static_chain_on_stack)
12328 param_ptr_offset += UNITS_PER_WORD;
12329 if (!call_used_regs[REGNO (crtl->drap_reg)])
12330 param_ptr_offset += UNITS_PER_WORD;
12332 insn = emit_insn (gen_rtx_SET
12333 (VOIDmode, stack_pointer_rtx,
12334 gen_rtx_PLUS (Pmode,
12336 GEN_INT (-param_ptr_offset))));
12337 m->fs.cfa_reg = stack_pointer_rtx;
12338 m->fs.cfa_offset = param_ptr_offset;
12339 m->fs.sp_offset = param_ptr_offset;
12340 m->fs.realigned = false;
12342 add_reg_note (insn, REG_CFA_DEF_CFA,
12343 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12344 GEN_INT (param_ptr_offset)));
12345 RTX_FRAME_RELATED_P (insn) = 1;
12347 if (!call_used_regs[REGNO (crtl->drap_reg)])
12348 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12351 /* At this point the stack pointer must be valid, and we must have
12352 restored all of the registers. We may not have deallocated the
12353 entire stack frame. We've delayed this until now because it may
12354 be possible to merge the local stack deallocation with the
12355 deallocation forced by ix86_static_chain_on_stack. */
12356 gcc_assert (m->fs.sp_valid);
12357 gcc_assert (!m->fs.fp_valid);
12358 gcc_assert (!m->fs.realigned);
12359 if (m->fs.sp_offset != UNITS_PER_WORD)
12361 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12362 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12366 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12368 /* Sibcall epilogues don't want a return instruction. */
12371 m->fs = frame_state_save;
/* Callee-pop ("pascal"-style) returns: pop the callers' argument area.  */
12375 if (crtl->args.pops_args && crtl->args.size)
12377 rtx popc = GEN_INT (crtl->args.pops_args);
12379 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12380 address, do explicit add, and jump indirectly to the caller. */
12382 if (crtl->args.pops_args >= 65536)
12384 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12387 /* There is no "pascal" calling convention in any 64bit ABI. */
12388 gcc_assert (!TARGET_64BIT);
12390 insn = emit_insn (gen_pop (ecx));
12391 m->fs.cfa_offset -= UNITS_PER_WORD;
12392 m->fs.sp_offset -= UNITS_PER_WORD;
12394 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12395 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12396 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
/* Record that %ecx now holds the return address for the unwinder.  */
12397 add_reg_note (insn, REG_CFA_REGISTER,
12398 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12399 RTX_FRAME_RELATED_P (insn) = 1;
12401 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12403 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12406 emit_jump_insn (gen_simple_return_pop_internal (popc));
12409 emit_jump_insn (gen_simple_return_internal ());
12411 /* Restore the state back to the state from the prologue,
12412 so that it's correct for the next epilogue. */
12413 m->fs = frame_state_save;
12416 /* Reset from the function's potential modifications. */
/* Target hook run when the assembly epilogue is output: restores the
   hard PIC register number that the prologue may have changed, and
   works around Mach-O's inability to place labels at the very end of
   an object by emitting a trailing nop when needed.  */
12419 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12421 if (pic_offset_table_rtx
12422 && !ix86_use_pseudo_pic_reg ())
12423 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12425 /* Mach-O doesn't support labels at the end of objects, so if
12426 it looks like we might want one, insert a NOP. */
12428 rtx_insn *insn = get_last_insn ();
12429 rtx_insn *deleted_debug_label = NULL;
12432 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12434 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12435 notes only, instead set their CODE_LABEL_NUMBER to -1,
12436 otherwise there would be code generation differences
12437 in between -g and -g0. */
12438 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12439 deleted_debug_label = insn;
12440 insn = PREV_INSN (insn);
12445 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12446 fputs ("\tnop\n", file);
12447 else if (deleted_debug_label)
/* No nop was needed; neutralize the deleted debug labels instead so
   -g and -g0 produce identical code.  */
12448 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12449 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12450 CODE_LABEL_NUMBER (insn) = -1;
12456 /* Return a scratch register to use in the split stack prologue. The
12457 split stack prologue is used for -fsplit-stack. It is the first
12458 instructions in the function, even before the regular prologue.
12459 The scratch register can be any caller-saved register which is not
12460 used for parameters or for the static chain. */
/* Returns INVALID_REGNUM (after issuing a "sorry" diagnostic) when the
   calling convention leaves no usable caller-saved register — e.g.
   fastcall with a static chain, or too many regparm parameters.  */
12462 static unsigned int
12463 split_stack_prologue_scratch_regno (void)
12469 bool is_fastcall, is_thiscall;
12472 is_fastcall = (lookup_attribute ("fastcall",
12473 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12475 is_thiscall = (lookup_attribute ("thiscall",
12476 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12478 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
/* fastcall: both %ecx and %edx carry arguments, so a nested function
   (whose static chain also needs a register) cannot be handled.  */
12482 if (DECL_STATIC_CHAIN (cfun->decl))
12484 sorry ("-fsplit-stack does not support fastcall with "
12485 "nested function");
12486 return INVALID_REGNUM;
12490 else if (is_thiscall)
12492 if (!DECL_STATIC_CHAIN (cfun->decl))
12496 else if (regparm < 3)
12498 if (!DECL_STATIC_CHAIN (cfun->decl))
12504 sorry ("-fsplit-stack does not support 2 register "
12505 "parameters for a nested function")
12506 return INVALID_REGNUM;
12513 /* FIXME: We could make this work by pushing a register
12514 around the addition and comparison. */
12515 sorry ("-fsplit-stack does not support 3 register parameters");
12516 return INVALID_REGNUM;
12521 /* A SYMBOL_REF for the function which allocates new stackspace for
/* Lazily initialized to "__morestack" by
   ix86_expand_split_stack_prologue; GTY-marked so it survives GC.  */
12524 static GTY(()) rtx split_stack_fn;
12526 /* A SYMBOL_REF for the more stack function when using the large
/* Lazily initialized to "__morestack_large_model" for the large code
   model, which uses a different argument-passing convention.  */
12529 static GTY(()) rtx split_stack_fn_large;
12531 /* Handle -fsplit-stack. These are the first instructions in the
12532 function, even before the regular prologue. */
/* Emits the split-stack check: compare (sp - frame size) against the
   stack limit stored in the TCB; if there is enough room, branch over
   the __morestack call.  Otherwise call __morestack (or
   __morestack_large_model for the large code model) to allocate a new
   stack segment, then resume at the post-check label.  Also sets up the
   varargs scratch pointer when the function calls va_start.  */
12535 ix86_expand_split_stack_prologue (void)
12537 struct ix86_frame frame;
12538 HOST_WIDE_INT allocate;
12539 unsigned HOST_WIDE_INT args_size;
12540 rtx_code_label *label;
12541 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12542 rtx scratch_reg = NULL_RTX;
12543 rtx_code_label *varargs_label = NULL;
12546 gcc_assert (flag_split_stack && reload_completed);
12548 ix86_finalize_stack_realign_flags ();
12549 ix86_compute_frame_layout (&frame);
12550 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12552 /* This is the label we will branch to if we have enough stack
12553 space. We expect the basic block reordering pass to reverse this
12554 branch if optimizing, so that we branch in the unlikely case. */
12555 label = gen_label_rtx ();
12557 /* We need to compare the stack pointer minus the frame size with
12558 the stack boundary in the TCB. The stack boundary always gives
12559 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12560 can compare directly. Otherwise we need to do an addition. */
12562 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12563 UNSPEC_STACK_CHECK);
12564 limit = gen_rtx_CONST (Pmode, limit);
12565 limit = gen_rtx_MEM (Pmode, limit);
12566 if (allocate < SPLIT_STACK_AVAILABLE)
12567 current = stack_pointer_rtx;
12570 unsigned int scratch_regno;
12573 /* We need a scratch register to hold the stack pointer minus
12574 the required frame size. Since this is the very start of the
12575 function, the scratch register can be any caller-saved
12576 register which is not used for parameters. */
12577 offset = GEN_INT (- allocate);
12578 scratch_regno = split_stack_prologue_scratch_regno ();
12579 if (scratch_regno == INVALID_REGNUM)
12581 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12582 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12584 /* We don't use ix86_gen_add3 in this case because it will
12585 want to split to lea, but when not optimizing the insn
12586 will not be split after this point. */
12587 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12588 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* Offset doesn't fit an immediate: load it first, then add sp.  */
12593 emit_move_insn (scratch_reg, offset);
12594 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12595 stack_pointer_rtx));
12597 current = scratch_reg;
12600 ix86_expand_branch (GEU, current, limit, label);
12601 jump_insn = get_last_insn ();
12602 JUMP_LABEL (jump_insn) = label;
12604 /* Mark the jump as very likely to be taken. */
12605 add_int_reg_note (jump_insn, REG_BR_PROB,
12606 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12608 if (split_stack_fn == NULL_RTX)
12610 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12611 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12613 fn = split_stack_fn;
12615 /* Get more stack space. We pass in the desired stack space and the
12616 size of the arguments to copy to the new stack. In 32-bit mode
12617 we push the parameters; __morestack will return on a new stack
12618 anyhow. In 64-bit mode we pass the parameters in r10 and
12620 allocate_rtx = GEN_INT (allocate);
12621 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12622 call_fusage = NULL_RTX;
12627 reg10 = gen_rtx_REG (Pmode, R10_REG);
12628 reg11 = gen_rtx_REG (Pmode, R11_REG);
12630 /* If this function uses a static chain, it will be in %r10.
12631 Preserve it across the call to __morestack. */
12632 if (DECL_STATIC_CHAIN (cfun->decl))
12636 rax = gen_rtx_REG (word_mode, AX_REG);
12637 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12638 use_reg (&call_fusage, rax);
12641 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12644 HOST_WIDE_INT argval;
12646 gcc_assert (Pmode == DImode);
12647 /* When using the large model we need to load the address
12648 into a register, and we've run out of registers. So we
12649 switch to a different calling convention, and we call a
12650 different function: __morestack_large. We pass the
12651 argument size in the upper 32 bits of r10 and pass the
12652 frame size in the lower 32 bits. */
12653 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12654 gcc_assert ((args_size & 0xffffffff) == args_size);
12656 if (split_stack_fn_large == NULL_RTX)
12658 split_stack_fn_large =
12659 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12660 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12662 if (ix86_cmodel == CM_LARGE_PIC)
/* Large PIC model: materialize the GOT base via a RIP-relative
   label, then load the function address through the GOT.  */
12664 rtx_code_label *label;
12667 label = gen_label_rtx ();
12668 emit_label (label);
12669 LABEL_PRESERVE_P (label) = 1;
12670 emit_insn (gen_set_rip_rex64 (reg10, label));
12671 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12672 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12673 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12675 x = gen_rtx_CONST (Pmode, x);
12676 emit_move_insn (reg11, x);
12677 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12678 x = gen_const_mem (Pmode, x);
12679 emit_move_insn (reg11, x);
12682 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size into the upper half and the frame size into the
   lower half of r10 (double shift avoids UB for 32-bit HWI).  */
12686 argval = ((args_size << 16) << 16) + allocate;
12687 emit_move_insn (reg10, GEN_INT (argval));
12691 emit_move_insn (reg10, allocate_rtx);
12692 emit_move_insn (reg11, GEN_INT (args_size));
12693 use_reg (&call_fusage, reg11);
12696 use_reg (&call_fusage, reg10);
/* 32-bit mode: pass the two arguments on the stack.  */
12700 emit_insn (gen_push (GEN_INT (args_size)));
12701 emit_insn (gen_push (allocate_rtx));
12703 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12704 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12706 add_function_usage_to (call_insn, call_fusage);
12708 /* In order to make call/return prediction work right, we now need
12709 to execute a return instruction. See
12710 libgcc/config/i386/morestack.S for the details on how this works.
12712 For flow purposes gcc must not see this as a return
12713 instruction--we need control flow to continue at the subsequent
12714 label. Therefore, we use an unspec. */
12715 gcc_assert (crtl->args.pops_args < 65536);
12716 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12718 /* If we are in 64-bit mode and this function uses a static chain,
12719 we saved %r10 in %rax before calling _morestack. */
12720 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12721 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12722 gen_rtx_REG (word_mode, AX_REG));
12724 /* If this function calls va_start, we need to store a pointer to
12725 the arguments on the old stack, because they may not have been
12726 all copied to the new stack. At this point the old stack can be
12727 found at the frame pointer value used by __morestack, because
12728 __morestack has set that up before calling back to us. Here we
12729 store that pointer in a scratch register, and in
12730 ix86_expand_prologue we store the scratch register in a stack
12732 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12734 unsigned int scratch_regno;
12738 scratch_regno = split_stack_prologue_scratch_regno ();
12739 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12740 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12744 return address within this function
12745 return address of caller of this function
12747 So we add three words to get to the stack arguments.
12751 return address within this function
12752 first argument to __morestack
12753 second argument to __morestack
12754 return address of caller of this function
12756 So we add five words to get to the stack arguments.
12758 words = TARGET_64BIT ? 3 : 5;
12759 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12760 gen_rtx_PLUS (Pmode, frame_reg,
12761 GEN_INT (words * UNITS_PER_WORD))));
/* Skip the no-__morestack setup of the scratch register below.  */
12763 varargs_label = gen_label_rtx ();
12764 emit_jump_insn (gen_jump (varargs_label));
12765 JUMP_LABEL (get_last_insn ()) = varargs_label;
12770 emit_label (label);
12771 LABEL_NUSES (label) = 1;
12773 /* If this function calls va_start, we now have to set the scratch
12774 register for the case where we do not call __morestack. In this
12775 case we need to set it based on the stack pointer. */
12776 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12778 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12779 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12780 GEN_INT (UNITS_PER_WORD))));
12782 emit_label (varargs_label);
12783 LABEL_NUSES (varargs_label) = 1;
12787 /* We may have to tell the dataflow pass that the split stack prologue
12788 is initializing a scratch register. */
/* Marks the split-stack scratch register as live on entry in REGS so
   dataflow does not consider its prologue initialization dead.  Only
   relevant when the function needs the varargs pointer (-fsplit-stack
   with va_start).  */
12791 ix86_live_on_entry (bitmap regs)
12793 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12795 gcc_assert (flag_split_stack);
12796 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12800 /* Extract the parts of an RTL expression that is a valid memory address
12801 for an instruction. Return 0 if the structure of the address is
12802 grossly off. Return -1 if the address contains ASHIFT, so it is not
12803 strictly valid, but still used for computing length of lea instruction. */
/* Fills *OUT with the canonical x86 address parts
   base + index*scale + disp (+ segment), applying the encoding-driven
   fixups near the end (swap base/index, reg+reg for *2, %ebp/%r13
   displacement requirement, K6 [%esi] workaround).  */
12806 ix86_decompose_address (rtx addr, struct ix86_address *out)
12808 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12809 rtx base_reg, index_reg;
12810 HOST_WIDE_INT scale = 1;
12811 rtx scale_rtx = NULL_RTX;
12814 enum ix86_address_seg seg = SEG_DEFAULT;
12816 /* Allow zero-extended SImode addresses,
12817 they will be emitted with addr32 prefix. */
12818 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12820 if (GET_CODE (addr) == ZERO_EXTEND
12821 && GET_MODE (XEXP (addr, 0)) == SImode)
12823 addr = XEXP (addr, 0);
12824 if (CONST_INT_P (addr))
12827 else if (GET_CODE (addr) == AND
12828 && const_32bit_mask (XEXP (addr, 1), DImode))
/* (and:DI x 0xffffffff) is equivalent to a zero-extend; strip it.  */
12830 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12831 if (addr == NULL_RTX)
12834 if (CONST_INT_P (addr))
12839 /* Allow SImode subregs of DImode addresses,
12840 they will be emitted with addr32 prefix. */
12841 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12843 if (GET_CODE (addr) == SUBREG
12844 && GET_MODE (SUBREG_REG (addr)) == DImode)
12846 addr = SUBREG_REG (addr);
12847 if (CONST_INT_P (addr))
/* Dispatch on the top-level RTX code of the (possibly stripped)
   address to pick out base, index, scale, and displacement.  */
12854 else if (GET_CODE (addr) == SUBREG)
12856 if (REG_P (SUBREG_REG (addr)))
12861 else if (GET_CODE (addr) == PLUS)
12863 rtx addends[4], op;
/* Flatten the (possibly nested) PLUS chain into addends[].  */
12871 addends[n++] = XEXP (op, 1);
12874 while (GET_CODE (op) == PLUS);
12879 for (i = n; i >= 0; --i)
12882 switch (GET_CODE (op))
12887 index = XEXP (op, 0);
12888 scale_rtx = XEXP (op, 1);
12894 index = XEXP (op, 0);
12895 tmp = XEXP (op, 1);
12896 if (!CONST_INT_P (tmp))
/* ASHIFT addend: shift count must be 0..3, i.e. scale 1/2/4/8.  */
12898 scale = INTVAL (tmp);
12899 if ((unsigned HOST_WIDE_INT) scale > 3)
12901 scale = 1 << scale;
12906 if (GET_CODE (op) != UNSPEC)
/* An UNSPEC_TP addend selects the TLS segment register.  */
12911 if (XINT (op, 1) == UNSPEC_TP
12912 && TARGET_TLS_DIRECT_SEG_REFS
12913 && seg == SEG_DEFAULT)
12914 seg = DEFAULT_TLS_SEG_REG;
12920 if (!REG_P (SUBREG_REG (op)))
12947 else if (GET_CODE (addr) == MULT)
12949 index = XEXP (addr, 0); /* index*scale */
12950 scale_rtx = XEXP (addr, 1);
12952 else if (GET_CODE (addr) == ASHIFT)
12954 /* We're called for lea too, which implements ashift on occasion. */
12955 index = XEXP (addr, 0);
12956 tmp = XEXP (addr, 1);
12957 if (!CONST_INT_P (tmp))
12959 scale = INTVAL (tmp);
12960 if ((unsigned HOST_WIDE_INT) scale > 3)
12962 scale = 1 << scale;
12966 disp = addr; /* displacement */
12972 else if (GET_CODE (index) == SUBREG
12973 && REG_P (SUBREG_REG (index)))
12979 /* Extract the integral value of scale. */
12982 if (!CONST_INT_P (scale_rtx))
12984 scale = INTVAL (scale_rtx);
/* Look through SUBREGs so register-identity checks below work.  */
12987 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12988 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12990 /* Avoid useless 0 displacement. */
12991 if (disp == const0_rtx && (base || index))
12994 /* Allow arg pointer and stack pointer as index if there is not scaling. */
12995 if (base_reg && index_reg && scale == 1
12996 && (index_reg == arg_pointer_rtx
12997 || index_reg == frame_pointer_rtx
12998 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
13000 std::swap (base, index);
13001 std::swap (base_reg, index_reg);
13004 /* Special case: %ebp cannot be encoded as a base without a displacement.
13008 && (base_reg == hard_frame_pointer_rtx
13009 || base_reg == frame_pointer_rtx
13010 || base_reg == arg_pointer_rtx
13011 || (REG_P (base_reg)
13012 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
13013 || REGNO (base_reg) == R13_REG))))
13016 /* Special case: on K6, [%esi] makes the instruction vector decoded.
13017 Avoid this by transforming to [%esi+0].
13018 Reload calls address legitimization without cfun defined, so we need
13019 to test cfun for being non-NULL. */
13020 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
13021 && base_reg && !index_reg && !disp
13022 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
13025 /* Special case: encode reg+reg instead of reg*2. */
13026 if (!base && index && scale == 2)
13027 base = index, base_reg = index_reg, scale = 1;
13029 /* Special case: scaling cannot be encoded without base or displacement. */
13030 if (!base && !disp && index && scale != 1)
13034 out->index = index;
13036 out->scale = scale;
13042 /* Return cost of the memory address x.
13043 For i386, it is better to use a complex address than let gcc copy
13044 the address into a reg and make a new pseudo. But not if the address
13045 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): TARGET_ADDRESS_COST hook.  The extraction appears to have
   dropped lines in this function (return type, cost accumulation and the
   final return); comments below describe only the visible code.  */
13048 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13050 struct ix86_address parts;
/* Decompose X into base/index/disp/scale parts; a non-positive result
   would mean X is not a valid address form.  */
13052 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the pseudo/hard-register tests below see naked REGs.  */
13056 if (parts.base && GET_CODE (parts.base) == SUBREG)
13057 parts.base = SUBREG_REG (parts.base);
13058 if (parts.index && GET_CODE (parts.index) == SUBREG)
13059 parts.index = SUBREG_REG (parts.index);
13061 /* Attempt to minimize number of registers in the address by increasing
13062 address cost for each used register. We don't increase address cost
13063 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
13064 is not invariant itself it most likely means that base or index is not
13065 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
13066 which is not profitable for x86. */
/* Charge for a base register, except when it is the PIC register (a
   hard register, or matching pic_offset_table_rtx outside GIMPLE passes).  */
13068 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13069 && (current_pass->type == GIMPLE_PASS
13070 || !pic_offset_table_rtx
13071 || !REG_P (parts.base)
13072 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
/* Same test for the index register.  */
13076 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13077 && (current_pass->type == GIMPLE_PASS
13078 || !pic_offset_table_rtx
13079 || !REG_P (parts.index)
13080 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13083 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
13084 since it's predecode logic can't detect the length of instructions
13085 and it degenerates to vector decoded. Increase cost of such
13086 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
13087 to split such addresses or even refuse such addresses at all.
13089 Following addressing modes are affected:
13094 The first and last case may be avoidable by explicitly coding the zero in
13095 memory address, but I don't have AMD-K6 machine handy to check this
/* The three K6-problematic shapes: [index*scale], disp[index*scale],
   and [base+index] without displacement.  */
13099 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13100 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13101 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13107 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13108 this is used for to form addresses to local data when -fPIC is in
/* Predicate: true iff DISP is the Mach-O pic-base offset unspec
   (UNSPEC_MACHOPIC_OFFSET), i.e. a local-data PIC displacement.  */
13112 darwin_local_data_pic (rtx disp)
13114 return (GET_CODE (disp) == UNSPEC
13115 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13118 /* Determine if a given RTX is a valid constant. We already know this
13119 satisfies CONSTANT_P. */
/* NOTE(review): TARGET_LEGITIMATE_CONSTANT_P hook; several switch cases
   and return statements appear to be missing from this extraction.  */
13122 ix86_legitimate_constant_p (machine_mode, rtx x)
13124 /* Pointer bounds constants are not valid. */
13125 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13128 switch (GET_CODE (x))
/* CONST: strip to the inner expression; a PLUS must have a CONST_INT
   second operand to remain a legitimate constant.  */
13133 if (GET_CODE (x) == PLUS)
13135 if (!CONST_INT_P (XEXP (x, 1)))
13140 if (TARGET_MACHO && darwin_local_data_pic (x))
13143 /* Only some unspecs are valid as "constants". */
13144 if (GET_CODE (x) == UNSPEC)
13145 switch (XINT (x, 1))
13148 case UNSPEC_GOTOFF:
13149 case UNSPEC_PLTOFF:
13150 return TARGET_64BIT;
/* NTPOFF/DTPOFF wrap a SYMBOL_REF whose TLS model must match the
   access model the unspec encodes.  */
13152 case UNSPEC_NTPOFF:
13153 x = XVECEXP (x, 0, 0);
13154 return (GET_CODE (x) == SYMBOL_REF
13155 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13156 case UNSPEC_DTPOFF:
13157 x = XVECEXP (x, 0, 0);
13158 return (GET_CODE (x) == SYMBOL_REF
13159 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13164 /* We must have drilled down to a symbol. */
13165 if (GET_CODE (x) == LABEL_REF)
13167 if (GET_CODE (x) != SYMBOL_REF)
13172 /* TLS symbols are never valid. */
13173 if (SYMBOL_REF_TLS_MODEL (x))
13176 /* DLLIMPORT symbols are never valid. */
13177 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13178 && SYMBOL_REF_DLLIMPORT_P (x))
13182 /* mdynamic-no-pic */
13183 if (MACHO_DYNAMIC_NO_PIC_P)
13184 return machopic_symbol_defined_p (x)
/* presumably this branch rejects non-zero TImode constants except via
   the SSE-constant check below -- TODO confirm against full source.  */
13189 if (GET_MODE (x) == TImode
13190 && x != CONST0_RTX (TImode)
13196 if (!standard_sse_constant_p (x))
13203 /* Otherwise we handle everything else in the move patterns. */
13207 /* Determine if it's legal to put X into the constant pool. This
13208 is not possible for the address of thread-local symbols, which
13209 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: delegates to
   ix86_legitimate_constant_p for the non-trivial cases (the switch's
   integral/vector cases are elided in this extraction).  */
13212 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13214 /* We can always put integral constants and vectors in memory. */
13215 switch (GET_CODE (x))
13225 return !ix86_legitimate_constant_p (mode, x);
13228 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
/* Predicate used by the PE-COFF paths below: X must be a SYMBOL_REF and
   either dllimport-marked or a stub variable; anything else (including
   non-PECOFF targets, where TARGET_DLLIMPORT_DECL_ATTRIBUTES is false)
   is not "imported".  */
13232 is_imported_p (rtx x)
13234 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13235 || GET_CODE (x) != SYMBOL_REF)
13238 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13242 /* Nonzero if the constant value X is a legitimate general operand
13243 when generating PIC code. It is given that flag_pic is on and
13244 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): some switch cases/returns are missing from this
   extraction; the visible logic mirrors ix86_legitimate_constant_p's
   unspec handling for the CONST case.  */
13247 legitimate_pic_operand_p (rtx x)
13251 switch (GET_CODE (x))
/* CONST case: strip a trailing "+ CONST_INT" to reach the inner term.  */
13254 inner = XEXP (x, 0);
13255 if (GET_CODE (inner) == PLUS
13256 && CONST_INT_P (XEXP (inner, 1)))
13257 inner = XEXP (inner, 0);
13259 /* Only some unspecs are valid as "constants". */
13260 if (GET_CODE (inner) == UNSPEC)
13261 switch (XINT (inner, 1))
13264 case UNSPEC_GOTOFF:
13265 case UNSPEC_PLTOFF:
13266 return TARGET_64BIT;
/* presumably the (elided) case label above is UNSPEC_TPOFF/NTPOFF:
   the wrapped symbol must use the local-exec TLS model.  */
13268 x = XVECEXP (inner, 0, 0);
13269 return (GET_CODE (x) == SYMBOL_REF
13270 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13271 case UNSPEC_MACHOPIC_OFFSET:
13272 return legitimate_pic_address_disp_p (x);
/* Fallthrough for symbolic operands: defer to the displacement check.  */
13280 return legitimate_pic_address_disp_p (x);
13287 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): large validator with many elided lines (case labels,
   returns, braces).  Two phases are visible: a 64-bit direct-address
   check on symbols/labels, then the 32/64-bit GOT-unspec validation.  */
13291 legitimate_pic_address_disp_p (rtx disp)
13295 /* In 64bit mode we can allow direct addresses of symbols and labels
13296 when they are not dynamic symbols. */
13299 rtx op0 = disp, op1;
13301 switch (GET_CODE (disp))
/* CONST (PLUS sym offset): offset must be a CONST_INT within +/-16MB
   so the linker-relaxed addressing stays in range.  */
13307 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13309 op0 = XEXP (XEXP (disp, 0), 0);
13310 op1 = XEXP (XEXP (disp, 0), 1);
13311 if (!CONST_INT_P (op1)
13312 || INTVAL (op1) >= 16*1024*1024
13313 || INTVAL (op1) < -16*1024*1024)
13315 if (GET_CODE (op0) == LABEL_REF)
13317 if (GET_CODE (op0) == CONST
13318 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13319 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13321 if (GET_CODE (op0) == UNSPEC
13322 && XINT (op0, 1) == UNSPEC_PCREL)
13324 if (GET_CODE (op0) != SYMBOL_REF)
13329 /* TLS references should always be enclosed in UNSPEC.
13330 The dllimported symbol needs always to be resolved. */
13331 if (SYMBOL_REF_TLS_MODEL (op0)
13332 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
/* PE-COFF imported symbols go through the import table instead.  */
13337 if (is_imported_p (op0))
13340 if (SYMBOL_REF_FAR_ADDR_P (op0)
13341 || !SYMBOL_REF_LOCAL_P (op0))
13344 /* Function-symbols need to be resolved only for
13346 For the small-model we don't need to resolve anything
13348 if ((ix86_cmodel != CM_LARGE_PIC
13349 && SYMBOL_REF_FUNCTION_P (op0))
13350 || ix86_cmodel == CM_SMALL_PIC)
13352 /* Non-external symbols don't need to be resolved for
13353 large, and medium-model. */
13354 if ((ix86_cmodel == CM_LARGE_PIC
13355 || ix86_cmodel == CM_MEDIUM_PIC)
13356 && !SYMBOL_REF_EXTERNAL_P (op0))
/* Non-far local symbols (or PIE copy-reloc'able data symbols) are
   directly addressable outside the large model.  */
13359 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13360 && (SYMBOL_REF_LOCAL_P (op0)
13361 || (HAVE_LD_PIE_COPYRELOC
13363 && !SYMBOL_REF_WEAK (op0)
13364 && !SYMBOL_REF_FUNCTION_P (op0)))
13365 && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit: only a bare CONST wrapping one of the GOT-style unspecs is
   acceptable from here on.  */
13373 if (GET_CODE (disp) != CONST)
13375 disp = XEXP (disp, 0);
13379 /* We are unsafe to allow PLUS expressions. This limit allowed distance
13380 of GOT tables. We should not need these anyway. */
13381 if (GET_CODE (disp) != UNSPEC
13382 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13383 && XINT (disp, 1) != UNSPEC_GOTOFF
13384 && XINT (disp, 1) != UNSPEC_PCREL
13385 && XINT (disp, 1) != UNSPEC_PLTOFF))
13388 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13389 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip CONST/PLUS-CONST_INT wrappers, then validate the
   remaining unspec kind.  */
13395 if (GET_CODE (disp) == PLUS)
13397 if (!CONST_INT_P (XEXP (disp, 1)))
13399 disp = XEXP (disp, 0);
13403 if (TARGET_MACHO && darwin_local_data_pic (disp))
13406 if (GET_CODE (disp) != UNSPEC)
13409 switch (XINT (disp, 1))
13414 /* We need to check for both symbols and labels because VxWorks loads
13415 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13417 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13418 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13419 case UNSPEC_GOTOFF:
13420 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13421 While ABI specify also 32bit relocation but we don't produce it in
13422 small PIC model at all. */
13423 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13424 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13426 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: the wrapped SYMBOL_REF's TLS model must match the
   access model each unspec encodes.  */
13428 case UNSPEC_GOTTPOFF:
13429 case UNSPEC_GOTNTPOFF:
13430 case UNSPEC_INDNTPOFF:
13433 disp = XVECEXP (disp, 0, 0);
13434 return (GET_CODE (disp) == SYMBOL_REF
13435 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13436 case UNSPEC_NTPOFF:
13437 disp = XVECEXP (disp, 0, 0);
13438 return (GET_CODE (disp) == SYMBOL_REF
13439 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13440 case UNSPEC_DTPOFF:
13441 disp = XVECEXP (disp, 0, 0);
13442 return (GET_CODE (disp) == SYMBOL_REF
13443 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13449 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13450 replace the input X, or the original X if no replacement is called for.
13451 The output parameter *WIN is 1 if the calling macro should goto WIN,
13452 0 if it should not. */
13455 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13458 /* Reload can generate:
13460 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13464 This RTX is rejected from ix86_legitimate_address_p due to
13465 non-strictness of base register 97. Following this rejection,
13466 reload pushes all three components into separate registers,
13467 creating invalid memory address RTX.
13469 Following code reloads only the invalid part of the
13470 memory address RTX. */
/* Match the (plus (plus ... reg) reg) shape produced by reload for
   TLS addresses; only that shape is repaired here.  */
13472 if (GET_CODE (x) == PLUS
13473 && REG_P (XEXP (x, 1))
13474 && GET_CODE (XEXP (x, 0)) == PLUS
13475 && REG_P (XEXP (XEXP (x, 0), 1)))
13478 bool something_reloaded = false;
/* Reload the inner base register if it fails the strict base check.  */
13480 base = XEXP (XEXP (x, 0), 1);
13481 if (!REG_OK_FOR_BASE_STRICT_P (base))
13483 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13484 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13485 opnum, (enum reload_type) type);
13486 something_reloaded = true;
/* Likewise for the outer register, treated as the index.  */
13489 index = XEXP (x, 1);
13490 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13492 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13493 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13494 opnum, (enum reload_type) type);
13495 something_reloaded = true;
/* At least one of the two must have needed reloading, or we should not
   have been called on this shape.  */
13498 gcc_assert (something_reloaded);
13505 /* Determine if op is suitable RTX for an address register.
13506 Return naked register if a register or a register subreg is
13507 found, otherwise return NULL_RTX. */
13510 ix86_validate_address_register (rtx op)
13512 machine_mode mode = GET_MODE (op);
13514 /* Only SImode or DImode registers can form the address. */
13515 if (mode != SImode && mode != DImode)
/* SUBREG case: peel to the inner register and re-check its mode.  */
13520 else if (GET_CODE (op) == SUBREG)
13522 rtx reg = SUBREG_REG (op);
13527 mode = GET_MODE (reg);
13529 /* Don't allow SUBREGs that span more than a word. It can
13530 lead to spill failures when the register is one word out
13531 of a two word structure. */
13532 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13535 /* Allow only SUBREGs of non-eliminable hard registers. */
13536 if (register_no_elim_operand (reg, mode))
13540 /* Op is not a register. */
13544 /* Recognizes RTL expressions that are valid memory addresses for an
13545 instruction. The MODE argument is the machine mode for the MEM
13546 expression that wants to use this address.
13548 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
13549 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): TARGET_LEGITIMATE_ADDRESS_P hook.  Many reject-paths
   (the "return false" lines after each comment) are elided in this
   extraction; each validation step below ends in such a path.  */
13553 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13555 struct ix86_address parts;
13556 rtx base, index, disp;
13557 HOST_WIDE_INT scale;
13558 enum ix86_address_seg seg;
13560 if (ix86_decompose_address (addr, &parts) <= 0)
13561 /* Decomposition failed. */
13565 index = parts.index;
13567 scale = parts.scale;
13570 /* Validate base register. */
13573 rtx reg = ix86_validate_address_register (base);
13575 if (reg == NULL_RTX)
/* STRICT selects hard-register checks; non-strict also accepts
   pseudos that may later be allocated to valid registers.  */
13578 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13579 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13580 /* Base is not valid. */
13584 /* Validate index register. */
13587 rtx reg = ix86_validate_address_register (index);
13589 if (reg == NULL_RTX)
13592 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13593 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13594 /* Index is not valid. */
13598 /* Index and base should have the same mode. */
13600 && GET_MODE (base) != GET_MODE (index))
13603 /* Address override works only on the (%reg) part of %fs:(%reg). */
13604 if (seg != SEG_DEFAULT
13605 && ((base && GET_MODE (base) != word_mode)
13606 || (index && GET_MODE (index) != word_mode)))
13609 /* Validate scale factor. */
13613 /* Scale without index. */
/* Hardware encodes only 1/2/4/8 scale factors.  */
13616 if (scale != 2 && scale != 4 && scale != 8)
13617 /* Scale is not a valid multiplier. */
13621 /* Validate displacement. */
13624 if (GET_CODE (disp) == CONST
13625 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13626 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13627 switch (XINT (XEXP (disp, 0), 1))
13629 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13630 used. While ABI specify also 32bit relocations, we don't produce
13631 them at all and use IP relative instead. */
13633 case UNSPEC_GOTOFF:
13634 gcc_assert (flag_pic);
13636 goto is_legitimate_pic;
13638 /* 64bit address unspec. */
13641 case UNSPEC_GOTPCREL:
13643 gcc_assert (flag_pic);
13644 goto is_legitimate_pic;
/* TLS unspecs are validated as legitimate displacements (the
   accepting code after the case labels is elided here).  */
13646 case UNSPEC_GOTTPOFF:
13647 case UNSPEC_GOTNTPOFF:
13648 case UNSPEC_INDNTPOFF:
13649 case UNSPEC_NTPOFF:
13650 case UNSPEC_DTPOFF:
13653 case UNSPEC_STACK_CHECK:
13654 gcc_assert (flag_split_stack);
13658 /* Invalid address unspec. */
/* is_legitimate_pic target (label elided): symbolic displacements in
   PIC code get the full displacement validation below.  */
13662 else if (SYMBOLIC_CONST (disp)
13666 && MACHOPIC_INDIRECT
13667 && !machopic_operand_p (disp)
13673 if (TARGET_64BIT && (index || base))
13675 /* foo@dtpoff(%rX) is ok. */
13676 if (GET_CODE (disp) != CONST
13677 || GET_CODE (XEXP (disp, 0)) != PLUS
13678 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13679 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13680 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13681 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13682 /* Non-constant pic memory reference. */
13685 else if ((!TARGET_MACHO || flag_pic)
13686 && ! legitimate_pic_address_disp_p (disp))
13687 /* Displacement is an invalid pic construct. */
13690 else if (MACHO_DYNAMIC_NO_PIC_P
13691 && !ix86_legitimate_constant_p (Pmode, disp))
13692 /* displacment must be referenced via non_lazy_pointer */
13696 /* This code used to verify that a symbolic pic displacement
13697 includes the pic_offset_table_rtx register.
13699 While this is good idea, unfortunately these constructs may
13700 be created by "adds using lea" optimization for incorrect
13709 This code is nonsensical, but results in addressing
13710 GOT table with pic_offset_table_rtx base. We can't
13711 just refuse it easily, since it gets matched by
13712 "addsi3" pattern, that later gets split to lea in the
13713 case output register differs from input. While this
13714 can be handled by separate addsi pattern for this case
13715 that never results in lea, this seems to be easier and
13716 correct fix for crash to disable this test. */
/* Non-PIC displacements must simply be legitimate constants.  */
13718 else if (GET_CODE (disp) != LABEL_REF
13719 && !CONST_INT_P (disp)
13720 && (GET_CODE (disp) != CONST
13721 || !ix86_legitimate_constant_p (Pmode, disp))
13722 && (GET_CODE (disp) != SYMBOL_REF
13723 || !ix86_legitimate_constant_p (Pmode, disp)))
13724 /* Displacement is not constant. */
13726 else if (TARGET_64BIT
13727 && !x86_64_immediate_operand (disp, VOIDmode))
13728 /* Displacement is out of range. */
13730 /* In x32 mode, constant addresses are sign extended to 64bit, so
13731 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13732 else if (TARGET_X32 && !(index || base)
13733 && CONST_INT_P (disp)
13734 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13738 /* Everything looks valid. */
13742 /* Determine if a given RTX is a valid constant address. */
/* A constant address is any CONSTANT_P rtx that also passes the strict
   (reload-complete) legitimate-address check in Pmode.  */
13745 constant_address_p (rtx x)
13747 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13750 /* Return a unique alias set for the GOT. */
/* Lazily allocates the alias set on first use; -1 marks "not yet
   created" (the guard and return lines are elided in this extraction).  */
13752 static alias_set_type
13753 ix86_GOT_alias_set (void)
13755 static alias_set_type set = -1;
13757 set = new_alias_set ();
13761 /* Set regs_ever_live for PIC base address register
13762 to true if required. */
/* Only needed while reload is running; after reload the dataflow
   framework tracks register liveness itself.  */
13764 set_pic_reg_ever_live ()
13766 if (reload_in_progress)
13767 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13770 /* Return a legitimate reference for ORIG (an address) using the
13771 register REG. If REG is 0, a new pseudo is generated.
13773 There are two types of references that must be handled:
13775 1. Global data references must load the address from the GOT, via
13776 the PIC reg. An insn is emitted to do this load, and the reg is
13779 2. Static data references, constant pool addresses, and code labels
13780 compute the address as an offset from the GOT, whose base is in
13781 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13782 differentiate them from global data objects. The returned
13783 address is the PIC reg + an unspec constant.
13785 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13786 reg also appears in the address. */
/* NOTE(review): several guard lines and returns are elided in this
   extraction; each "else if" arm below handles one class of symbol.  */
13789 legitimize_pic_address (rtx orig, rtx reg)
13792 rtx new_rtx = orig;
/* 32-bit Darwin: delegate entirely to the generic Mach-O machinery.  */
13795 if (TARGET_MACHO && !TARGET_64BIT)
13798 reg = gen_reg_rtx (Pmode);
13799 /* Use the generic Mach-O PIC machinery. */
13800 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit PE-COFF dllimport symbols resolve through __imp_ stubs.  */
13804 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13806 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13811 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13813 else if (TARGET_64BIT && !TARGET_PECOFF
13814 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13817 /* This symbol may be referenced via a displacement from the PIC
13818 base address (@GOTOFF). */
13820 set_pic_reg_ever_live ();
13821 if (GET_CODE (addr) == CONST)
13822 addr = XEXP (addr, 0);
13823 if (GET_CODE (addr) == PLUS)
13825 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13827 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13830 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13831 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
/* Materialize the GOTOFF constant, then add the PIC base to it.  */
13833 tmpreg = gen_reg_rtx (Pmode);
13836 emit_move_insn (tmpreg, new_rtx);
13840 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13841 tmpreg, 1, OPTAB_DIRECT);
13845 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13847 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13849 /* This symbol may be referenced via a displacement from the PIC
13850 base address (@GOTOFF). */
13852 set_pic_reg_ever_live ();
13853 if (GET_CODE (addr) == CONST)
13854 addr = XEXP (addr, 0);
13855 if (GET_CODE (addr) == PLUS)
13857 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13859 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13862 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13863 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13864 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13868 emit_move_insn (reg, new_rtx);
/* Plain non-TLS symbols (and VxWorks text labels) go through the GOT.  */
13872 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13873 /* We can't use @GOTOFF for text labels on VxWorks;
13874 see gotoff_operand. */
13875 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13877 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13881 /* For x64 PE-COFF there is no GOT table. So we use address
13883 if (TARGET_64BIT && TARGET_PECOFF)
13885 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13886 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13889 reg = gen_reg_rtx (Pmode);
13890 emit_move_insn (reg, new_rtx);
/* 64-bit small/medium models: RIP-relative GOT load (@GOTPCREL).  */
13893 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13895 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13896 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13897 new_rtx = gen_const_mem (Pmode, new_rtx);
13898 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13901 reg = gen_reg_rtx (Pmode);
13902 /* Use directly gen_movsi, otherwise the address is loaded
13903 into register for CSE. We don't want to CSE this addresses,
13904 instead we CSE addresses from the GOT table, so skip this. */
13905 emit_insn (gen_movsi (reg, new_rtx));
13910 /* This symbol must be referenced via a load from the
13911 Global Offset Table (@GOT). */
13913 set_pic_reg_ever_live ();
13914 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13915 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13917 new_rtx = force_reg (Pmode, new_rtx);
13918 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13919 new_rtx = gen_const_mem (Pmode, new_rtx);
13920 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13923 reg = gen_reg_rtx (Pmode);
13924 emit_move_insn (reg, new_rtx);
/* Fallback: non-symbolic constants and composite CONST expressions.  */
13930 if (CONST_INT_P (addr)
13931 && !x86_64_immediate_operand (addr, VOIDmode))
13935 emit_move_insn (reg, addr);
13939 new_rtx = force_reg (Pmode, addr);
13941 else if (GET_CODE (addr) == CONST)
13943 addr = XEXP (addr, 0);
13945 /* We must match stuff we generate before. Assume the only
13946 unspecs that can get here are ours. Not that we could do
13947 anything with them anyway.... */
13948 if (GET_CODE (addr) == UNSPEC
13949 || (GET_CODE (addr) == PLUS
13950 && GET_CODE (XEXP (addr, 0)) == UNSPEC)
13952 gcc_assert (GET_CODE (addr) == PLUS);
13954 if (GET_CODE (addr) == PLUS)
13956 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13958 /* Check first to see if this is a constant offset from a @GOTOFF
13959 symbol reference. */
13960 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13961 && CONST_INT_P (op1))
13965 set_pic_reg_ever_live ();
13966 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13968 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13969 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13970 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13974 emit_move_insn (reg, new_rtx);
/* Offsets beyond +/-16MB cannot be folded into relocations; force
   the parts into registers instead.  */
13980 if (INTVAL (op1) < -16*1024*1024
13981 || INTVAL (op1) >= 16*1024*1024)
13983 if (!x86_64_immediate_operand (op1, Pmode))
13984 op1 = force_reg (Pmode, op1);
13985 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands recursively and recombine.  */
13991 rtx base = legitimize_pic_address (op0, reg);
13992 machine_mode mode = GET_MODE (base);
13994 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13996 if (CONST_INT_P (new_rtx))
13998 if (INTVAL (new_rtx) < -16*1024*1024
13999 || INTVAL (new_rtx) >= 16*1024*1024)
14001 if (!x86_64_immediate_operand (new_rtx, mode))
14002 new_rtx = force_reg (mode, new_rtx);
14004 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
14007 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
14011 /* For %rip addressing, we have to use just disp32, not
14014 && (GET_CODE (base) == SYMBOL_REF
14015 || GET_CODE (base) == LABEL_REF))
14016 base = force_reg (mode, base);
14017 if (GET_CODE (new_rtx) == PLUS
14018 && CONSTANT_P (XEXP (new_rtx, 1)))
14020 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
14021 new_rtx = XEXP (new_rtx, 1);
14023 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
14031 /* Load the thread pointer. If TO_REG is true, force it into a register. */
14034 get_thread_pointer (machine_mode tp_mode, bool to_reg)
14036 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* On x32 the thread pointer is SImode but may be wanted in DImode;
   only that widening combination is expected here.  */
14038 if (GET_MODE (tp) != tp_mode)
14040 gcc_assert (GET_MODE (tp) == SImode);
14041 gcc_assert (tp_mode == DImode);
14043 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
14047 tp = copy_to_mode_reg (tp_mode, tp);
14052 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14054 static GTY(()) rtx ix86_tls_symbol;
14057 ix86_tls_get_addr (void)
/* Lazily create the SYMBOL_REF; 32-bit GNU TLS uses the triple-underscore
   variant whose calling convention passes the argument in a register.  */
14059 if (!ix86_tls_symbol)
14062 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
14063 ? "___tls_get_addr" : "__tls_get_addr")
14065 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
/* Large PIC model cannot reach the symbol directly; address it as
   PIC base + PLTOFF-style unspec instead.  */
14068 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
14070 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
14072 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
14073 gen_rtx_CONST (Pmode, unspec));
14076 return ix86_tls_symbol;
14079 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14081 static GTY(()) rtx ix86_tls_module_base_symbol;
14084 ix86_tls_module_base (void)
/* Lazily create the symbol and mark it as a global-dynamic TLS symbol
   so later TLS-model checks treat it correctly.  */
14086 if (!ix86_tls_module_base_symbol)
14088 ix86_tls_module_base_symbol
14089 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
14091 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14092 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14095 return ix86_tls_module_base_symbol;
14098 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
14099 false if we expect this to be used for a memory address and true if
14100 we expect to load the address into a register. */
/* NOTE(review): one case per TLS access model (GD, LD, IE, LE); a number
   of guard lines and returns are elided in this extraction.  */
14103 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
14105 rtx dest, base, off;
14106 rtx pic = NULL_RTX, tp = NULL_RTX;
14107 machine_mode tp_mode = Pmode;
14110 /* Fall back to global dynamic model if tool chain cannot support local
14112 if (TARGET_SUN_TLS && !TARGET_64BIT
14113 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
14114 && model == TLS_MODEL_LOCAL_DYNAMIC)
14115 model = TLS_MODEL_GLOBAL_DYNAMIC;
14119 case TLS_MODEL_GLOBAL_DYNAMIC:
14120 dest = gen_reg_rtx (Pmode);
/* 32-bit needs a PIC register for the __tls_get_addr call; reuse the
   global one when flag_pic, otherwise synthesize it with set_got.  */
14124 if (flag_pic && !TARGET_PECOFF)
14125 pic = pic_offset_table_rtx;
14128 pic = gen_reg_rtx (Pmode);
14129 emit_insn (gen_set_got (pic));
/* GNU2 (TLSDESC) dialect avoids the libcall entirely.  */
14133 if (TARGET_GNU2_TLS)
14136 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14138 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14140 tp = get_thread_pointer (Pmode, true);
14141 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14143 if (GET_MODE (x) != Pmode)
14144 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14146 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Classic GNU dialect: emit the __tls_get_addr libcall sequence.  */
14150 rtx caddr = ix86_tls_get_addr ();
14154 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14159 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14160 insns = get_insns ();
14163 if (GET_MODE (x) != Pmode)
14164 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14166 RTL_CONST_CALL_P (insns) = 1;
14167 emit_libcall_block (insns, dest, rax, x);
14170 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14174 case TLS_MODEL_LOCAL_DYNAMIC:
14175 base = gen_reg_rtx (Pmode);
14180 pic = pic_offset_table_rtx;
14183 pic = gen_reg_rtx (Pmode);
14184 emit_insn (gen_set_got (pic));
14188 if (TARGET_GNU2_TLS)
14190 rtx tmp = ix86_tls_module_base ();
14193 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14195 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14197 tp = get_thread_pointer (Pmode, true);
14198 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14199 gen_rtx_MINUS (Pmode, tmp, tp));
14203 rtx caddr = ix86_tls_get_addr ();
14207 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14213 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14214 insns = get_insns ();
14217 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14218 share the LD_BASE result with other LD model accesses. */
14219 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14220 UNSPEC_TLS_LD_BASE);
14222 RTL_CONST_CALL_P (insns) = 1;
14223 emit_libcall_block (insns, base, rax, eqv);
14226 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* Address = module base + symbol's DTPOFF offset.  */
14229 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14230 off = gen_rtx_CONST (Pmode, off);
14232 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14234 if (TARGET_GNU2_TLS)
14236 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14238 if (GET_MODE (x) != Pmode)
14239 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14241 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14245 case TLS_MODEL_INITIAL_EXEC:
14248 if (TARGET_SUN_TLS && !TARGET_X32)
14250 /* The Sun linker took the AMD64 TLS spec literally
14251 and can only handle %rax as destination of the
14252 initial executable code sequence. */
14254 dest = gen_reg_rtx (DImode);
14255 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14259 /* Generate DImode references to avoid %fs:(%reg32)
14260 problems and linker IE->LE relaxation bug. */
14263 type = UNSPEC_GOTNTPOFF;
/* 32-bit PIC: the GOT slot holding the TP offset is addressed off
   the PIC register.  */
14267 set_pic_reg_ever_live ();
14268 pic = pic_offset_table_rtx;
14269 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14271 else if (!TARGET_ANY_GNU_TLS)
14273 pic = gen_reg_rtx (Pmode);
14274 emit_insn (gen_set_got (pic));
14275 type = UNSPEC_GOTTPOFF;
14280 type = UNSPEC_INDNTPOFF;
14283 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14284 off = gen_rtx_CONST (tp_mode, off);
14286 off = gen_rtx_PLUS (tp_mode, pic, off);
14287 off = gen_const_mem (tp_mode, off);
14288 set_mem_alias_set (off, ix86_GOT_alias_set ());
14290 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14292 base = get_thread_pointer (tp_mode,
14293 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14294 off = force_reg (tp_mode, off);
14295 return gen_rtx_PLUS (tp_mode, base, off);
/* Non-GNU TLS subtracts the offset from the thread pointer instead.  */
14299 base = get_thread_pointer (Pmode, true);
14300 dest = gen_reg_rtx (Pmode);
14301 emit_insn (ix86_gen_sub3 (dest, base, off));
14305 case TLS_MODEL_LOCAL_EXEC:
14306 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14307 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14308 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14309 off = gen_rtx_CONST (Pmode, off);
14311 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14313 base = get_thread_pointer (Pmode,
14314 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14315 return gen_rtx_PLUS (Pmode, base, off);
14319 base = get_thread_pointer (Pmode, true);
14320 dest = gen_reg_rtx (Pmode);
14321 emit_insn (ix86_gen_sub3 (dest, base, off));
14326 gcc_unreachable ();
14332 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14333 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14334 unique refptr-DECL symbol corresponding to symbol DECL. */
/* Hash traits for the GC-managed dllimport_map below: keyed on the
   source decl pointer, hash precomputed in tree_map::hash.  */
14336 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14338 static inline hashval_t hash (tree_map *m) { return m->hash; }
14340 equal (tree_map *a, tree_map *b)
14342 return a->base.from == b->base.from;
/* GC cache hook: drop entries whose key decl was not marked live,
   so the cache does not keep otherwise-dead decls alive.  */
14346 handle_cache_entry (tree_map *&m)
14348 extern void gt_ggc_mx (tree_map *&);
14349 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14351 else if (ggc_marked_p (m->base.from))
14354 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
/* Decl -> stub-decl cache, GC-managed with the cache semantics above.  */
14358 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
/* Look up (or build) the artificial VAR_DECL representing either the
   __imp_DECL dllimport pointer (BEIMPORT) or the refptr-DECL stub.  */
14361 get_dllimport_decl (tree decl, bool beimport)
14363 struct tree_map *h, in;
14365 const char *prefix;
14366 size_t namelen, prefixlen;
14371 if (!dllimport_map)
14372 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14374 in.hash = htab_hash_pointer (decl);
14375 in.base.from = decl;
14376 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
/* Cache miss: build the artificial external read-only pointer decl.  */
14381 *loc = h = ggc_alloc<tree_map> ();
14383 h->base.from = decl;
14384 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14385 VAR_DECL, NULL, ptr_type_node);
14386 DECL_ARTIFICIAL (to) = 1;
14387 DECL_IGNORED_P (to) = 1;
14388 DECL_EXTERNAL (to) = 1;
14389 TREE_READONLY (to) = 1;
14391 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14392 name = targetm.strip_name_encoding (name);
/* The "*" prefix tells the assembler-name machinery not to re-prefix;
   fastcall names and no-underscore targets use the single-underscore
   __imp_ variant.  */
14394 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14395 ? "*__imp_" : "*__imp__";
14397 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14398 namelen = strlen (name);
14399 prefixlen = strlen (prefix);
14400 imp_name = (char *) alloca (namelen + prefixlen + 1);
14401 memcpy (imp_name, prefix, prefixlen);
14402 memcpy (imp_name + prefixlen, name, namelen + 1);
14404 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14405 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14406 SET_SYMBOL_REF_DECL (rtl, to);
14407 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14410 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14411 #ifdef SUB_TARGET_RECORD_STUB
14412 SUB_TARGET_RECORD_STUB (name);
/* The decl's RTL is a load through the stub pointer, aliased with
   the GOT alias set.  */
14416 rtl = gen_const_mem (Pmode, rtl);
14417 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14419 SET_DECL_RTL (to, rtl);
14420 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14425 /* Expand SYMBOL into its corresponding far-address symbol.
14426 WANT_REG is true if we require the result be a register. */
14429 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14434 gcc_assert (SYMBOL_REF_DECL (symbol));
/* beimport == false: go through the "refptr.DECL" indirection cell.  */
14435 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14437 x = DECL_RTL (imp_decl);
14439 x = force_reg (Pmode, x);
14443 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14444 true if we require the result be a register. */
14447 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14452 gcc_assert (SYMBOL_REF_DECL (symbol));
/* beimport == true: go through the "__imp_DECL" import pointer.  */
14453 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14455 x = DECL_RTL (imp_decl);
14457 x = force_reg (Pmode, x);
14461 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14462 is true if we require the result be a register. */
/* Note: the parameter is spelled INREG but plays the WANT_REG role
   described above.  Returns NULL-ish (fallthrough, elided here) when
   ADDR needs no PE/COFF rewriting.  */
14465 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14467 if (!TARGET_PECOFF)
/* dllimport attributes: rewrite SYMBOL_REF marked dllimport, either
   bare or as the base of a (const (plus sym off)) expression, keeping
   the offset outside the indirection.  */
14470 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14472 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14473 return legitimize_dllimport_symbol (addr, inreg);
14474 if (GET_CODE (addr) == CONST
14475 && GET_CODE (XEXP (addr, 0)) == PLUS
14476 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14477 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14479 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14480 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* Outside the medium/large PIC models, external non-imported symbols
   with a decl go through the refptr indirection instead, again bare or
   offset.  */
14484 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14486 if (GET_CODE (addr) == SYMBOL_REF
14487 && !is_imported_p (addr)
14488 && SYMBOL_REF_EXTERNAL_P (addr)
14489 && SYMBOL_REF_DECL (addr))
14490 return legitimize_pe_coff_extern_decl (addr, inreg);
14492 if (GET_CODE (addr) == CONST
14493 && GET_CODE (XEXP (addr, 0)) == PLUS
14494 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14495 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14496 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14497 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14499 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14500 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14505 /* Try machine-dependent ways of modifying an illegitimate address
14506 to be legitimate. If we find one, return the new, valid address.
14507 This macro is used in only one place: `memory_address' in explow.c.
14509 OLDX is the address as it was before break_out_memory_refs was called.
14510 In some cases it is useful to look at this to decide what needs to be done.
14512 It is always safe for this macro to do nothing. It exists to recognize
14513 opportunities to optimize the output.
14515 For the 80386, we handle X+REG by loading X into a register R and
14516 using R+REG. R will go in a general reg and indexing will be used.
14517 However, if REG is a broken-out memory address or multiplication,
14518 nothing needs to be done because REG can certainly go in a general reg.
14520 When -fpic is used, special handling is needed for symbolic references.
14521 See comments by legitimize_pic_address in i386.c for details. */
14524 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14526 bool changed = false;
/* TLS symbols — bare, or the base of (const (plus sym off)) — are
   rewritten by legitimize_tls_address for their TLS model.  */
14529 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14531 return legitimize_tls_address (x, (enum tls_model) log, false);
14532 if (GET_CODE (x) == CONST
14533 && GET_CODE (XEXP (x, 0)) == PLUS
14534 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14535 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14537 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14538 (enum tls_model) log, false);
14539 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PE/COFF dllimport/refptr indirection, if applicable.  */
14542 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14544 rtx tmp = legitimize_pe_coff_symbol (x, true);
14549 if (flag_pic && SYMBOLIC_CONST (x))
14550 return legitimize_pic_address (x, 0);
14553 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14554 return machopic_indirect_data_reference (x, 0);
14557 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14558 if (GET_CODE (x) == ASHIFT
14559 && CONST_INT_P (XEXP (x, 1))
14560 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14563 log = INTVAL (XEXP (x, 1));
14564 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14565 GEN_INT (1 << log));
14568 if (GET_CODE (x) == PLUS)
14570 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14572 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14573 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14574 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14577 log = INTVAL (XEXP (XEXP (x, 0), 1));
14578 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14579 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14580 GEN_INT (1 << log));
14583 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14584 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14585 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14588 log = INTVAL (XEXP (XEXP (x, 1), 1))
14589 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14590 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14591 GEN_INT (1 << log));
14594 /* Put multiply first if it isn't already. */
14595 if (GET_CODE (XEXP (x, 1)) == MULT)
14597 std::swap (XEXP (x, 0), XEXP (x, 1));
14601 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14602 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14603 created by virtual register instantiation, register elimination, and
14604 similar optimizations. */
14605 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14608 x = gen_rtx_PLUS (Pmode,
14609 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14610 XEXP (XEXP (x, 1), 0)),
14611 XEXP (XEXP (x, 1), 1));
14615 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14616 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14617 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14618 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14619 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14620 && CONSTANT_P (XEXP (x, 1)))
14623 rtx other = NULL_RTX;
/* Fold whichever of the two constants is a CONST_INT into the other
   addend via plus_constant.  */
14625 if (CONST_INT_P (XEXP (x, 1)))
14627 constant = XEXP (x, 1);
14628 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14630 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14632 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14633 other = XEXP (x, 1);
14641 x = gen_rtx_PLUS (Pmode,
14642 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14643 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14644 plus_constant (Pmode, other,
14645 INTVAL (constant)));
/* Stop as soon as a rewrite has produced a directly legitimate
   address.  */
14649 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Otherwise force any remaining MULT operands into registers.  */
14652 if (GET_CODE (XEXP (x, 0)) == MULT)
14655 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14658 if (GET_CODE (XEXP (x, 1)) == MULT)
14661 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14665 && REG_P (XEXP (x, 1))
14666 && REG_P (XEXP (x, 0)))
14669 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14672 x = legitimize_pic_address (x, 0);
14675 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Last resort: force one operand into a fresh Pmode register,
   sign/zero-extending via convert_to_mode if needed, so the sum
   becomes reg+op.  */
14678 if (REG_P (XEXP (x, 0)))
14680 rtx temp = gen_reg_rtx (Pmode);
14681 rtx val = force_operand (XEXP (x, 1), temp);
14684 val = convert_to_mode (Pmode, val, 1);
14685 emit_move_insn (temp, val);
14688 XEXP (x, 1) = temp;
14692 else if (REG_P (XEXP (x, 1)))
14694 rtx temp = gen_reg_rtx (Pmode);
14695 rtx val = force_operand (XEXP (x, 0), temp);
14698 val = convert_to_mode (Pmode, val, 1);
14699 emit_move_insn (temp, val);
14702 XEXP (x, 0) = temp;
14710 /* Print an integer constant expression in assembler syntax. Addition
14711 and subtraction are the only arithmetic that may appear in these
14712 expressions. FILE is the stdio stream to write to, X is the rtx, and
14713 CODE is the operand print code from the output string. */
/* NOTE(review): several case labels of the two switches below are
   elided in this extract; the groupings are inferred from the visible
   bodies.  */
14716 output_pic_addr_const (FILE *file, rtx x, int code)
14720 switch (GET_CODE (x))
14723 gcc_assert (flag_pic);
/* Symbol reference: emit its (possibly Mach-O stub) name, with @PLT
   for non-local calls where appropriate.  */
14728 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14729 output_addr_const (file, x);
14732 const char *name = XSTR (x, 0);
14734 /* Mark the decl as referenced so that cgraph will
14735 output the function. */
14736 if (SYMBOL_REF_DECL (x))
14737 mark_decl_referenced (SYMBOL_REF_DECL (x));
14740 if (MACHOPIC_INDIRECT
14741 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14742 name = machopic_indirection_name (x, /*stub_p=*/true);
14744 assemble_name (file, name);
14746 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14747 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14748 fputs ("@PLT", file);
/* Internal label.  */
14755 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14756 assemble_name (asm_out_file, buf);
14760 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14764 /* This used to output parentheses around the expression,
14765 but that does not work on the 386 (either ATT or BSD assembler). */
14766 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE: integral (VOIDmode) values only; FP is rejected.  */
14770 if (GET_MODE (x) == VOIDmode)
14772 /* We can use %d if the number is <32 bits and positive. */
14773 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14774 fprintf (file, "0x%lx%08lx",
14775 (unsigned long) CONST_DOUBLE_HIGH (x),
14776 (unsigned long) CONST_DOUBLE_LOW (x));
14778 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14781 /* We can't handle floating point constants;
14782 TARGET_PRINT_OPERAND must handle them. */
14783 output_operand_lossage ("floating constant misused");
14787 /* Some assemblers need integer constants to appear first. */
14788 if (CONST_INT_P (XEXP (x, 0)))
14790 output_pic_addr_const (file, XEXP (x, 0), code);
14792 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS (presumably): second operand must be a constant; separator
   elided here.  */
14796 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14797 output_pic_addr_const (file, XEXP (x, 1), code);
14799 output_pic_addr_const (file, XEXP (x, 0), code);
14805 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14806 output_pic_addr_const (file, XEXP (x, 0), code);
14808 output_pic_addr_const (file, XEXP (x, 1), code);
14810 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by the relocation
   suffix selected by the unspec number.  */
14814 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14816 bool f = i386_asm_output_addr_const_extra (file, x);
14821 gcc_assert (XVECLEN (x, 0) == 1);
14822 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14823 switch (XINT (x, 1))
14826 fputs ("@GOT", file);
14828 case UNSPEC_GOTOFF:
14829 fputs ("@GOTOFF", file);
14831 case UNSPEC_PLTOFF:
14832 fputs ("@PLTOFF", file);
14835 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14836 "(%rip)" : "[rip]", file);
14838 case UNSPEC_GOTPCREL:
14839 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14840 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14842 case UNSPEC_GOTTPOFF:
14843 /* FIXME: This might be @TPOFF in Sun ld too. */
14844 fputs ("@gottpoff", file);
14847 fputs ("@tpoff", file);
14849 case UNSPEC_NTPOFF:
14851 fputs ("@tpoff", file);
14853 fputs ("@ntpoff", file);
14855 case UNSPEC_DTPOFF:
14856 fputs ("@dtpoff", file);
14858 case UNSPEC_GOTNTPOFF:
14860 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14861 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14863 fputs ("@gotntpoff", file);
14865 case UNSPEC_INDNTPOFF:
14866 fputs ("@indntpoff", file);
14869 case UNSPEC_MACHOPIC_OFFSET:
14871 machopic_output_function_base_name (file);
14875 output_operand_lossage ("invalid UNSPEC as operand");
14881 output_operand_lossage ("invalid expression as operand");
14885 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14886 We need to emit DTP-relative relocations. */
14888 static void ATTRIBUTE_UNUSED
14889 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit ".long X@dtpoff"; SIZE handling (switch elided here) pads the
   8-byte case with ", 0" and rejects other sizes.  */
14891 fputs (ASM_LONG, file);
14892 output_addr_const (file, x);
14893 fputs ("@dtpoff", file);
14899 fputs (", 0", file);
14902 gcc_unreachable ();
14906 /* Return true if X is a representation of the PIC register. This copes
14907 with calls from ix86_find_base_term, where the register might have
14908 been replaced by a cselib value. */
14911 ix86_pic_register_p (rtx x)
14913 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14914 return (pic_offset_table_rtx
14915 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx))
14916 else if (!REG_P (x))
14918 else if (pic_offset_table_rtx)
/* Match either the current PIC register number, or a hard reg whose
   ORIGINAL_REGNO records the (pseudo) PIC register it replaced.  */
14920 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14922 if (HARD_REGISTER_P (x)
14923 && !HARD_REGISTER_P (pic_offset_table_rtx)
14924 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
/* No pic_offset_table_rtx: fall back to the fixed register number.  */
14929 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14932 /* Helper function for ix86_delegitimize_address.
14933 Attempt to delegitimize TLS local-exec accesses. */
14936 ix86_delegitimize_tls_address (rtx orig_x)
14938 rtx x = orig_x, unspec;
14939 struct ix86_address addr;
14941 if (!TARGET_TLS_DIRECT_SEG_REFS)
14945 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
/* Only addresses of the form seg:disp(...) where seg is the TLS
   segment and disp wraps an UNSPEC_NTPOFF qualify.  */
14947 if (ix86_decompose_address (x, &addr) == 0
14948 || addr.seg != DEFAULT_TLS_SEG_REG
14949 || addr.disp == NULL_RTX
14950 || GET_CODE (addr.disp) != CONST)
14952 unspec = XEXP (addr.disp, 0);
14953 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14954 unspec = XEXP (unspec, 0);
14955 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
/* Rebuild a plain symbol (+ offset, index*scale, base) expression
   from the decomposed address parts.  */
14957 x = XVECEXP (unspec, 0, 0);
14958 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14959 if (unspec != XEXP (addr.disp, 0))
14960 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14963 rtx idx = addr.index;
14964 if (addr.scale != 1)
14965 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14966 x = gen_rtx_PLUS (Pmode, idx, x);
14969 x = gen_rtx_PLUS (Pmode, addr.base, x);
14970 if (MEM_P (orig_x))
14971 x = replace_equiv_address_nv (orig_x, x);
14975 /* In the name of slightly smaller debug output, and to cater to
14976 general assembler lossage, recognize PIC+GOTOFF and turn it back
14977 into a direct symbol reference.
14979 On Darwin, this is necessary to avoid a crash, because Darwin
14980 has a different PIC label for each routine but the DWARF debugging
14981 information is not associated with any particular routine, so it's
14982 necessary to remove references to the PIC label from RTL stored by
14983 the DWARF output code. */
14986 ix86_delegitimize_address (rtx x)
14988 rtx orig_x = delegitimize_mem_from_attrs (x);
14989 /* addend is NULL or some rtx if x is something+GOTOFF where
14990 something doesn't include the PIC register. */
14991 rtx addend = NULL_RTX;
14992 /* reg_addend is NULL or a multiple of some register. */
14993 rtx reg_addend = NULL_RTX;
14994 /* const_addend is NULL or a const_int. */
14995 rtx const_addend = NULL_RTX;
14996 /* This is the result, or NULL. */
14997 rtx result = NULL_RTX;
/* (const (plus (unspec [sym] PCREL) off)) -> off + sym.  */
15006 if (GET_CODE (x) == CONST
15007 && GET_CODE (XEXP (x, 0)) == PLUS
15008 && GET_MODE (XEXP (x, 0)) == Pmode
15009 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15010 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
15011 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
15013 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
15014 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
15015 if (MEM_P (orig_x))
15016 x = replace_equiv_address_nv (orig_x, x);
/* (const (unspec [sym] GOTPCREL/PCREL)) -> sym, with a subreg fixup
   when the original MEM had a narrower mode.  */
15020 if (GET_CODE (x) == CONST
15021 && GET_CODE (XEXP (x, 0)) == UNSPEC
15022 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
15023 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
15024 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
15026 x = XVECEXP (XEXP (x, 0), 0, 0);
15027 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
15029 x = simplify_gen_subreg (GET_MODE (orig_x), x,
15037 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
15038 return ix86_delegitimize_tls_address (orig_x);
15040 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
15041 and -mcmodel=medium -fpic. */
15044 if (GET_CODE (x) != PLUS
15045 || GET_CODE (XEXP (x, 1)) != CONST)
15046 return ix86_delegitimize_tls_address (orig_x);
/* Classify the non-CONST operand: the PIC register itself, the PIC
   register plus a scaled index, or an unrelated addend.  */
15048 if (ix86_pic_register_p (XEXP (x, 0)))
15049 /* %ebx + GOT/GOTOFF */
15051 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15053 /* %ebx + %reg * scale + GOT/GOTOFF */
15054 reg_addend = XEXP (x, 0);
15055 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
15056 reg_addend = XEXP (reg_addend, 1);
15057 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
15058 reg_addend = XEXP (reg_addend, 0);
15061 reg_addend = NULL_RTX;
15062 addend = XEXP (x, 0);
15066 addend = XEXP (x, 0);
/* Peel an optional constant offset off the CONST body, then require
   one of the recognized GOT/GOTOFF/PLTOFF (or Darwin local) unspecs.  */
15068 x = XEXP (XEXP (x, 1), 0);
15069 if (GET_CODE (x) == PLUS
15070 && CONST_INT_P (XEXP (x, 1)))
15072 const_addend = XEXP (x, 1);
15076 if (GET_CODE (x) == UNSPEC
15077 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
15078 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
15079 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
15080 && !MEM_P (orig_x) && !addend)))
15081 result = XVECEXP (x, 0, 0);
15083 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
15084 && !MEM_P (orig_x))
15085 result = XVECEXP (x, 0, 0);
15088 return ix86_delegitimize_tls_address (orig_x);
/* Reassemble result + const offset + register addend.  */
15091 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
15093 result = gen_rtx_PLUS (Pmode, reg_addend, result);
15096 /* If the rest of original X doesn't involve the PIC register, add
15097 addend and subtract pic_offset_table_rtx. This can happen e.g.
15099 leal (%ebx, %ecx, 4), %ecx
15101 movl foo@GOTOFF(%ecx), %edx
15102 in which case we return (%ecx - %ebx) + foo
15103 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
15104 and reload has completed. */
15105 if (pic_offset_table_rtx
15106 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
15107 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
15108 pic_offset_table_rtx),
15110 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
15112 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
15113 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
15114 result = gen_rtx_PLUS (Pmode, tmp, result);
/* Narrow back to the original MEM's mode if needed.  */
15119 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15121 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15122 if (result == NULL_RTX)
15128 /* If X is a machine specific address (i.e. a symbol or label being
15129 referenced as a displacement from the GOT implemented using an
15130 UNSPEC), then return the base term. Otherwise return X. */
15133 ix86_find_base_term (rtx x)
15139 if (GET_CODE (x) != CONST)
/* Strip an optional constant offset, then unwrap a GOTPCREL/PCREL
   unspec to its underlying symbol.  */
15141 term = XEXP (x, 0);
15142 if (GET_CODE (term) == PLUS
15143 && (CONST_INT_P (XEXP (term, 1))
15144 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
15145 term = XEXP (term, 0);
15146 if (GET_CODE (term) != UNSPEC
15147 || (XINT (term, 1) != UNSPEC_GOTPCREL
15148 && XINT (term, 1) != UNSPEC_PCREL))
15151 return XVECEXP (term, 0, 0);
/* Non-64-bit (or non-CONST) path: fall back to full
   delegitimization.  */
15154 return ix86_delegitimize_address (x);
/* Write to FILE the instruction suffix (e.g. "e", "a", "b", "p") for
   condition CODE under flags mode MODE.  REVERSE inverts the
   condition first; FP selects the alternate spellings ("nbe", "nb",
   "u", ...) needed by fcmov on some assemblers.  Case labels of the
   big switch are elided in this extract.  */
15158 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15159 bool fp, FILE *file)
15161 const char *suffix;
/* FP compares are first mapped onto integer condition codes.  */
15163 if (mode == CCFPmode || mode == CCFPUmode)
15165 code = ix86_fp_compare_code_to_integer (code);
15169 code = reverse_condition (code);
15220 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15224 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15225 Those same assemblers have the same but opposite lossage on cmov. */
15226 if (mode == CCmode)
15227 suffix = fp ? "nbe" : "a";
15229 gcc_unreachable ();
15245 gcc_unreachable ();
15249 if (mode == CCmode)
15251 else if (mode == CCCmode)
15252 suffix = fp ? "b" : "c";
15254 gcc_unreachable ();
15270 gcc_unreachable ();
15274 if (mode == CCmode)
15276 else if (mode == CCCmode)
15277 suffix = fp ? "nb" : "nc";
15279 gcc_unreachable ();
15282 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15286 if (mode == CCmode)
15289 gcc_unreachable ();
15292 suffix = fp ? "u" : "p";
15295 suffix = fp ? "nu" : "np";
15298 gcc_unreachable ();
15300 fputs (suffix, file);
15303 /* Print the name of register X to FILE based on its machine mode and number.
15304 If CODE is 'w', pretend the mode is HImode.
15305 If CODE is 'b', pretend the mode is QImode.
15306 If CODE is 'k', pretend the mode is SImode.
15307 If CODE is 'q', pretend the mode is DImode.
15308 If CODE is 'x', pretend the mode is V4SFmode.
15309 If CODE is 't', pretend the mode is V8SFmode.
15310 If CODE is 'g', pretend the mode is V16SFmode.
15311 If CODE is 'h', pretend the reg is the 'high' byte register.
15312 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15313 If CODE is 'd', duplicate the operand for AVX instruction.
15317 print_reg (rtx x, int code, FILE *file)
15320 unsigned int regno;
15321 bool duplicated = code == 'd' && TARGET_AVX;
15323 if (ASSEMBLER_DIALECT == ASM_ATT)
/* Special case: the PC rtx prints as "rip" (64-bit only).  */
15328 gcc_assert (TARGET_64BIT);
15329 fputs ("rip", file);
15333 regno = true_regnum (x);
15334 gcc_assert (regno != ARG_POINTER_REGNUM
15335 && regno != FRAME_POINTER_REGNUM
15336 && regno != FLAGS_REG
15337 && regno != FPSR_REG
15338 && regno != FPCR_REG);
/* Translate the override CODE into an effective operand size in
   bytes (2, 1, 4, 8, ...), defaulting to the mode's size.  */
15340 if (code == 'w' || MMX_REG_P (x))
15342 else if (code == 'b')
15344 else if (code == 'k')
15346 else if (code == 'q')
15348 else if (code == 'y')
15350 else if (code == 'h')
15352 else if (code == 'x')
15354 else if (code == 't')
15356 else if (code == 'g')
15359 code = GET_MODE_SIZE (GET_MODE (x));
15361 /* Irritatingly, AMD extended registers use different naming convention
15362 from the normal registers: "r%d[bwd]" */
15363 if (REX_INT_REGNO_P (regno))
15365 gcc_assert (TARGET_64BIT);
15367 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15371 error ("extended registers have no high halves");
15386 error ("unsupported operand size for extended register");
/* Non-REX registers: pick the name table by size (switch case labels
   elided in this extract).  */
15396 if (STACK_TOP_P (x))
/* 4/8-byte integer regs get the "e"/"r" prefix unless FP/mask/bound.  */
15405 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15406 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15411 reg = hi_reg_name[regno];
15414 if (regno >= ARRAY_SIZE (qi_reg_name))
15416 reg = qi_reg_name[regno];
15419 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15421 reg = qi_high_reg_name[regno];
/* SSE/AVX sizes print the mode-prefixed hi_reg_name entry.  */
15426 gcc_assert (!duplicated);
15428 fputs (hi_reg_name[regno] + 1, file);
15434 gcc_assert (!duplicated);
15436 fputs (hi_reg_name[REGNO (x)] + 1, file);
15441 gcc_unreachable ();
/* 'd' with AVX: emit the register a second time, comma-separated.  */
15447 if (ASSEMBLER_DIALECT == ASM_ATT)
15448 fprintf (file, ", %%%s", reg);
15450 fprintf (file, ", %s", reg);
15454 /* Meaning of CODE:
15455 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15456 C -- print opcode suffix for set/cmov insn.
15457 c -- like C, but print reversed condition
15458 F,f -- likewise, but for floating-point.
15459 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15461 R -- print embeded rounding and sae.
15462 r -- print only sae.
15463 z -- print the opcode suffix for the size of the current operand.
15464 Z -- likewise, with special suffixes for x87 instructions.
15465 * -- print a star (in certain assembler syntax)
15466 A -- print an absolute memory reference.
15467 E -- print address with DImode register names if TARGET_64BIT.
15468 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15469 s -- print a shift double count, followed by the assemblers argument
15471 b -- print the QImode name of the register for the indicated operand.
15472 %b0 would print %al if operands[0] is reg 0.
15473 w -- likewise, print the HImode name of the register.
15474 k -- likewise, print the SImode name of the register.
15475 q -- likewise, print the DImode name of the register.
15476 x -- likewise, print the V4SFmode name of the register.
15477 t -- likewise, print the V8SFmode name of the register.
15478 g -- likewise, print the V16SFmode name of the register.
15479 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15480 y -- print "st(0)" instead of "st" as a register.
15481 d -- print duplicated register operand for AVX instruction.
15482 D -- print condition for SSE cmp instruction.
15483 P -- if PIC, print an @PLT suffix.
15484 p -- print raw symbol name.
15485 X -- don't print any sort of PIC '@' suffix for a symbol.
15486 & -- print some in-use local-dynamic symbol name.
15487 H -- print a memory address offset by 8; used for sse high-parts
15488 Y -- print condition for XOP pcom* instruction.
15489 + -- print a branch hint as 'cs' or 'ds' prefix
15490 ; -- print a semicolon (after prefixes due to bug in older gas).
15491 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15492 @ -- print a segment register of thread base pointer load
15493 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15494 ! -- print MPX prefix for jxx/call/ret instructions if required.
15498 ix86_print_operand (FILE *file, rtx x, int code)
15505 switch (ASSEMBLER_DIALECT)
15512 /* Intel syntax. For absolute addresses, registers should not
15513 be surrounded by braces. */
15517 ix86_print_operand (file, x, 0);
15524 gcc_unreachable ();
15527 ix86_print_operand (file, x, 0);
15531 /* Wrap address in an UNSPEC to declare special handling. */
15533 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15535 output_address (x);
15539 if (ASSEMBLER_DIALECT == ASM_ATT)
15544 if (ASSEMBLER_DIALECT == ASM_ATT)
15549 if (ASSEMBLER_DIALECT == ASM_ATT)
15554 if (ASSEMBLER_DIALECT == ASM_ATT)
15559 if (ASSEMBLER_DIALECT == ASM_ATT)
15564 if (ASSEMBLER_DIALECT == ASM_ATT)
15569 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15570 if (ASSEMBLER_DIALECT != ASM_ATT)
15573 switch (GET_MODE_SIZE (GET_MODE (x)))
15588 output_operand_lossage
15589 ("invalid operand size for operand code 'O'");
15598 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15600 /* Opcodes don't get size suffixes if using Intel opcodes. */
15601 if (ASSEMBLER_DIALECT == ASM_INTEL)
15604 switch (GET_MODE_SIZE (GET_MODE (x)))
15623 output_operand_lossage
15624 ("invalid operand size for operand code 'z'");
15629 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15631 (0, "non-integer operand used with operand code 'z'");
15635 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15636 if (ASSEMBLER_DIALECT == ASM_INTEL)
15639 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15641 switch (GET_MODE_SIZE (GET_MODE (x)))
15644 #ifdef HAVE_AS_IX86_FILDS
15654 #ifdef HAVE_AS_IX86_FILDQ
15657 fputs ("ll", file);
15665 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15667 /* 387 opcodes don't get size suffixes
15668 if the operands are registers. */
15669 if (STACK_REG_P (x))
15672 switch (GET_MODE_SIZE (GET_MODE (x)))
15693 output_operand_lossage
15694 ("invalid operand type used with operand code 'Z'");
15698 output_operand_lossage
15699 ("invalid operand size for operand code 'Z'");
15718 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15720 ix86_print_operand (file, x, 0);
15721 fputs (", ", file);
15726 switch (GET_CODE (x))
15729 fputs ("neq", file);
15732 fputs ("eq", file);
15736 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15740 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15744 fputs ("le", file);
15748 fputs ("lt", file);
15751 fputs ("unord", file);
15754 fputs ("ord", file);
15757 fputs ("ueq", file);
15760 fputs ("nlt", file);
15763 fputs ("nle", file);
15766 fputs ("ule", file);
15769 fputs ("ult", file);
15772 fputs ("une", file);
15775 output_operand_lossage ("operand is not a condition code, "
15776 "invalid operand code 'Y'");
15782 /* Little bit of braindamage here. The SSE compare instructions
15783 does use completely different names for the comparisons that the
15784 fp conditional moves. */
15785 switch (GET_CODE (x))
15790 fputs ("eq_us", file);
15794 fputs ("eq", file);
15799 fputs ("nge", file);
15803 fputs ("lt", file);
15808 fputs ("ngt", file);
15812 fputs ("le", file);
15815 fputs ("unord", file);
15820 fputs ("neq_oq", file);
15824 fputs ("neq", file);
15829 fputs ("ge", file);
15833 fputs ("nlt", file);
15838 fputs ("gt", file);
15842 fputs ("nle", file);
15845 fputs ("ord", file);
15848 output_operand_lossage ("operand is not a condition code, "
15849 "invalid operand code 'D'");
15856 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15857 if (ASSEMBLER_DIALECT == ASM_ATT)
15863 if (!COMPARISON_P (x))
15865 output_operand_lossage ("operand is not a condition code, "
15866 "invalid operand code '%c'", code);
15869 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15870 code == 'c' || code == 'f',
15871 code == 'F' || code == 'f',
15876 if (!offsettable_memref_p (x))
15878 output_operand_lossage ("operand is not an offsettable memory "
15879 "reference, invalid operand code 'H'");
15882 /* It doesn't actually matter what mode we use here, as we're
15883 only going to use this for printing. */
15884 x = adjust_address_nv (x, DImode, 8);
15885 /* Output 'qword ptr' for intel assembler dialect. */
15886 if (ASSEMBLER_DIALECT == ASM_INTEL)
15891 gcc_assert (CONST_INT_P (x));
15893 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15894 #ifdef HAVE_AS_IX86_HLE
15895 fputs ("xacquire ", file);
15897 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15899 else if (INTVAL (x) & IX86_HLE_RELEASE)
15900 #ifdef HAVE_AS_IX86_HLE
15901 fputs ("xrelease ", file);
15903 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15905 /* We do not want to print value of the operand. */
15909 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15910 fputs ("{z}", file);
15914 gcc_assert (CONST_INT_P (x));
15915 gcc_assert (INTVAL (x) == ROUND_SAE);
15917 if (ASSEMBLER_DIALECT == ASM_INTEL)
15918 fputs (", ", file);
15920 fputs ("{sae}", file);
15922 if (ASSEMBLER_DIALECT == ASM_ATT)
15923 fputs (", ", file);
15928 gcc_assert (CONST_INT_P (x));
15930 if (ASSEMBLER_DIALECT == ASM_INTEL)
15931 fputs (", ", file);
15933 switch (INTVAL (x))
15935 case ROUND_NEAREST_INT | ROUND_SAE:
15936 fputs ("{rn-sae}", file);
15938 case ROUND_NEG_INF | ROUND_SAE:
15939 fputs ("{rd-sae}", file);
15941 case ROUND_POS_INF | ROUND_SAE:
15942 fputs ("{ru-sae}", file);
15944 case ROUND_ZERO | ROUND_SAE:
15945 fputs ("{rz-sae}", file);
15948 gcc_unreachable ();
15951 if (ASSEMBLER_DIALECT == ASM_ATT)
15952 fputs (", ", file);
15957 if (ASSEMBLER_DIALECT == ASM_ATT)
15963 const char *name = get_some_local_dynamic_name ();
15965 output_operand_lossage ("'%%&' used without any "
15966 "local dynamic TLS references");
15968 assemble_name (file, name);
15977 || optimize_function_for_size_p (cfun)
15978 || !TARGET_BRANCH_PREDICTION_HINTS)
15981 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15984 int pred_val = XINT (x, 0);
15986 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15987 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15989 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15991 = final_forward_branch_p (current_output_insn) == 0;
15993 /* Emit hints only in the case default branch prediction
15994 heuristics would fail. */
15995 if (taken != cputaken)
15997 /* We use 3e (DS) prefix for taken branches and
15998 2e (CS) prefix for not taken branches. */
16000 fputs ("ds ; ", file);
16002 fputs ("cs ; ", file);
16010 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
16016 if (ASSEMBLER_DIALECT == ASM_ATT)
16019 /* The kernel uses a different segment register for performance
16020 reasons; a system call would not have to trash the userspace
16021 segment register, which would be expensive. */
16022 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
16023 fputs ("fs", file);
16025 fputs ("gs", file);
16029 putc (TARGET_AVX2 ? 'i' : 'f', file);
16033 if (TARGET_64BIT && Pmode != word_mode)
16034 fputs ("addr32 ", file);
16038 if (ix86_bnd_prefixed_insn_p (current_output_insn))
16039 fputs ("bnd ", file);
16043 output_operand_lossage ("invalid operand code '%c'", code);
16048 print_reg (x, code, file);
16050 else if (MEM_P (x))
16052 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
16053 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
16054 && GET_MODE (x) != BLKmode)
16057 switch (GET_MODE_SIZE (GET_MODE (x)))
16059 case 1: size = "BYTE"; break;
16060 case 2: size = "WORD"; break;
16061 case 4: size = "DWORD"; break;
16062 case 8: size = "QWORD"; break;
16063 case 12: size = "TBYTE"; break;
16065 if (GET_MODE (x) == XFmode)
16070 case 32: size = "YMMWORD"; break;
16071 case 64: size = "ZMMWORD"; break;
16073 gcc_unreachable ();
16076 /* Check for explicit size override (codes 'b', 'w', 'k',
16080 else if (code == 'w')
16082 else if (code == 'k')
16084 else if (code == 'q')
16086 else if (code == 'x')
16089 fputs (size, file);
16090 fputs (" PTR ", file);
16094 /* Avoid (%rip) for call operands. */
16095 if (CONSTANT_ADDRESS_P (x) && code == 'P'
16096 && !CONST_INT_P (x))
16097 output_addr_const (file, x);
16098 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
16099 output_operand_lossage ("invalid constraints for operand");
16101 output_address (x);
16104 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
16109 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16110 REAL_VALUE_TO_TARGET_SINGLE (r, l);
16112 if (ASSEMBLER_DIALECT == ASM_ATT)
16114 /* Sign extend 32bit SFmode immediate to 8 bytes. */
16116 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
16117 (unsigned long long) (int) l);
16119 fprintf (file, "0x%08x", (unsigned int) l);
16122 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
16127 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16128 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16130 if (ASSEMBLER_DIALECT == ASM_ATT)
16132 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16135 /* These float cases don't actually occur as immediate operands. */
16136 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
16140 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16141 fputs (dstr, file);
16146 /* We have patterns that allow zero sets of memory, for instance.
16147 In 64-bit mode, we should probably support all 8-byte vectors,
16148 since we can in fact encode that into an immediate. */
16149 if (GET_CODE (x) == CONST_VECTOR)
16151 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16155 if (code != 'P' && code != 'p')
16157 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
16159 if (ASSEMBLER_DIALECT == ASM_ATT)
16162 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16163 || GET_CODE (x) == LABEL_REF)
16165 if (ASSEMBLER_DIALECT == ASM_ATT)
16168 fputs ("OFFSET FLAT:", file);
16171 if (CONST_INT_P (x))
16172 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16173 else if (flag_pic || MACHOPIC_INDIRECT)
16174 output_pic_addr_const (file, x, code);
16176 output_addr_const (file, x);
/* Return true if CODE is a punctuation character accepted as a
   standalone operand modifier by ix86_print_operand (the '%@', '%*',
   '%+', '%&', '%;', '%~', '%^', '%!' forms handled above).
   NOTE(review): return type line and braces are elided in this
   extraction; verify against the complete i386.c.  */
16181 ix86_print_operand_punct_valid_p (unsigned char code)
16183 return (code == '@' || code == '*' || code == '+' || code == '&'
16184 || code == ';' || code == '~' || code == '^' || code == '!');
16187 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): this extraction elides interior lines (the embedded
   original line numbers jump), so braces, some statements and `else`
   arms are missing here.  Do not infer control flow from adjacency
   alone -- check against the complete i386.c.  */
16190 ix86_print_operand_address (FILE *file, rtx addr)
16192 struct ix86_address parts;
16193 rtx base, index, disp;
/* First unwrap UNSPEC address wrappers -- VSIB gather/scatter
   addresses, 64-bit LEA addresses, and MPX BNDMK/BNDLDX addresses --
   then decompose the inner address into base/index/disp/scale.  */
16199 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16201 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16202 gcc_assert (parts.index == NULL_RTX);
16203 parts.index = XVECEXP (addr, 0, 1);
16204 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16205 addr = XVECEXP (addr, 0, 0);
16208 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16210 gcc_assert (TARGET_64BIT);
16211 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16214 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16216 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16217 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16218 if (parts.base != NULL_RTX)
16220 parts.index = parts.base;
16223 parts.base = XVECEXP (addr, 0, 0);
16224 addr = XVECEXP (addr, 0, 0);
16226 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16228 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16229 gcc_assert (parts.index == NULL_RTX);
16230 parts.index = XVECEXP (addr, 0, 1);
16231 addr = XVECEXP (addr, 0, 0);
16234 ok = ix86_decompose_address (addr, &parts);
16239 index = parts.index;
16241 scale = parts.scale;
/* Segment override prefix, if any (AT&T prints "fs:"/"gs:").  */
16249 if (ASSEMBLER_DIALECT == ASM_ATT)
16251 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16254 gcc_unreachable ();
16257 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16258 if (TARGET_64BIT && !base && !index)
16262 if (GET_CODE (disp) == CONST
16263 && GET_CODE (XEXP (disp, 0)) == PLUS
16264 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16265 symbol = XEXP (XEXP (disp, 0), 0);
16267 if (GET_CODE (symbol) == LABEL_REF
16268 || (GET_CODE (symbol) == SYMBOL_REF
16269 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16272 if (!base && !index)
16274 /* Displacement only requires special attention. */
16276 if (CONST_INT_P (disp))
16278 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16279 fputs ("ds:", file);
16280 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp))
16283 output_pic_addr_const (file, disp, 0);
16285 output_addr_const (file, disp);
16289 /* Print SImode register names to force addr32 prefix. */
16290 if (SImode_address_operand (addr, VOIDmode))
16292 #ifdef ENABLE_CHECKING
16293 gcc_assert (TARGET_64BIT);
16294 switch (GET_CODE (addr))
16297 gcc_assert (GET_MODE (addr) == SImode);
16298 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16302 gcc_assert (GET_MODE (addr) == DImode);
16305 gcc_unreachable ();
16308 gcc_assert (!code);
16314 && CONST_INT_P (disp)
16315 && INTVAL (disp) < -16*1024*1024)
16317 /* X32 runs in 64-bit mode, where displacement, DISP, in
16318 address DISP(%r64), is encoded as 32-bit immediate sign-
16319 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16320 address is %r64 + 0xffffffffbffffd00. When %r64 <
16321 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16322 which is invalid for x32. The correct address is %r64
16323 - 0x40000300 == 0xf7ffdd64. To properly encode
16324 -0x40000300(%r64) for x32, we zero-extend negative
16325 displacement by forcing addr32 prefix which truncates
16326 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16327 zero-extend all negative displacements, including -1(%rsp).
16328 However, for small negative displacements, sign-extension
16329 won't cause overflow. We only zero-extend negative
16330 displacements if they < -16*1024*1024, which is also used
16331 to check legitimate address displacements for PIC. */
/* AT&T syntax: disp(base,index,scale).  */
16335 if (ASSEMBLER_DIALECT == ASM_ATT)
16340 output_pic_addr_const (file, disp, 0);
16341 else if (GET_CODE (disp) == LABEL_REF)
16342 output_asm_label (disp);
16344 output_addr_const (file, disp);
16349 print_reg (base, code, file);
16353 print_reg (index, vsib ? 0 : code, file);
16354 if (scale != 1 || vsib)
16355 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp].  */
16361 rtx offset = NULL_RTX;
16365 /* Pull out the offset of a symbol; print any symbol itself. */
16366 if (GET_CODE (disp) == CONST
16367 && GET_CODE (XEXP (disp, 0)) == PLUS
16368 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16370 offset = XEXP (XEXP (disp, 0), 1);
16371 disp = gen_rtx_CONST (VOIDmode,
16372 XEXP (XEXP (disp, 0), 0));
16376 output_pic_addr_const (file, disp, 0);
16377 else if (GET_CODE (disp) == LABEL_REF)
16378 output_asm_label (disp);
16379 else if (CONST_INT_P (disp))
16382 output_addr_const (file, disp);
16388 print_reg (base, code, file);
16391 if (INTVAL (offset) >= 0)
16393 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16397 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16404 print_reg (index, vsib ? 0 : code, file);
16405 if (scale != 1 || vsib)
16406 fprintf (file, "*%d", scale);
16413 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Prints TLS-relocation UNSPECs (@gottpoff, @tpoff, @ntpoff, @dtpoff,
   @gotntpoff, @indntpoff), Mach-O PIC offsets, and the split-stack
   check symbol.  Returns false (presumably -- the return statements
   are elided in this extraction) for non-UNSPEC X so generic code
   handles it.
   NOTE(review): `break;` statements between cases and several `else`
   arms are elided here; verify fall-through against upstream.  */
16416 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16420 if (GET_CODE (x) != UNSPEC)
16423 op = XVECEXP (x, 0, 0);
16424 switch (XINT (x, 1))
16426 case UNSPEC_GOTTPOFF:
16427 output_addr_const (file, op);
16428 /* FIXME: This might be @TPOFF in Sun ld. */
16429 fputs ("@gottpoff", file);
16432 output_addr_const (file, op);
16433 fputs ("@tpoff", file);
16435 case UNSPEC_NTPOFF:
16436 output_addr_const (file, op);
/* 64-bit uses @tpoff, 32-bit @ntpoff (condition line elided).  */
16438 fputs ("@tpoff", file);
16440 fputs ("@ntpoff", file);
16442 case UNSPEC_DTPOFF:
16443 output_addr_const (file, op);
16444 fputs ("@dtpoff", file);
16446 case UNSPEC_GOTNTPOFF:
16447 output_addr_const (file, op);
16449 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16450 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16452 fputs ("@gotntpoff", file);
16454 case UNSPEC_INDNTPOFF:
16455 output_addr_const (file, op);
16456 fputs ("@indntpoff", file);
16459 case UNSPEC_MACHOPIC_OFFSET:
16460 output_addr_const (file, op);
16462 machopic_output_function_base_name (file);
16466 case UNSPEC_STACK_CHECK:
16470 gcc_assert (flag_split_stack);
16472 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16473 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16475 gcc_unreachable ();
16478 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16489 /* Split one or more double-mode RTL references into pairs of half-mode
16490 references. The RTL can be REG, offsettable MEM, integer constant, or
16491 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16492 split and "num" is its length. lo_half and hi_half are output arrays
16493 that parallel "operands". */
/* NOTE(review): the switch on MODE (selecting DImode vs SImode halves)
   and the loop header over `num` are partially elided in this
   extraction -- `num` is clearly used as a descending loop index over
   the operand array below.  */
16496 split_double_mode (machine_mode mode, rtx operands[],
16497 int num, rtx lo_half[], rtx hi_half[])
16499 machine_mode half_mode;
/* TImode splits into DImode halves, DImode into SImode halves.  */
16505 half_mode = DImode;
16508 half_mode = SImode;
16511 gcc_unreachable ();
16514 byte = GET_MODE_SIZE (half_mode);
16518 rtx op = operands[num];
16520 /* simplify_subreg refuse to split volatile memory addresses,
16521 but we still have to handle it. */
16524 lo_half[num] = adjust_address (op, half_mode, 0);
16525 hi_half[num] = adjust_address (op, half_mode, byte);
16529 lo_half[num] = simplify_gen_subreg (half_mode, op,
16530 GET_MODE (op) == VOIDmode
16531 ? mode : GET_MODE (op), 0);
16532 hi_half[num] = simplify_gen_subreg (half_mode, op,
16533 GET_MODE (op) == VOIDmode
16534 ? mode : GET_MODE (op), byte);
16539 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16540 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16541 is the expression of the binary operation. The output may either be
16542 emitted here, or returned to the caller, like all output_* functions.
16544 There is no guarantee that the operands are the same mode, as they
16545 might be within FLOAT or FLOAT_EXTEND expressions. */
16547 #ifndef SYSV386_COMPAT
16548 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16549 wants to fix the assemblers because that causes incompatibility
16550 with gcc. No-one wants to fix gcc because that causes
16551 incompatibility with assemblers... You can use the option of
16552 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16553 #define SYSV386_COMPAT 1
/* NOTE(review): this extraction elides many lines inside this function
   (mnemonic selection per opcode, buffer assembly, `break;`s, and the
   SSE early-exit path); the visible lines document structure only.  */
16557 output_387_binary_op (rtx insn, rtx *operands)
16559 static char buf[40];
16562 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16564 #ifdef ENABLE_CHECKING
16565 /* Even if we do not want to check the inputs, this documents input
16566 constraints. Which helps in understanding the following code. */
16567 if (STACK_REG_P (operands[0])
16568 && ((REG_P (operands[1])
16569 && REGNO (operands[0]) == REGNO (operands[1])
16570 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16571 || (REG_P (operands[2])
16572 && REGNO (operands[0]) == REGNO (operands[2])
16573 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16574 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16577 gcc_assert (is_sse);
/* Pick the x87/SSE mnemonic stems by operation; integer-mode operands
   select the fi* (integer-memory) forms.  */
16580 switch (GET_CODE (operands[3]))
16583 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16584 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16592 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16593 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16601 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16602 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16610 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16611 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16619 gcc_unreachable ();
/* SSE path: scalar single (ss) vs double (sd) suffix; three-operand
   AVX form vs two-operand legacy form (selection line elided).  */
16626 strcpy (buf, ssep);
16627 if (GET_MODE (operands[0]) == SFmode)
16628 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16630 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16634 strcpy (buf, ssep + 1);
16635 if (GET_MODE (operands[0]) == SFmode)
16636 strcat (buf, "ss\t{%2, %0|%0, %2}");
16638 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the popping/non-popping and reversed variants.  */
16644 switch (GET_CODE (operands[3]))
16648 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16649 std::swap (operands[1], operands[2]);
16651 /* know operands[0] == operands[1]. */
16653 if (MEM_P (operands[2]))
16659 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16661 if (STACK_TOP_P (operands[0]))
16662 /* How is it that we are storing to a dead operand[2]?
16663 Well, presumably operands[1] is dead too. We can't
16664 store the result to st(0) as st(0) gets popped on this
16665 instruction. Instead store to operands[2] (which I
16666 think has to be st(1)). st(1) will be popped later.
16667 gcc <= 2.8.1 didn't have this check and generated
16668 assembly code that the Unixware assembler rejected. */
16669 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16671 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16675 if (STACK_TOP_P (operands[0]))
16676 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16678 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16683 if (MEM_P (operands[1]))
16689 if (MEM_P (operands[2]))
16695 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16698 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16699 derived assemblers, confusingly reverse the direction of
16700 the operation for fsub{r} and fdiv{r} when the
16701 destination register is not st(0). The Intel assembler
16702 doesn't have this brain damage. Read !SYSV386_COMPAT to
16703 figure out what the hardware really does. */
16704 if (STACK_TOP_P (operands[0]))
16705 p = "{p\t%0, %2|rp\t%2, %0}";
16707 p = "{rp\t%2, %0|p\t%0, %2}";
16709 if (STACK_TOP_P (operands[0]))
16710 /* As above for fmul/fadd, we can't store to st(0). */
16711 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16713 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16718 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16721 if (STACK_TOP_P (operands[0]))
16722 p = "{rp\t%0, %1|p\t%1, %0}";
16724 p = "{p\t%1, %0|rp\t%0, %1}";
16726 if (STACK_TOP_P (operands[0]))
16727 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16729 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16734 if (STACK_TOP_P (operands[0]))
16736 if (STACK_TOP_P (operands[1]))
16737 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16739 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16742 else if (STACK_TOP_P (operands[1]))
16745 p = "{\t%1, %0|r\t%0, %1}";
16747 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16753 p = "{r\t%2, %0|\t%0, %2}";
16755 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16761 gcc_unreachable ();
16768 /* Check if a 256bit AVX register is referenced inside of EXP. */
/* Strips one SUBREG layer, then tests for a hard/pseudo REG whose mode
   is a 256-bit AVX (or OImode) mode.  */
16771 ix86_check_avx256_register (const_rtx exp)
16773 if (GET_CODE (exp) == SUBREG)
16774 exp = SUBREG_REG (exp);
16776 return (REG_P (exp)
16777 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16780 /* Return needed mode for entity in optimize_mode_switching pass. */
/* For the AVX upper-128-bit entity: a CALL whose argument list uses no
   256-bit registers needs CLEAN state (vzeroupper before the call);
   any insn referencing a 256-bit register needs DIRTY; otherwise ANY.
   NOTE(review): the CALL_P guard around the first loop is elided in
   this extraction.  */
16783 ix86_avx_u128_mode_needed (rtx_insn *insn)
16789 /* Needed mode is set to AVX_U128_CLEAN if there are
16790 no 256bit modes used in function arguments. */
16791 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16793 link = XEXP (link, 1))
16795 if (GET_CODE (XEXP (link, 0)) == USE)
16797 rtx arg = XEXP (XEXP (link, 0), 0);
16799 if (ix86_check_avx256_register (arg))
16800 return AVX_U128_DIRTY;
16804 return AVX_U128_CLEAN;
16807 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16808 changes state only when a 256bit register is written to, but we need
16809 to prevent the compiler from moving optimal insertion point above
16810 eventual read from 256bit register. */
16811 subrtx_iterator::array_type array;
16812 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16813 if (ix86_check_avx256_register (*iter))
16814 return AVX_U128_DIRTY;
16816 return AVX_U128_ANY;
16819 /* Return mode that i387 must be switched into
16820 prior to the execution of insn. */
/* Maps an insn to the x87 control-word state it requires: calls and
   asms invalidate the CW (UNINITIALIZED); unrecognized insns impose
   nothing (ANY); otherwise the insn's i387_cw attribute selects
   TRUNC/FLOOR/CEIL/MASK_PM, each gated by a per-entity check whose
   condition lines are elided in this extraction.  */
16823 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16825 enum attr_i387_cw mode;
16827 /* The mode UNINITIALIZED is used to store control word after a
16828 function call or ASM pattern. The mode ANY specify that function
16829 has no requirements on the control word and make no changes in the
16830 bits we are interested in. */
16833 || (NONJUMP_INSN_P (insn)
16834 && (asm_noperands (PATTERN (insn)) >= 0
16835 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16836 return I387_CW_UNINITIALIZED;
16838 if (recog_memoized (insn) < 0)
16839 return I387_CW_ANY;
16841 mode = get_attr_i387_cw (insn);
16846 if (mode == I387_CW_TRUNC)
16851 if (mode == I387_CW_FLOOR)
16856 if (mode == I387_CW_CEIL)
16861 if (mode == I387_CW_MASK_PM)
16866 gcc_unreachable ();
16869 return I387_CW_ANY;
16872 /* Return mode that entity must be switched into
16873 prior to the execution of insn. */
/* TARGET_MODE_NEEDED hook: dispatches on ENTITY (AVX_U128 vs the i387
   control-word entities; the switch/case lines are elided here).  */
16876 ix86_mode_needed (int entity, rtx_insn *insn)
16881 return ix86_avx_u128_mode_needed (insn);
16886 return ix86_i387_mode_needed (entity, insn);
16888 gcc_unreachable ();
16893 /* Check if a 256bit AVX register is referenced in stores. */
/* note_stores callback: sets *(bool *)DATA when DEST is a 256-bit AVX
   register.  */
16896 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16898 if (ix86_check_avx256_register (dest))
16900 bool *used = (bool *) data;
16905 /* Calculate mode of upper 128bit AVX registers after the insn. */
/* vzeroupper/vzeroall leave CLEAN state; after a call (the CALL_P
   guard is elided in this extraction) state is DIRTY only if the
   return value lands in a 256-bit register; otherwise MODE is
   propagated unchanged.  */
16908 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16910 rtx pat = PATTERN (insn);
16912 if (vzeroupper_operation (pat, VOIDmode)
16913 || vzeroall_operation (pat, VOIDmode))
16914 return AVX_U128_CLEAN;
16916 /* We know that state is clean after CALL insn if there are no
16917 256bit registers used in the function return register. */
16920 bool avx_reg256_found = false;
16921 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16923 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16926 /* Otherwise, return current mode. Remember that if insn
16927 references AVX 256bit registers, the mode was already changed
16928 to DIRTY from MODE_NEEDED. */
16932 /* Return the mode that an insn results in. */
/* TARGET_MODE_AFTER hook: AVX_U128 delegates to
   ix86_avx_u128_mode_after; other entities (case lines elided here)
   presumably return MODE unchanged -- verify upstream.  */
16935 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16940 return ix86_avx_u128_mode_after (mode, insn);
16947 gcc_unreachable ();
/* AVX upper-128 state assumed on function entry: DIRTY if any incoming
   argument occupies a 256-bit register, else CLEAN.  */
16952 ix86_avx_u128_mode_entry (void)
16956 /* Entry mode is set to AVX_U128_DIRTY if there are
16957 256bit modes used in function arguments. */
16958 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16959 arg = TREE_CHAIN (arg))
16961 rtx incoming = DECL_INCOMING_RTL (arg);
16963 if (incoming && ix86_check_avx256_register (incoming))
16964 return AVX_U128_DIRTY;
16967 return AVX_U128_CLEAN;
16970 /* Return a mode that ENTITY is assumed to be
16971 switched to at function entry. */
/* TARGET_MODE_ENTRY hook (switch/case lines elided): AVX_U128 entity
   computed from incoming args; i387 CW entities are unconstrained.  */
16974 ix86_mode_entry (int entity)
16979 return ix86_avx_u128_mode_entry ();
16984 return I387_CW_ANY;
16986 gcc_unreachable ();
/* AVX upper-128 state assumed at function exit: DIRTY if the return
   value register is a 256-bit AVX register, else CLEAN.  */
16991 ix86_avx_u128_mode_exit (void)
16993 rtx reg = crtl->return_rtx;
16995 /* Exit mode is set to AVX_U128_DIRTY if there are
16996 256bit modes used in the function return register. */
16997 if (reg && ix86_check_avx256_register (reg))
16998 return AVX_U128_DIRTY;
17000 return AVX_U128_CLEAN;
17003 /* Return a mode that ENTITY is assumed to be
17004 switched to at function exit. */
/* TARGET_MODE_EXIT hook (switch/case lines elided): mirrors
   ix86_mode_entry.  */
17007 ix86_mode_exit (int entity)
17012 return ix86_avx_u128_mode_exit ();
17017 return I387_CW_ANY;
17019 gcc_unreachable ();
17024 ix86_mode_priority (int, int n)
17029 /* Output code to initialize control word copies used by trunc?f?i and
17030 rounding patterns. CURRENT_MODE is set to current control word,
17031 while NEW_MODE is set to new control word. */
/* Stores the current x87 CW with fnstcw, derives a modified copy for
   the requested rounding/masking MODE, and stashes it in a dedicated
   stack slot for later fldcw.  Two code paths: a HImode or/and
   sequence (64-bit / partial-reg-stall / size-optimized targets) and a
   movsi_insv_1 bitfield-insert sequence otherwise.
   NOTE(review): `break;`s, braces and the second-path `else` are
   elided in this extraction.  */
17034 emit_i387_cw_initialization (int mode)
17036 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
17039 enum ix86_stack_slot slot;
17041 rtx reg = gen_reg_rtx (HImode);
17043 emit_insn (gen_x86_fnstcw_1 (stored_mode));
17044 emit_move_insn (reg, copy_rtx (stored_mode));
17046 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
17047 || optimize_insn_for_size_p ())
17051 case I387_CW_TRUNC:
17052 /* round toward zero (truncate) */
17053 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
17054 slot = SLOT_CW_TRUNC;
17057 case I387_CW_FLOOR:
17058 /* round down toward -oo */
17059 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17060 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
17061 slot = SLOT_CW_FLOOR;
17065 /* round up toward +oo */
17066 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17067 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
17068 slot = SLOT_CW_CEIL;
17071 case I387_CW_MASK_PM:
17072 /* mask precision exception for nearbyint() */
17073 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17074 slot = SLOT_CW_MASK_PM;
17078 gcc_unreachable ();
17085 case I387_CW_TRUNC:
17086 /* round toward zero (truncate) */
17087 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
17088 slot = SLOT_CW_TRUNC;
17091 case I387_CW_FLOOR:
17092 /* round down toward -oo */
17093 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
17094 slot = SLOT_CW_FLOOR;
17098 /* round up toward +oo */
17099 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
17100 slot = SLOT_CW_CEIL;
17103 case I387_CW_MASK_PM:
17104 /* mask precision exception for nearbyint() */
17105 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17106 slot = SLOT_CW_MASK_PM;
17110 gcc_unreachable ();
17114 gcc_assert (slot < MAX_386_STACK_LOCALS);
17116 new_mode = assign_386_stack_local (HImode, slot);
17117 emit_move_insn (new_mode, reg);
17120 /* Emit vzeroupper. */
/* Suppresses the automatic vzeroupper if any call-saved SSE register
   is live at the insertion point (vzeroupper would clobber its upper
   half); the early-return bodies of the two scans are elided in this
   extraction.  */
17123 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17127 /* Cancel automatic vzeroupper insertion if there are
17128 live call-saved SSE registers at the insertion point. */
17130 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17131 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17135 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17136 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17139 emit_insn (gen_avx_vzeroupper ());
17142 /* Generate one or more insns to set ENTITY to MODE. */
17144 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
17145 is the set of hard registers live at the point where the insn(s)
17146 are to be inserted. */
/* TARGET_EMIT_MODE_SET hook: AVX_U128 CLEAN emits vzeroupper; i387 CW
   entities emit a control-word initialization (switch/case lines
   elided in this extraction).  */
17149 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17150 HARD_REG_SET regs_live)
17155 if (mode == AVX_U128_CLEAN)
17156 ix86_avx_emit_vzeroupper (regs_live);
17162 if (mode != I387_CW_ANY
17163 && mode != I387_CW_UNINITIALIZED)
17164 emit_i387_cw_initialization (mode);
17167 gcc_unreachable ();
17171 /* Output code for INSN to convert a float to a signed int. OPERANDS
17172 are the insn operands. The output may be [HSD]Imode and the input
17173 operand may be [SDX]Fmode. */
/* FISTTP (SSE3) needs no control-word dance; otherwise the rounding CW
   in operands[3] is loaded and the saved CW in operands[2] restored
   around fistp/fist.  DImode always pops since only fistpll exists.
   NOTE(review): the FISTTP branch structure (the `if (fisttp)` and its
   `else`) is partially elided in this extraction.  */
17176 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17178 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17179 int dimode_p = GET_MODE (operands[0]) == DImode;
17180 int round_mode = get_attr_i387_cw (insn);
17182 /* Jump through a hoop or two for DImode, since the hardware has no
17183 non-popping instruction. We used to do this a different way, but
17184 that was somewhat fragile and broke with post-reload splitters. */
17185 if ((dimode_p || fisttp) && !stack_top_dies)
17186 output_asm_insn ("fld\t%y1", operands);
17188 gcc_assert (STACK_TOP_P (operands[1]));
17189 gcc_assert (MEM_P (operands[0]));
17190 gcc_assert (GET_MODE (operands[1]) != TFmode);
17193 output_asm_insn ("fisttp%Z0\t%0", operands);
17196 if (round_mode != I387_CW_ANY)
17197 output_asm_insn ("fldcw\t%3", operands);
17198 if (stack_top_dies || dimode_p)
17199 output_asm_insn ("fistp%Z0\t%0", operands);
17201 output_asm_insn ("fist%Z0\t%0", operands);
17202 if (round_mode != I387_CW_ANY)
17203 output_asm_insn ("fldcw\t%2", operands);
17209 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17210 have the values zero or one, indicates the ffreep insn's operand
17211 from the OPERANDS array. */
/* If the assembler lacks ffreep support, the raw opcode bytes
   (0xDF 0xC0+i) are emitted via ASM_SHORT; without TARGET_USE_FFREEP
   a plain fstp is used instead.  */
17213 static const char *
17214 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17216 if (TARGET_USE_FFREEP)
17217 #ifdef HAVE_AS_IX86_FFREEP
17218 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17221 static char retval[32];
17222 int regno = REGNO (operands[opno]);
17224 gcc_assert (STACK_REGNO_P (regno));
17226 regno -= FIRST_STACK_REG;
17228 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17233 return opno ? "fstp\t%y1" : "fstp\t%y0";
17237 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17238 should be used. UNORDERED_P is true when fucom should be used. */
/* SSE operands use [u]comiss/[u]comisd.  x87 compares pick between
   ftst (compare against zero), fcompp/fucompp (both stack operands
   die), and a table of fcom/fcomp/fucom/ficom/fcomi variants indexed
   by a 4-bit mask (see the alt[] comment below).
   NOTE(review): several guard/else lines are elided in this
   extraction; the alt[] table as shown is also missing some of its 16
   entries.  */
17241 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17243 int stack_top_dies;
17244 rtx cmp_op0, cmp_op1;
17245 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17249 cmp_op0 = operands[0];
17250 cmp_op1 = operands[1];
17254 cmp_op0 = operands[1];
17255 cmp_op1 = operands[2];
17260 if (GET_MODE (operands[0]) == SFmode)
17262 return "%vucomiss\t{%1, %0|%0, %1}";
17264 return "%vcomiss\t{%1, %0|%0, %1}";
17267 return "%vucomisd\t{%1, %0|%0, %1}";
17269 return "%vcomisd\t{%1, %0|%0, %1}";
17272 gcc_assert (STACK_TOP_P (cmp_op0));
17274 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17276 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17278 if (stack_top_dies)
17280 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17281 return output_387_ffreep (operands, 1);
17284 return "ftst\n\tfnstsw\t%0";
17287 if (STACK_REG_P (cmp_op1)
17289 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17290 && REGNO (cmp_op1) != FIRST_STACK_REG)
17292 /* If both the top of the 387 stack dies, and the other operand
17293 is also a stack register that dies, then this must be a
17294 `fcompp' float compare */
17298 /* There is no double popping fcomi variant. Fortunately,
17299 eflags is immune from the fstp's cc clobbering. */
17301 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17303 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17304 return output_387_ffreep (operands, 0);
17309 return "fucompp\n\tfnstsw\t%0";
17311 return "fcompp\n\tfnstsw\t%0";
17316 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17318 static const char * const alt[16] =
17320 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17321 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17322 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17323 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17325 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17326 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17330 "fcomi\t{%y1, %0|%0, %y1}",
17331 "fcomip\t{%y1, %0|%0, %y1}",
17332 "fucomi\t{%y1, %0|%0, %y1}",
17333 "fucomip\t{%y1, %0|%0, %y1}",
17344 mask = eflags_p << 3;
17345 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17346 mask |= unordered_p << 1;
17347 mask |= stack_top_dies;
17349 gcc_assert (mask < 16);
/* Emit one jump-table (addr_vec) element: a .long (or .quad when the
   case-vector guard lines, elided here, select it) referencing local
   label LPREFIX<value>.  */
17358 ix86_output_addr_vec_elt (FILE *file, int value)
17360 const char *directive = ASM_LONG;
17364 directive = ASM_QUAD;
17366 gcc_assert (!TARGET_64BIT);
17369 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one PIC jump-table element as a difference of two local labels,
   or a @GOTOFF / GOT-relative expression depending on target.  */
17373 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17375 const char *directive = ASM_LONG;
17378 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17379 directive = ASM_QUAD;
17381 gcc_assert (!TARGET_64BIT);
17383 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17384 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17385 fprintf (file, "%s%s%d-%s%d\n",
17386 directive, LPREFIX, value, LPREFIX, rel);
17387 else if (HAVE_AS_GOTOFF_IN_DATA)
17388 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17390 else if (TARGET_MACHO)
17392 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17393 machopic_output_function_base_name (file);
17398 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17399 GOT_SYMBOL_NAME, LPREFIX, value);
17402 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* (rest of the original comment and the `emit_insn (tmp);` tail are
   elided in this extraction).  The xor form clobbers FLAGS_REG, hence
   the PARALLEL with a CLOBBER.  */
17406 ix86_expand_clear (rtx dest)
17410 /* We play register width games, which are only valid after reload. */
17411 gcc_assert (reload_completed);
17413 /* Avoid HImode and its attendant prefix byte. */
17414 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17415 dest = gen_rtx_REG (SImode, REGNO (dest));
17416 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17418 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17420 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17421 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17427 /* X is an unchanging MEM. If it is a constant pool reference, return
17428 the constant pool rtx, else NULL. */
17431 maybe_get_pool_constant (rtx x)
17433 x = ix86_delegitimize_address (XEXP (x, 0));
17435 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17436 return get_pool_constant (x);
/* Expand a scalar move of mode MODE: operands[0] = operands[1].
   Handles TLS symbol legitimization, PE/COFF dllimport symbols,
   Mach-O and generic PIC address legitimization, push-operand
   restrictions, forcing large 64-bit immediates and FP constants into
   registers/memory, and finally emits the SET.
   NOTE(review): this extraction elides many lines (op0/op1
   initialization, several condition and else lines, braces); the
   visible lines document the major cases only -- verify control flow
   against the complete i386.c.  */
17442 ix86_expand_move (machine_mode mode, rtx operands[])
17445 enum tls_model model;
17450 if (GET_CODE (op1) == SYMBOL_REF)
17454 model = SYMBOL_REF_TLS_MODEL (op1);
17457 op1 = legitimize_tls_address (op1, model, true);
17458 op1 = force_operand (op1, op0);
17461 op1 = convert_to_mode (mode, op1, 1);
17463 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
/* (SYMBOL + const) forms: legitimize the symbol part, then re-add
   the addend.  */
17466 else if (GET_CODE (op1) == CONST
17467 && GET_CODE (XEXP (op1, 0)) == PLUS
17468 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17470 rtx addend = XEXP (XEXP (op1, 0), 1);
17471 rtx symbol = XEXP (XEXP (op1, 0), 0);
17474 model = SYMBOL_REF_TLS_MODEL (symbol);
17476 tmp = legitimize_tls_address (symbol, model, true);
17478 tmp = legitimize_pe_coff_symbol (symbol, true);
17482 tmp = force_operand (tmp, NULL);
17483 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17484 op0, 1, OPTAB_DIRECT);
17487 op1 = convert_to_mode (mode, tmp, 1);
/* PIC: symbolic sources must be legitimized through the PIC
   register (Mach-O has its own machinery).  */
17491 if ((flag_pic || MACHOPIC_INDIRECT)
17492 && symbolic_operand (op1, mode))
17494 if (TARGET_MACHO && !TARGET_64BIT)
17497 /* dynamic-no-pic */
17498 if (MACHOPIC_INDIRECT)
17500 rtx temp = ((reload_in_progress
17501 || ((op0 && REG_P (op0))
17503 ? op0 : gen_reg_rtx (Pmode));
17504 op1 = machopic_indirect_data_reference (op1, temp);
17506 op1 = machopic_legitimize_pic_address (op1, mode,
17507 temp == op1 ? 0 : temp);
17509 if (op0 != op1 && GET_CODE (op0) != MEM)
17511 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17515 if (GET_CODE (op0) == MEM)
17516 op1 = force_reg (Pmode, op1);
17520 if (GET_CODE (temp) != REG)
17521 temp = gen_reg_rtx (Pmode);
17522 temp = legitimize_pic_address (op1, temp);
17527 /* dynamic-no-pic */
17533 op1 = force_reg (mode, op1);
17534 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17536 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17537 op1 = legitimize_pic_address (op1, reg);
17540 op1 = convert_to_mode (mode, op1, 1);
/* mem-to-mem and push-operand restrictions.  */
17547 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17548 || !push_operand (op0, mode))
17550 op1 = force_reg (mode, op1);
17552 if (push_operand (op0, mode)
17553 && ! general_no_elim_operand (op1, mode))
17554 op1 = copy_to_mode_reg (mode, op1);
17556 /* Force large constants in 64bit compilation into register
17557 to get them CSEed. */
17558 if (can_create_pseudo_p ()
17559 && (mode == DImode) && TARGET_64BIT
17560 && immediate_operand (op1, mode)
17561 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17562 && !register_operand (op0, mode)
17564 op1 = copy_to_mode_reg (mode, op1);
17566 if (can_create_pseudo_p ()
17567 && FLOAT_MODE_P (mode)
17568 && GET_CODE (op1) == CONST_DOUBLE)
17570 /* If we are loading a floating point constant to a register,
17571 force the value to memory now, since we'll get better code
17572 out the back end. */
17574 op1 = validize_mem (force_const_mem (mode, op1));
17575 if (!register_operand (op0, mode))
17577 rtx temp = gen_reg_rtx (mode);
17578 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17579 emit_move_insn (op0, temp);
17585 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move OPERANDS[0] := OPERANDS[1] in MODE, fixing up
   operands so a single machine insn can do the move: pushes are resolved,
   non-trivial constants are spilled to the constant pool, and misaligned
   SSE operands are routed through ix86_expand_vector_move_misalign.
   NOTE(review): extraction gaps — some statements of this function are
   missing from this view; comments describe only the visible code.  */
17589 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17591 rtx op0 = operands[0], op1 = operands[1];
17592 unsigned int align = GET_MODE_ALIGNMENT (mode);
/* A push destination cannot take the fixups below directly; rewrite it
   into an explicit stack-pointer adjustment plus a plain MEM store.  */
17594 if (push_operand (op0, VOIDmode))
17595 op0 = emit_move_resolve_push (mode, op0);
17597 /* Force constants other than zero into memory. We do not know how
17598 the instructions used to build constants modify the upper 64 bits
17599 of the register, once we have that information we may be able
17600 to handle some of them more efficiently. */
17601 if (can_create_pseudo_p ()
17602 && (CONSTANT_P (op1)
17603 || (GET_CODE (op1) == SUBREG
17604 && CONSTANT_P (SUBREG_REG (op1))))
17605 && ((register_operand (op0, mode)
17606 && !standard_sse_constant_p (op1))
17607 /* ix86_expand_vector_move_misalign() does not like constants. */
17608 || (SSE_REG_MODE_P (mode)
17610 && MEM_ALIGN (op0) < align)))
/* A constant wrapped in a SUBREG is forced to memory (or a register) in
   the inner mode, then re-wrapped as a subreg of the new rtx.  */
17612 if (SUBREG_P (op1))
17614 machine_mode imode = GET_MODE (SUBREG_REG (op1));
17615 rtx r = force_const_mem (imode, SUBREG_REG (op1));
17617 r = validize_mem (r);
17619 r = force_reg (imode, SUBREG_REG (op1));
17620 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
17623 op1 = validize_mem (force_const_mem (mode, op1));
17626 /* We need to check memory alignment for SSE mode since attribute
17627 can make operands unaligned. */
17628 if (can_create_pseudo_p ()
17629 && SSE_REG_MODE_P (mode)
17630 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17631 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17635 /* ix86_expand_vector_move_misalign() does not like both
17636 arguments in memory. */
17637 if (!register_operand (op0, mode)
17638 && !register_operand (op1, mode))
17639 op1 = force_reg (mode, op1);
/* Hand the (possibly fixed-up) pair to the misaligned-move expander.  */
17641 tmp[0] = op0; tmp[1] = op1;
17642 ix86_expand_vector_move_misalign (mode, tmp);
17646 /* Make operand1 a register if it isn't already. */
17647 if (can_create_pseudo_p ()
17648 && !register_operand (op0, mode)
17649 && !register_operand (op1, mode))
17651 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
/* Aligned / already-legitimate case: emit the plain SET.  */
17655 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17658 /* Split 32-byte AVX unaligned load and store if needed. */
/* OP0 := OP1 for a 32-byte AVX mode where at least one side is an
   unaligned MEM.  Depending on tuning flags, either emit one unaligned
   256-bit load/store or split it into two 128-bit halves.
   NOTE(review): extraction gaps — parts of this function are missing
   from this view.  */
17661 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17664 rtx (*extract) (rtx, rtx, rtx);
17665 rtx (*load_unaligned) (rtx, rtx);
17666 rtx (*store_unaligned) (rtx, rtx);
/* Pick the extract/load/store generators matching the vector mode.  */
17669 switch (GET_MODE (op0))
17672 gcc_unreachable ();
17674 extract = gen_avx_vextractf128v32qi;
17675 load_unaligned = gen_avx_loaddquv32qi;
17676 store_unaligned = gen_avx_storedquv32qi;
17680 extract = gen_avx_vextractf128v8sf;
17681 load_unaligned = gen_avx_loadups256;
17682 store_unaligned = gen_avx_storeups256;
17686 extract = gen_avx_vextractf128v4df;
17687 load_unaligned = gen_avx_loadupd256;
17688 store_unaligned = gen_avx_storeupd256;
/* Load case: optionally split into two 16-byte loads re-joined with
   VEC_CONCAT (tuning says split unaligned 256-bit loads is faster).  */
17695 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17696 && optimize_insn_for_speed_p ())
17698 rtx r = gen_reg_rtx (mode);
17699 m = adjust_address (op1, mode, 0);
17700 emit_move_insn (r, m);
17701 m = adjust_address (op1, mode, 16);
17702 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17703 emit_move_insn (op0, r);
17705 /* Normal *mov<mode>_internal pattern will handle
17706 unaligned loads just fine if misaligned_operand
17707 is true, and without the UNSPEC it can be combined
17708 with arithmetic instructions. */
17709 else if (misaligned_operand (op1, GET_MODE (op1)))
17710 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17712 emit_insn (load_unaligned (op0, op1));
/* Store case: either two 128-bit extracts or one unaligned store.  */
17714 else if (MEM_P (op0))
17716 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17717 && optimize_insn_for_speed_p ())
17719 m = adjust_address (op0, mode, 0);
17720 emit_insn (extract (m, op1, const0_rtx));
17721 m = adjust_address (op0, mode, 16);
/* copy_rtx: each emitted insn needs its own rtx for OP1.  */
17722 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
17725 emit_insn (store_unaligned (op0, op1));
/* Neither operand was a MEM — caller violated the contract.  */
17728 gcc_unreachable ();
17731 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17732 straight to ix86_expand_vector_move. */
17733 /* Code generation for scalar reg-reg moves of single and double precision data:
17734 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17738 if (x86_sse_partial_reg_dependency == true)
17743 Code generation for scalar loads of double precision data:
17744 if (x86_sse_split_regs == true)
17745 movlpd mem, reg (gas syntax)
17749 Code generation for unaligned packed loads of single precision data
17750 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17751 if (x86_sse_unaligned_move_optimal)
17754 if (x86_sse_partial_reg_dependency == true)
17766 Code generation for unaligned packed loads of double precision data
17767 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17768 if (x86_sse_unaligned_move_optimal)
17771 if (x86_sse_split_regs == true)
/* Expand a misaligned vector move OPERANDS[0] := OPERANDS[1] in MODE,
   choosing the unaligned load/store strategy by vector width (64-byte
   AVX-512, 32-byte AVX, 16-byte SSE) and tuning flags.
   NOTE(review): extraction gaps — several statements of this function
   are missing from this view; comments describe only the visible code.  */
17784 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17786 rtx op0, op1, orig_op0 = NULL_RTX, m;
17787 rtx (*load_unaligned) (rtx, rtx);
17788 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte (AVX-512) vectors: canonicalize integer vectors to V16SI and
   use the avx512f unaligned load/store patterns.  */
17793 if (GET_MODE_SIZE (mode) == 64)
17795 switch (GET_MODE_CLASS (mode))
17797 case MODE_VECTOR_INT:
17799 if (GET_MODE (op0) != V16SImode)
17804 op0 = gen_reg_rtx (V16SImode);
17807 op0 = gen_lowpart (V16SImode, op0);
17809 op1 = gen_lowpart (V16SImode, op1);
17812 case MODE_VECTOR_FLOAT:
17813 switch (GET_MODE (op0))
17816 gcc_unreachable ();
17818 load_unaligned = gen_avx512f_loaddquv16si;
17819 store_unaligned = gen_avx512f_storedquv16si;
17822 load_unaligned = gen_avx512f_loadups512;
17823 store_unaligned = gen_avx512f_storeups512;
17826 load_unaligned = gen_avx512f_loadupd512;
17827 store_unaligned = gen_avx512f_storeupd512;
17832 emit_insn (load_unaligned (op0, op1));
17833 else if (MEM_P (op0))
17834 emit_insn (store_unaligned (op0, op1));
17836 gcc_unreachable ();
/* If OP0 was retyped above, copy the result back in the original mode.  */
17838 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17842 gcc_unreachable ();
/* 32-byte (AVX) vectors: integer vectors go via V32QI; float vectors are
   handled by the AVX-256 split helper.  */
17849 && GET_MODE_SIZE (mode) == 32)
17851 switch (GET_MODE_CLASS (mode))
17853 case MODE_VECTOR_INT:
17855 if (GET_MODE (op0) != V32QImode)
17860 op0 = gen_reg_rtx (V32QImode);
17863 op0 = gen_lowpart (V32QImode, op0);
17865 op1 = gen_lowpart (V32QImode, op1);
17868 case MODE_VECTOR_FLOAT:
17869 ix86_avx256_split_vector_move_misalign (op0, op1);
17871 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17875 gcc_unreachable ();
17883 /* Normal *mov<mode>_internal pattern will handle
17884 unaligned loads just fine if misaligned_operand
17885 is true, and without the UNSPEC it can be combined
17886 with arithmetic instructions. */
17888 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17889 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17890 && misaligned_operand (op1, GET_MODE (op1)))
17891 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17892 /* ??? If we have typed data, then it would appear that using
17893 movdqu is the only way to get unaligned data loaded with
/* 16-byte SSE loads from memory below this point.  */
17895 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17897 if (GET_MODE (op0) != V16QImode)
17900 op0 = gen_reg_rtx (V16QImode);
17902 op1 = gen_lowpart (V16QImode, op1);
17903 /* We will eventually emit movups based on insn attributes. */
17904 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17906 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17908 else if (TARGET_SSE2 && mode == V2DFmode)
17913 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17914 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17915 || optimize_insn_for_size_p ())
17917 /* We will eventually emit movups based on insn attributes. */
17918 emit_insn (gen_sse2_loadupd (op0, op1));
17922 /* When SSE registers are split into halves, we can avoid
17923 writing to the top half twice. */
17924 if (TARGET_SSE_SPLIT_REGS)
17926 emit_clobber (op0);
17931 /* ??? Not sure about the best option for the Intel chips.
17932 The following would seem to satisfy; the register is
17933 entirely cleared, breaking the dependency chain. We
17934 then store to the upper half, with a dependency depth
17935 of one. A rumor has it that Intel recommends two movsd
17936 followed by an unpacklpd, but this is unconfirmed. And
17937 given that the dependency depth of the unpacklpd would
17938 still be one, I'm not sure why this would be better. */
17939 zero = CONST0_RTX (V2DFmode);
/* Two half-width loads: low then high 8 bytes of the V2DF source.  */
17942 m = adjust_address (op1, DFmode, 0);
17943 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17944 m = adjust_address (op1, DFmode, 8);
17945 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17952 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17953 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17954 || optimize_insn_for_size_p ())
17956 if (GET_MODE (op0) != V4SFmode)
17959 op0 = gen_reg_rtx (V4SFmode);
17961 op1 = gen_lowpart (V4SFmode, op1);
17962 emit_insn (gen_sse_loadups (op0, op1));
17964 emit_move_insn (orig_op0,
17965 gen_lowpart (GET_MODE (orig_op0), op0));
17969 if (mode != V4SFmode)
17970 t = gen_reg_rtx (V4SFmode);
/* Clearing first avoids a partial-register stall on some chips.  */
17974 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17975 emit_move_insn (t, CONST0_RTX (V4SFmode));
17979 m = adjust_address (op1, V2SFmode, 0);
17980 emit_insn (gen_sse_loadlps (t, t, m));
17981 m = adjust_address (op1, V2SFmode, 8);
17982 emit_insn (gen_sse_loadhps (t, t, m));
17983 if (mode != V4SFmode)
17984 emit_move_insn (op0, gen_lowpart (mode, t));
/* Store side: destination is an unaligned MEM.  */
17987 else if (MEM_P (op0))
17989 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17991 op0 = gen_lowpart (V16QImode, op0);
17992 op1 = gen_lowpart (V16QImode, op1);
17993 /* We will eventually emit movups based on insn attributes. */
17994 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17996 else if (TARGET_SSE2 && mode == V2DFmode)
17999 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
18000 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18001 || optimize_insn_for_size_p ())
18002 /* We will eventually emit movups based on insn attributes. */
18003 emit_insn (gen_sse2_storeupd (op0, op1));
18006 m = adjust_address (op0, DFmode, 0);
18007 emit_insn (gen_sse2_storelpd (m, op1));
18008 m = adjust_address (op0, DFmode, 8);
18009 emit_insn (gen_sse2_storehpd (m, op1));
18014 if (mode != V4SFmode)
18015 op1 = gen_lowpart (V4SFmode, op1);
18018 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
18019 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18020 || optimize_insn_for_size_p ())
18022 op0 = gen_lowpart (V4SFmode, op0);
18023 emit_insn (gen_sse_storeups (op0, op1));
18027 m = adjust_address (op0, V2SFmode, 0);
18028 emit_insn (gen_sse_storelps (m, op1));
18029 m = adjust_address (op0, V2SFmode, 8);
18030 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
18035 gcc_unreachable ();
18038 /* Helper function of ix86_fixup_binary_operands to canonicalize
18039 operand order. Returns true if the operands should be swapped. */
/* CODE is the rtx operation, MODE its machine mode; OPERANDS[0..2] are
   dst, src1, src2.  Only commutative operations are ever swapped.  */
18042 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
18045 rtx dst = operands[0];
18046 rtx src1 = operands[1];
18047 rtx src2 = operands[2];
18049 /* If the operation is not commutative, we can't do anything. */
18050 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
18053 /* Highest priority is that src1 should match dst. */
18054 if (rtx_equal_p (dst, src1))
18056 if (rtx_equal_p (dst, src2))
18059 /* Next highest priority is that immediate constants come second. */
18060 if (immediate_operand (src2, mode))
18062 if (immediate_operand (src1, mode))
18065 /* Lowest priority is that memory references should come second. */
18075 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
18076 destination to use for the operation. If different from the true
18077 destination in operands[0], a copy operation will be required. */
/* Rewrites OPERANDS[1]/OPERANDS[2] in place; the returned rtx is the
   destination the caller should emit into (a fresh pseudo when the real
   destination is a non-matching MEM).  */
18080 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
18083 rtx dst = operands[0];
18084 rtx src1 = operands[1];
18085 rtx src2 = operands[2];
18087 /* Canonicalize operand order. */
18088 if (ix86_swap_binary_operands_p (code, mode, operands))
18090 /* It is invalid to swap operands of different modes. */
18091 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
18093 std::swap (src1, src2);
18096 /* Both source operands cannot be in memory. */
18097 if (MEM_P (src1) && MEM_P (src2))
18099 /* Optimization: Only read from memory once. */
18100 if (rtx_equal_p (src1, src2))
18102 src2 = force_reg (mode, src2);
18105 else if (rtx_equal_p (dst, src1))
18106 src2 = force_reg (mode, src2);
18108 src1 = force_reg (mode, src1);
18111 /* If the destination is memory, and we do not have matching source
18112 operands, do things in registers. */
18113 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18114 dst = gen_reg_rtx (mode);
18116 /* Source 1 cannot be a constant. */
18117 if (CONSTANT_P (src1))
18118 src1 = force_reg (mode, src1);
18120 /* Source 1 cannot be a non-matching memory. */
18121 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18122 src1 = force_reg (mode, src1);
18124 /* Improve address combine. */
18126 && GET_MODE_CLASS (mode) == MODE_INT
18128 src2 = force_reg (mode, src2);
/* Write the (possibly swapped/forced) sources back for the caller.  */
18130 operands[1] = src1;
18131 operands[2] = src2;
18135 /* Similarly, but assume that the destination has already been
18136 set up properly. */
/* Same as ix86_fixup_binary_operands, but asserts that no copy to a
   temporary destination was needed — OPERANDS[0] must come back as DST.  */
18139 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18140 machine_mode mode, rtx operands[])
18142 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18143 gcc_assert (dst == operands[0]);
18146 /* Attempt to expand a binary operator. Make the expansion closer to the
18147 actual machine, then just general_operand, which will allow 3 separate
18148 memory references (one output, two input) in a single insn. */
/* NOTE(review): extraction gaps — some statements are missing from this
   view; comments describe only the visible code.  */
18151 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18154 rtx src1, src2, dst, op, clob;
18156 dst = ix86_fixup_binary_operands (code, mode, operands);
18157 src1 = operands[1];
18158 src2 = operands[2];
18160 /* Emit the instruction. */
18162 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18163 if (reload_in_progress)
18165 /* Reload doesn't know about the flags register, and doesn't know that
18166 it doesn't want to clobber it. We can only do this with PLUS. */
18167 gcc_assert (code == PLUS);
18170 else if (reload_completed
18172 && !rtx_equal_p (dst, src1))
18174 /* This is going to be an LEA; avoid splitting it later. */
/* Normal path: emit the SET wrapped in a PARALLEL with an explicit
   clobber of the flags register, since x86 ALU ops set EFLAGS.  */
18179 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18180 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18183 /* Fix up the destination if needed. */
18184 if (dst != operands[0])
18185 emit_move_insn (operands[0], dst);
18188 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18189 the given OPERANDS. */
/* NOTE(review): extraction gaps — some statements (including parts of
   the switch and operand setup) are missing from this view.  */
18192 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18195 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18196 if (GET_CODE (operands[1]) == SUBREG)
18201 else if (GET_CODE (operands[2]) == SUBREG)
18206 /* Optimize (__m128i) d | (__m128i) e and similar code
18207 when d and e are float vectors into float vector logical
18208 insn. In C/C++ without using intrinsics there is no other way
18209 to express vector logical operation on float vectors than
18210 to cast them temporarily to integer vectors. */
18212 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18213 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18214 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18215 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18216 && SUBREG_BYTE (op1) == 0
18217 && (GET_CODE (op2) == CONST_VECTOR
18218 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18219 && SUBREG_BYTE (op2) == 0))
18220 && can_create_pseudo_p ())
18223 switch (GET_MODE (SUBREG_REG (op1)))
/* Perform the operation directly in the float-vector mode of the
   SUBREG operand and copy the low part back in the integer mode.  */
18231 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18232 if (GET_CODE (op2) == CONST_VECTOR)
18234 op2 = gen_lowpart (GET_MODE (dst), op2);
18235 op2 = force_reg (GET_MODE (dst), op2);
18240 op2 = SUBREG_REG (operands[2]);
18241 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18242 op2 = force_reg (GET_MODE (dst), op2);
18244 op1 = SUBREG_REG (op1);
18245 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18246 op1 = force_reg (GET_MODE (dst), op1);
18247 emit_insn (gen_rtx_SET (VOIDmode, dst,
18248 gen_rtx_fmt_ee (code, GET_MODE (dst),
18250 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Fallback: legitimize both operands in MODE and emit the plain SET.  */
18256 if (!nonimmediate_operand (operands[1], mode))
18257 operands[1] = force_reg (mode, operands[1]);
18258 if (!nonimmediate_operand (operands[2], mode))
18259 operands[2] = force_reg (mode, operands[2]);
18260 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18261 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18262 gen_rtx_fmt_ee (code, mode, operands[1],
18266 /* Return TRUE or FALSE depending on whether the binary operator meets the
18267 appropriate constraints. */
/* Predicate counterpart of ix86_fixup_binary_operands: checks (without
   rewriting) that dst/src1/src2 form a valid x86 two-address op.  */
18270 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18273 rtx dst = operands[0];
18274 rtx src1 = operands[1];
18275 rtx src2 = operands[2];
18277 /* Both source operands cannot be in memory. */
18278 if (MEM_P (src1) && MEM_P (src2))
18281 /* Canonicalize operand order for commutative operators. */
18282 if (ix86_swap_binary_operands_p (code, mode, operands))
18283 std::swap (src1, src2);
18285 /* If the destination is memory, we must have a matching source operand. */
18286 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18289 /* Source 1 cannot be a constant. */
18290 if (CONSTANT_P (src1))
18293 /* Source 1 cannot be a non-matching memory. */
18294 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18295 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18296 return (code == AND
18299 || (TARGET_64BIT && mode == DImode))
18300 && satisfies_constraint_L (src2));
18305 /* Attempt to expand a unary operator. Make the expansion closer to the
18306 actual machine, then just general_operand, which will allow 2 separate
18307 memory references (one output, one input) in a single insn. */
/* NOTE(review): extraction gaps — some statements are missing from this
   view; comments describe only the visible code.  */
18310 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18313 bool matching_memory = false;
18314 rtx src, dst, op, clob;
18319 /* If the destination is memory, and we do not have matching source
18320 operands, do things in registers. */
18323 if (rtx_equal_p (dst, src))
18324 matching_memory = true;
18326 dst = gen_reg_rtx (mode);
18329 /* When source operand is memory, destination must match. */
18330 if (MEM_P (src) && !matching_memory)
18331 src = force_reg (mode, src);
18333 /* Emit the instruction. */
18335 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT is the one x86 unary ALU op that does not touch EFLAGS, so it can
   be emitted bare; other codes get an explicit flags clobber below.  */
18336 if (reload_in_progress || code == NOT)
18338 /* Reload doesn't know about the flags register, and doesn't know that
18339 it doesn't want to clobber it. */
18340 gcc_assert (code == NOT);
18345 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18346 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18349 /* Fix up the destination if needed. */
18350 if (dst != operands[0])
18351 emit_move_insn (operands[0], dst);
18354 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18355 divisor are within the range [0-255]. */
/* Emits a runtime dispatch: test (dividend | divisor) against ~0xFF; if
   both fit in 8 bits, use the cheap 8-bit unsigned divide, otherwise the
   full-width signed/unsigned divmod.  OPERANDS[0]=quotient,
   OPERANDS[1]=remainder, OPERANDS[2]=dividend, OPERANDS[3]=divisor.
   NOTE(review): extraction gaps — some statements are missing from this
   view.  */
18358 ix86_split_idivmod (machine_mode mode, rtx operands[],
18361 rtx_code_label *end_label, *qimode_label;
18362 rtx insn, div, mod;
18363 rtx scratch, tmp0, tmp1, tmp2;
18364 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18365 rtx (*gen_zero_extend) (rtx, rtx);
18366 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Select SImode or DImode pattern generators by MODE.  */
18371 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18372 gen_test_ccno_1 = gen_testsi_ccno_1;
18373 gen_zero_extend = gen_zero_extendqisi2;
18376 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18377 gen_test_ccno_1 = gen_testdi_ccno_1;
18378 gen_zero_extend = gen_zero_extendqidi2;
18381 gcc_unreachable ();
18384 end_label = gen_label_rtx ();
18385 qimode_label = gen_label_rtx ();
18387 scratch = gen_reg_rtx (mode);
18389 /* Use 8bit unsigned divimod if dividend and divisor are within
18390 the range [0-255]. */
18391 emit_move_insn (scratch, operands[2]);
18392 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18393 scratch, 1, OPTAB_DIRECT)_
18394 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18395 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18396 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18397 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18398 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18400 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
/* 50/50 static prediction for taking the 8-bit fast path.  */
18401 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18402 JUMP_LABEL (insn) = qimode_label;
18404 /* Generate original signed/unsigned divimod. */
18405 div = gen_divmod4_1 (operands[0], operands[1],
18406 operands[2], operands[3]);
18409 /* Branch to the end. */
18410 emit_jump_insn (gen_jump (end_label));
18413 /* Generate 8bit unsigned divide. */
18414 emit_label (qimode_label);
18415 /* Don't use operands[0] for result of 8bit divide since not all
18416 registers support QImode ZERO_EXTRACT. */
18417 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18418 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18419 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18420 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* Build REG_EQUAL notes describing the quotient/remainder values.  */
18424 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18425 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18429 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18430 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18433 /* Extract remainder from AH. */
18434 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18435 if (REG_P (operands[1]))
18436 insn = emit_move_insn (operands[1], tmp1);
18439 /* Need a new scratch register since the old one has result
18441 scratch = gen_reg_rtx (mode);
18442 emit_move_insn (scratch, tmp1);
18443 insn = emit_move_insn (operands[1], scratch);
18445 set_unique_reg_note (insn, REG_EQUAL, mod);
18447 /* Zero extend quotient from AL. */
18448 tmp1 = gen_lowpart (QImode, tmp0);
18449 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18450 set_unique_reg_note (insn, REG_EQUAL, div);
18452 emit_label (end_label);
18455 #define LEA_MAX_STALL (3)
18456 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18458 /* Increase given DISTANCE in half-cycles according to
18459 dependencies between PREV and NEXT instructions.
18460 Add 1 half-cycle if there is no dependency and
18461 go to next cycle if there is some dependecy. */
18463 static unsigned int
18464 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
/* Missing insns are treated conservatively as dependent (round up to the
   next full cycle: +2 half-cycles after aligning to an even boundary).  */
18468 if (!prev || !next)
18469 return distance + (distance & 1) + 2;
/* Without dataflow info we can only assume independence (+1).  */
18471 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18472 return distance + 1;
/* A register defined by PREV and used by NEXT forces a new cycle.  */
18474 FOR_EACH_INSN_USE (use, next)
18475 FOR_EACH_INSN_DEF (def, prev)
18476 if (!DF_REF_IS_ARTIFICIAL (def)
18477 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18478 return distance + (distance & 1) + 2;
18480 return distance + 1;
18483 /* Function checks if instruction INSN defines register number
18484 REGNO1 or REGNO2. */
/* Walks INSN's dataflow defs, ignoring artificial refs; either regno may
   be INVALID_REGNUM to check only one register.  */
18487 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18492 FOR_EACH_INSN_DEF (def, insn)
18493 if (DF_REF_REG_DEF_P (def)
18494 && !DF_REF_IS_ARTIFICIAL (def)
18495 && (regno1 == DF_REF_REGNO (def)
18496 || regno2 == DF_REF_REGNO (def)))
18502 /* Function checks if instruction INSN uses register number
18503 REGNO as a part of address expression. */
/* DF_REF_REG_MEM_P selects uses that occur inside a MEM address.  */
18506 insn_uses_reg_mem (unsigned int regno, rtx insn)
18510 FOR_EACH_INSN_USE (use, insn)
18511 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18517 /* Search backward for non-agu definition of register number REGNO1
18518 or register number REGNO2 in basic block starting from instruction
18519 START up to head of basic block or instruction INSN.
18521 Function puts true value into *FOUND var if definition was found
18522 and false otherwise.
18524 Distance in half-cycles between START and found instruction or head
18525 of BB is added to DISTANCE and returned. */
/* NOTE(review): extraction gaps — parts of the loop body are missing
   from this view.  */
18528 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18529 rtx_insn *insn, int distance,
18530 rtx_insn *start, bool *found)
18532 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18533 rtx_insn *prev = start;
18534 rtx_insn *next = NULL;
/* Walk backwards, accumulating half-cycle distance, until the search
   budget (LEA_SEARCH_THRESHOLD) is exhausted.  */
18540 && distance < LEA_SEARCH_THRESHOLD)
18542 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18544 distance = increase_distance (prev, next, distance);
18545 if (insn_defines_reg (regno1, regno2, prev))
/* A definition counts as non-AGU unless the defining insn is an LEA.  */
18547 if (recog_memoized (prev) < 0
18548 || get_attr_type (prev) != TYPE_LEA)
18557 if (prev == BB_HEAD (bb))
18560 prev = PREV_INSN (prev);
18566 /* Search backward for non-agu definition of register number REGNO1
18567 or register number REGNO2 in INSN's basic block until
18568 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18569 2. Reach neighbour BBs boundary, or
18570 3. Reach agu definition.
18571 Returns the distance between the non-agu definition point and INSN.
18572 If no definition point, returns -1. */
/* NOTE(review): extraction gaps — some statements are missing from this
   view.  */
18575 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18578 basic_block bb = BLOCK_FOR_INSN (insn);
18580 bool found = false;
/* First scan INSN's own block backwards from the previous insn.  */
18582 if (insn != BB_HEAD (bb))
18583 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18584 distance, PREV_INSN (insn),
/* Not found with budget remaining: continue into predecessor blocks.  */
18587 if (!found && distance < LEA_SEARCH_THRESHOLD)
18591 bool simple_loop = false;
/* A self-loop predecessor lets us rescan the same block's tail.  */
18593 FOR_EACH_EDGE (e, ei, bb->preds)
18596 simple_loop = true;
18601 distance = distance_non_agu_define_in_bb (regno1, regno2,
18603 BB_END (bb), &found);
/* Otherwise take the worst (shortest) distance over all preds.  */
18606 int shortest_dist = -1;
18607 bool found_in_bb = false;
18609 FOR_EACH_EDGE (e, ei, bb->preds)
18612 = distance_non_agu_define_in_bb (regno1, regno2,
18618 if (shortest_dist < 0)
18619 shortest_dist = bb_dist;
18620 else if (bb_dist > 0)
18621 shortest_dist = MIN (bb_dist, shortest_dist);
18627 distance = shortest_dist;
18631 /* get_attr_type may modify recog data. We want to make sure
18632 that recog data is valid for instruction INSN, on which
18633 distance_non_agu_define is called. INSN is unchanged here. */
18634 extract_insn_cached (insn);
/* Convert half-cycles to full cycles for the caller.  */
18639 return distance >> 1;
18642 /* Return the distance in half-cycles between INSN and the next
18643 insn that uses register number REGNO in memory address added
18644 to DISTANCE. Return -1 if REGNO0 is set.
18646 Put true value into *FOUND if register usage was found and
18648 Put true value into *REDEFINED if register redefinition was
18649 found and false otherwise. */
/* NOTE(review): extraction gaps — parts of the loop body are missing
   from this view.  */
18652 distance_agu_use_in_bb (unsigned int regno,
18653 rtx_insn *insn, int distance, rtx_insn *start,
18654 bool *found, bool *redefined)
18656 basic_block bb = NULL;
18657 rtx_insn *next = start;
18658 rtx_insn *prev = NULL;
18661 *redefined = false;
18663 if (start != NULL_RTX)
18665 bb = BLOCK_FOR_INSN (start);
18666 if (start != BB_HEAD (bb))
18667 /* If insn and start belong to the same bb, set prev to insn,
18668 so the call to increase_distance will increase the distance
18669 between insns by 1. */
/* Walk forwards until the search budget is exhausted.  */
18675 && distance < LEA_SEARCH_THRESHOLD)
18677 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18679 distance = increase_distance(prev, next, distance);
18680 if (insn_uses_reg_mem (regno, next))
18682 /* Return DISTANCE if OP0 is used in memory
18683 address in NEXT. */
18688 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18690 /* Return -1 if OP0 is set in NEXT. */
18698 if (next == BB_END (bb))
18701 next = NEXT_INSN (next);
18707 /* Return the distance between INSN and the next insn that uses
18708 register number REGNO0 in memory address. Return -1 if no such
18709 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
/* Forward mirror of distance_non_agu_define: scans INSN's block, then
   successor blocks.  NOTE(review): extraction gaps in this view.  */
18712 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18714 basic_block bb = BLOCK_FOR_INSN (insn);
18716 bool found = false;
18717 bool redefined = false;
18719 if (insn != BB_END (bb))
18720 distance = distance_agu_use_in_bb (regno0, insn, distance,
18722 &found, &redefined);
18724 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18728 bool simple_loop = false;
/* A self-loop successor lets us rescan this block from its head.  */
18730 FOR_EACH_EDGE (e, ei, bb->succs)
18733 simple_loop = true;
18738 distance = distance_agu_use_in_bb (regno0, insn,
18739 distance, BB_HEAD (bb),
18740 &found, &redefined);
/* Otherwise take the worst (shortest) distance over all succs.  */
18743 int shortest_dist = -1;
18744 bool found_in_bb = false;
18745 bool redefined_in_bb = false;
18747 FOR_EACH_EDGE (e, ei, bb->succs)
18750 = distance_agu_use_in_bb (regno0, insn,
18751 distance, BB_HEAD (e->dest),
18752 &found_in_bb, &redefined_in_bb);
18755 if (shortest_dist < 0)
18756 shortest_dist = bb_dist;
18757 else if (bb_dist > 0)
18758 shortest_dist = MIN (bb_dist, shortest_dist);
18764 distance = shortest_dist;
/* No AGU use found, or the register was overwritten first.  */
18768 if (!found || redefined)
/* Convert half-cycles to full cycles for the caller.  */
18771 return distance >> 1;
18774 /* Define this macro to tune LEA priority vs ADD, it take effect when
18775 there is a dilemma of choicing LEA or ADD
18776 Negative value: ADD is more preferred than LEA
18778 Positive value: LEA is more preferred than ADD*/
18779 #define IX86_LEA_PRIORITY 0
18781 /* Return true if usage of lea INSN has performance advantage
18782 over a sequence of instructions. Instructions sequence has
18783 SPLIT_COST cycles higher latency than lea latency. */
/* REGNO0 is the destination, REGNO1/REGNO2 the sources; HAS_SCALE is
   true when the LEA uses a scaled index.  NOTE(review): extraction gaps
   in this view.  */
18786 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18787 unsigned int regno2, int split_cost, bool has_scale)
18789 int dist_define, dist_use;
18791 /* For Silvermont if using a 2-source or 3-source LEA for
18792 non-destructive destination purposes, or due to wanting
18793 ability to use SCALE, the use of LEA is justified. */
18794 if (TARGET_SILVERMONT || TARGET_INTEL)
18798 if (split_cost < 1)
18800 if (regno0 == regno1 || regno0 == regno2)
/* General case: compare the distance to the nearest non-AGU def of the
   sources against the distance to the nearest AGU use of the result.  */
18805 dist_define = distance_non_agu_define (regno1, regno2, insn);
18806 dist_use = distance_agu_use (regno0, insn);
18808 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18810 /* If there is no non AGU operand definition, no AGU
18811 operand usage and split cost is 0 then both lea
18812 and non lea variants have same priority. Currently
18813 we prefer lea for 64 bit code and non lea on 32 bit
18815 if (dist_use < 0 && split_cost == 0)
18816 return TARGET_64BIT || IX86_LEA_PRIORITY;
18821 /* With longer definitions distance lea is more preferable.
18822 Here we change it to take into account splitting cost and
18824 dist_define += split_cost + IX86_LEA_PRIORITY;
18826 /* If there is no use in memory addess then we just check
18827 that split cost exceeds AGU stall. */
18829 return dist_define > LEA_MAX_STALL;
18831 /* If this insn has both backward non-agu dependence and forward
18832 agu dependence, the one with short distance takes effect. */
18833 return dist_define >= dist_use;
18836 /* Return true if it is legal to clobber flags by INSN and
18837 false otherwise. */
/* Scans forward from INSN to the end of its block: flags may be
   clobbered only if no later insn reads FLAGS_REG before redefining it,
   and FLAGS_REG is not live out of the block.  */
18840 ix86_ok_to_clobber_flags (rtx_insn *insn)
18842 basic_block bb = BLOCK_FOR_INSN (insn);
18848 if (NONDEBUG_INSN_P (insn))
18850 FOR_EACH_INSN_USE (use, insn)
18851 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
/* A fresh definition of the flags kills the old value — safe.  */
18854 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18858 if (insn == BB_END (bb))
18861 insn = NEXT_INSN (insn);
18864 live = df_get_live_out(bb);
18865 return !REGNO_REG_SET_P (live, FLAGS_REG);
18868 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18869 move and add to avoid AGU stalls. */
18872 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18874 unsigned int regno0, regno1, regno2;
18876 /* Check if we need to optimize. */
18877 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18880 /* Check it is correct to split here. */
/* Splitting into mov+add clobbers EFLAGS, so that must be legal.  */
18881 if (!ix86_ok_to_clobber_flags(insn))
18884 regno0 = true_regnum (operands[0]);
18885 regno1 = true_regnum (operands[1]);
18886 regno2 = true_regnum (operands[2]);
18888 /* We need to split only adds with non destructive
18889 destination operand. */
18890 if (regno0 == regno1 || regno0 == regno2)
/* Split (cost 1) only when the LEA form would not win anyway.  */
18893 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18896 /* Return true if we should emit lea instruction instead of mov
/* NOTE(review): extract is elided; the rest of this header comment and the
   guard-branch bodies are missing from this view.  */
18900 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18902 unsigned int regno0, regno1;
18904 /* Check if we need to optimize. */
18905 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18908 /* Use lea for reg to reg moves only. */
18909 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18912 regno0 = true_regnum (operands[0]);
18913 regno1 = true_regnum (operands[1]);
/* Zero split cost: lea vs. mov is a pure latency/priority comparison.  */
18915 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18918 /* Return true if we need to split lea into a sequence of
18919 instructions to avoid AGU stalls. */
/* NOTE(review): extract is elided; guard-branch bodies, split_cost
   initialization and several statements are missing from this view.
   Visible logic: decompose the address, estimate the cycle cost of
   replacing the lea with moves/adds/shifts, then compare against the
   AGU-stall model via ix86_lea_outperforms.  */
18922 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18924 unsigned int regno0, regno1, regno2;
18926 struct ix86_address parts;
18929 /* Check we need to optimize. */
18930 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18933 /* The "at least two components" test below might not catch simple
18934 move or zero extension insns if parts.base is non-NULL and parts.disp
18935 is const0_rtx as the only components in the address, e.g. if the
18936 register is %rbp or %r13. As this test is much cheaper and moves or
18937 zero extensions are the common case, do this check first. */
18938 if (REG_P (operands[1])
18939 || (SImode_address_operand (operands[1], VOIDmode)
18940 && REG_P (XEXP (operands[1], 0))))
18943 /* Check if it is OK to split here. */
18944 if (!ix86_ok_to_clobber_flags (insn))
18947 ok = ix86_decompose_address (operands[1], &parts);
18950 /* There should be at least two components in the address. */
18951 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18952 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18955 /* We should not split into add if non legitimate pic
18956 operand is used as displacement. */
18957 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18960 regno0 = true_regnum (operands[0]) ;
18961 regno1 = INVALID_REGNUM;
18962 regno2 = INVALID_REGNUM;
18965 regno1 = true_regnum (parts.base);
18967 regno2 = true_regnum (parts.index);
18971 /* Compute how many cycles we will add to execution time
18972 if split lea into a sequence of instructions. */
18973 if (parts.base || parts.index)
18975 /* Have to use mov instruction if non desctructive
18976 destination form is used. */
18977 if (regno1 != regno0 && regno2 != regno0)
18980 /* Have to add index to base if both exist. */
18981 if (parts.base && parts.index)
18984 /* Have to use shift and adds if scale is 2 or greater. */
18985 if (parts.scale > 1)
18987 if (regno0 != regno1)
18989 else if (regno2 == regno0)
18992 split_cost += parts.scale;
18995 /* Have to use add instruction with immediate if
18996 disp is non zero. */
18997 if (parts.disp && parts.disp != const0_rtx)
19000 /* Subtract the price of lea. */
19004 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
19008 /* Emit x86 binary operand CODE in mode MODE, where the first operand
19009 matches destination. RTX includes clobber of FLAGS_REG. */
/* NOTE(review): elided extract — the parameter list continuation (dst, src)
   and local declarations are missing from this view.  */
19012 ix86_emit_binop (enum rtx_code code, machine_mode mode,
/* Build (set dst (code dst src)) paralleled with a FLAGS_REG clobber,
   matching the canonical form of i386 two-address arithmetic patterns.  */
19017 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
19018 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19020 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19023 /* Return true if regno1 def is nearest to the insn. */
/* NOTE(review): elided extract — the loop braces, the return statements for
   the two insn_defines_reg hits and the final return are missing from this
   view.  Visible logic: scan backwards from INSN to the head of its basic
   block and report which of the two registers was defined most recently.  */
19026 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
19028 rtx_insn *prev = insn;
19029 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
19033 while (prev && prev != start)
/* Skip notes/debug insns while walking backwards.  */
19035 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
19037 prev = PREV_INSN (prev);
19040 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
19042 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
19044 prev = PREV_INSN (prev);
19047 /* None of the regs is defined in the bb. */
19051 /* Split lea instructions into a sequence of instructions
19052 which are executed on ALU to avoid AGU stalls.
19053 It is assumed that it is allowed to clobber flags register
19054 at lea position. */
/* NOTE(review): elided extract — local declarations, several braces and
   else-branches are missing from this view.  Visible logic: decompose the
   lea address and re-materialize it with moves, shifts and adds emitted
   through ix86_emit_binop, choosing statement order by register reuse and
   (via find_nearest_reg_def) by definition distance.  */
19057 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
19059 unsigned int regno0, regno1, regno2;
19060 struct ix86_address parts;
19064 ok = ix86_decompose_address (operands[1], &parts);
19067 target = gen_lowpart (mode, operands[0]);
19069 regno0 = true_regnum (target);
19070 regno1 = INVALID_REGNUM;
19071 regno2 = INVALID_REGNUM;
19075 parts.base = gen_lowpart (mode, parts.base);
19076 regno1 = true_regnum (parts.base);
19081 parts.index = gen_lowpart (mode, parts.index);
19082 regno2 = true_regnum (parts.index);
19086 parts.disp = gen_lowpart (mode, parts.disp);
19088 if (parts.scale > 1)
19090 /* Case r1 = r1 + ... */
19091 if (regno1 == regno0)
19093 /* If we have a case r1 = r1 + C * r2 then we
19094 should use multiplication which is very
19095 expensive. Assume cost model is wrong if we
19096 have such case here. */
19097 gcc_assert (regno2 != regno0);
/* Scale by repeated addition of the index register.  */
19099 for (adds = parts.scale; adds > 0; adds--)
19100 ix86_emit_binop (PLUS, mode, target, parts.index);
19104 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
19105 if (regno0 != regno2)
19106 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
19108 /* Use shift for scaling. */
19109 ix86_emit_binop (ASHIFT, mode, target,
19110 GEN_INT (exact_log2 (parts.scale)));
19113 ix86_emit_binop (PLUS, mode, target, parts.base);
19115 if (parts.disp && parts.disp != const0_rtx)
19116 ix86_emit_binop (PLUS, mode, target, parts.disp);
19119 else if (!parts.base && !parts.index)
/* Pure displacement: a plain move suffices.  */
19121 gcc_assert(parts.disp);
19122 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
19128 if (regno0 != regno2)
19129 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
19131 else if (!parts.index)
19133 if (regno0 != regno1)
19134 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
19138 if (regno0 == regno1)
19140 else if (regno0 == regno2)
19146 /* Find better operand for SET instruction, depending
19147 on which definition is farther from the insn. */
19148 if (find_nearest_reg_def (insn, regno1, regno2))
19149 tmp = parts.index, tmp1 = parts.base;
19151 tmp = parts.base, tmp1 = parts.index;
19153 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19155 if (parts.disp && parts.disp != const0_rtx)
19156 ix86_emit_binop (PLUS, mode, target, parts.disp);
19158 ix86_emit_binop (PLUS, mode, target, tmp1);
19162 ix86_emit_binop (PLUS, mode, target, tmp);
19165 if (parts.disp && parts.disp != const0_rtx)
19166 ix86_emit_binop (PLUS, mode, target, parts.disp);
19170 /* Return true if it is ok to optimize an ADD operation to LEA
19171 operation to avoid flag register consumation. For most processors,
19172 ADD is faster than LEA. For the processors like BONNELL, if the
19173 destination register of LEA holds an actual address which will be
19174 used soon, LEA is better and otherwise ADD is better. */
/* NOTE(review): elided extract — the return statements of the two early
   branches are missing from this view.  */
19177 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19179 unsigned int regno0 = true_regnum (operands[0]);
19180 unsigned int regno1 = true_regnum (operands[1]);
19181 unsigned int regno2 = true_regnum (operands[2]);
19183 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19184 if (regno0 != regno1 && regno0 != regno2)
19187 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19190 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19193 /* Return true if destination reg of SET_BODY is shift count of
/* NOTE(review): elided extract — header continuation, local declarations,
   switch braces, case labels and default returns are missing from this view.
   Visible logic: recursively dig the SET destination out of SET_BODY and the
   shift count out of USE_BODY (both may be PARALLELs), then report whether
   they name the same register.  */
19197 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19203 /* Retrieve destination of SET_BODY. */
19204 switch (GET_CODE (set_body))
19207 set_dest = SET_DEST (set_body);
19208 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element of SET_BODY.  */
19212 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19213 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19221 /* Retrieve shift count of USE_BODY. */
19222 switch (GET_CODE (use_body))
19225 shift_rtx = XEXP (use_body, 1);
19228 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19229 if (ix86_dep_by_shift_count_body (set_body,
19230 XVECEXP (use_body, 0, i)))
19238 && (GET_CODE (shift_rtx) == ASHIFT
19239 || GET_CODE (shift_rtx) == LSHIFTRT
19240 || GET_CODE (shift_rtx) == ASHIFTRT
19241 || GET_CODE (shift_rtx) == ROTATE
19242 || GET_CODE (shift_rtx) == ROTATERT))
19244 rtx shift_count = XEXP (shift_rtx, 1);
19246 /* Return true if shift count is dest of SET_BODY. */
19247 if (REG_P (shift_count))
19249 /* Add check since it can be invoked before register
19250 allocation in pre-reload schedule. */
19251 if (reload_completed
19252 && true_regnum (set_dest) == true_regnum (shift_count))
19254 else if (REGNO(set_dest) == REGNO(shift_count))
19262 /* Return true if destination reg of SET_INSN is shift count of
/* NOTE(review): header continuation line is elided.  Thin wrapper: delegates
   to ix86_dep_by_shift_count_body on the two insn patterns.  */
19266 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19268 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19269 PATTERN (use_insn));
19272 /* Return TRUE or FALSE depending on whether the unary operator meets the
19273 appropriate constraints. */
/* NOTE(review): elided extract — the operands parameter line and the return
   statements are missing from this view.  */
19276 ix86_unary_operator_ok (enum rtx_code,
19280 /* If one of operands is memory, source and destination must match. */
19281 if ((MEM_P (operands[0])
19282 || MEM_P (operands[1]))
19283 && ! rtx_equal_p (operands[0], operands[1]))
19288 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19289 are ok, keeping in mind the possible movddup alternative. */
/* NOTE(review): elided extract — the final return for the all-register case
   is missing from this view.  */
19292 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
/* Memory destination: must match the source selected by HIGH.  */
19294 if (MEM_P (operands[0]))
19295 return rtx_equal_p (operands[0], operands[1 + high]);
/* Two memory sources are only ok as a movddup (SSE3) of equal operands.  */
19296 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19297 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19301 /* Post-reload splitter for converting an SF or DFmode value in an
19302 SSE register into an unsigned SImode. */
/* NOTE(review): elided extract — several braces and an if/else around the
   REG_P(input) alternative are missing from this view.  Visible logic:
   values >= 2**31 are handled by subtracting 2**31 before the signed
   conversion and xoring the sign bit back in afterwards.  */
19305 ix86_split_convert_uns_si_sse (rtx operands[])
19307 machine_mode vecmode;
19308 rtx value, large, zero_or_two31, input, two31, x;
19310 large = operands[1];
19311 zero_or_two31 = operands[2];
19312 input = operands[3];
19313 two31 = operands[4];
19314 vecmode = GET_MODE (large);
19315 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19317 /* Load up the value into the low element. We must ensure that the other
19318 elements are valid floats -- zero is the easiest such value. */
19321 if (vecmode == V4SFmode)
19322 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19324 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19328 input = gen_rtx_REG (vecmode, REGNO (input));
19329 emit_move_insn (value, CONST0_RTX (vecmode));
19330 if (vecmode == V4SFmode)
19331 emit_insn (gen_sse_movss (value, value, input));
19333 emit_insn (gen_sse2_movsd (value, value, input));
19336 emit_move_insn (large, two31);
19337 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2**31 <= value) ? all-ones : zero, as a compare mask.  */
19339 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19340 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = mask ? 2**31 : 0.  */
19342 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19343 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19345 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19346 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the mask left 31 to recreate the sign bit for the final xor.  */
19348 large = gen_rtx_REG (V4SImode, REGNO (large));
19349 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19351 x = gen_rtx_REG (V4SImode, REGNO (value));
19352 if (vecmode == V4SFmode)
19353 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19355 emit_insn (gen_sse2_cvttpd2dq (x, value));
19358 emit_insn (gen_xorv4si3 (value, value, large));
19361 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19362 Expects the 64-bit DImode to be supplied in a pair of integral
19363 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19364 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): elided extract — some braces and the TARGET_SSE3 if/else
   around the hadd vs. interleave+add tail are missing from this view.  */
19367 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19369 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19370 rtx int_xmm, fp_xmm;
19371 rtx biases, exponents;
19374 int_xmm = gen_reg_rtx (V4SImode);
19375 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19376 emit_insn (gen_movdi_to_sse (int_xmm, input));
19377 else if (TARGET_SSE_SPLIT_REGS)
19379 emit_clobber (int_xmm);
19380 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19384 x = gen_reg_rtx (V2DImode);
19385 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19386 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words that turn the two input halves into biased doubles.  */
19389 x = gen_rtx_CONST_VECTOR (V4SImode,
19390 gen_rtvec (4, GEN_INT (0x43300000UL),
19391 GEN_INT (0x45300000UL),
19392 const0_rtx, const0_rtx));
19393 exponents = validize_mem (force_const_mem (V4SImode, x));
19395 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19396 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19398 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19399 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19400 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19401 (0x1.0p84 + double(fp_value_hi_xmm)).
19402 Note these exponents differ by 32. */
19404 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19406 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19407 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19408 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19409 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19410 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19411 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19412 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19413 biases = validize_mem (force_const_mem (V2DFmode, biases));
19414 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19416 /* Add the upper and lower DFmode values together. */
19418 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19421 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19422 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19423 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19426 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19429 /* Not used, but eases macroization of patterns. */
/* Deliberately unreachable stub: exists only so the machine-description
   macros can reference a uniform function name for every mode.  */
19431 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19433 gcc_unreachable ();
19436 /* Convert an unsigned SImode value into a DFmode. Only currently used
19437 for SSE, but applicable anywhere. */
/* NOTE(review): elided extract — local declarations and some braces are
   missing from this view.  Technique: bias the unsigned input into signed
   range (subtract 2**31 via adding INT_MIN), convert signed, then add
   2**31.0 back as a double.  */
19440 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19442 REAL_VALUE_TYPE TWO31r;
19445 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19446 NULL, 1, OPTAB_DIRECT);
19448 fp = gen_reg_rtx (DFmode);
19449 emit_insn (gen_floatsidf2 (fp, x));
19451 real_ldexp (&TWO31r, &dconst1, 31);
19452 x = const_double_from_real_value (TWO31r, DFmode);
19454 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19456 emit_move_insn (target, x);
19459 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19460 32-bit mode; otherwise we have a direct convert instruction. */
/* NOTE(review): elided extract.  Technique: convert the signed high word,
   scale it by 2**32, convert the low word as unsigned, and sum.  */
19463 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19465 REAL_VALUE_TYPE TWO32r;
19466 rtx fp_lo, fp_hi, x;
19468 fp_lo = gen_reg_rtx (DFmode);
19469 fp_hi = gen_reg_rtx (DFmode);
19471 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19473 real_ldexp (&TWO32r, &dconst1, 32);
19474 x = const_double_from_real_value (TWO32r, DFmode);
19475 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19477 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19479 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19482 emit_move_insn (target, x);
19485 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19486 For x86_32, -mfpmath=sse, !optimize_size only. */
/* NOTE(review): elided extract.  Technique: split the 32-bit input into
   16-bit halves, convert each exactly, and recombine as hi * 2**16 + lo.  */
19488 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19490 REAL_VALUE_TYPE ONE16r;
19491 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19493 real_ldexp (&ONE16r, &dconst1, 16);
19494 x = const_double_from_real_value (ONE16r, SFmode);
19495 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19496 NULL, 0, OPTAB_DIRECT);
19497 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19498 NULL, 0, OPTAB_DIRECT);
19499 fp_hi = gen_reg_rtx (SFmode);
19500 fp_lo = gen_reg_rtx (SFmode);
19501 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19502 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19503 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19505 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19507 if (!rtx_equal_p (target, fp_hi))
19508 emit_move_insn (target, fp_hi);
19511 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19512 a vector of unsigned ints VAL to vector of floats TARGET. */
/* NOTE(review): elided extract — the tmp[] declaration and some braces are
   missing from this view.  Vector analogue of the scalar sisf conversion:
   convert the low and high 16-bit halves separately, then hi * 2**16 + lo.  */
19515 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19518 REAL_VALUE_TYPE TWO16r;
19519 machine_mode intmode = GET_MODE (val);
19520 machine_mode fltmode = GET_MODE (target);
19521 rtx (*cvt) (rtx, rtx);
19523 if (intmode == V4SImode)
19524 cvt = gen_floatv4siv4sf2;
19526 cvt = gen_floatv8siv8sf2;
/* tmp[0] = 0xffff broadcast; tmp[1] = low halves; tmp[2] = high halves.  */
19527 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19528 tmp[0] = force_reg (intmode, tmp[0]);
19529 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19531 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19532 NULL_RTX, 1, OPTAB_DIRECT);
19533 tmp[3] = gen_reg_rtx (fltmode);
19534 emit_insn (cvt (tmp[3], tmp[1]));
19535 tmp[4] = gen_reg_rtx (fltmode);
19536 emit_insn (cvt (tmp[4], tmp[2]));
19537 real_ldexp (&TWO16r, &dconst1, 16);
19538 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19539 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19540 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19542 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19544 if (tmp[7] != target)
19545 emit_move_insn (target, tmp[7]);
19548 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19549 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19550 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19551 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
/* NOTE(review): elided extract — the switch braces, the loop index
   declaration and the final return's argument list are missing from this
   view.  */
19554 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19556 REAL_VALUE_TYPE TWO31r;
19557 rtx two31r, tmp[4];
19558 machine_mode mode = GET_MODE (val);
19559 machine_mode scalarmode = GET_MODE_INNER (mode);
19560 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19561 rtx (*cmp) (rtx, rtx, rtx, rtx);
19564 for (i = 0; i < 3; i++)
19565 tmp[i] = gen_reg_rtx (mode);
19566 real_ldexp (&TWO31r, &dconst1, 31);
19567 two31r = const_double_from_real_value (TWO31r, scalarmode);
19568 two31r = ix86_build_const_vector (mode, 1, two31r);
19569 two31r = force_reg (mode, two31r);
/* Pick the mask-compare builder for the element mode.  */
19572 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19573 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19574 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19575 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19576 default: gcc_unreachable ();
/* tmp[0] = mask of lanes where 2**31 <= val.  */
19578 tmp[3] = gen_rtx_LE (mode, two31r, val);
19579 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19580 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19582 if (intmode == V4SImode || TARGET_AVX2)
/* Derive the 0x80000000 correction by shifting the mask left 31.  */
19583 *xorp = expand_simple_binop (intmode, ASHIFT,
19584 gen_lowpart (intmode, tmp[0]),
19585 GEN_INT (31), NULL_RTX, 0,
19589 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19590 two31 = ix86_build_const_vector (intmode, 1, two31);
19591 *xorp = expand_simple_binop (intmode, AND,
19592 gen_lowpart (intmode, tmp[0]),
19593 two31, NULL_RTX, 0,
19596 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19600 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19601 then replicate the value for all elements of the vector
/* NOTE(review): heavily elided extract — the mode switch (original lines
   ~19610-19631) and local declarations are missing from this view.  Visible
   logic: build an n_elt rtvec whose first element is VALUE and whose rest
   are VALUE (vect) or zero (!vect), wrapped in a CONST_VECTOR.  */
19605 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19609 machine_mode scalar_mode;
19632 n_elt = GET_MODE_NUNITS (mode);
19633 v = rtvec_alloc (n_elt);
19634 scalar_mode = GET_MODE_INNER (mode);
19636 RTVEC_ELT (v, 0) = value;
19638 for (i = 1; i < n_elt; ++i)
19639 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19641 return gen_rtx_CONST_VECTOR (mode, v);
19644 gcc_unreachable ();
19648 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19649 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19650 for an SSE register. If VECT is true, then replicate the mask for
19651 all elements of the vector register. If INVERT is true, then create
19652 a mask excluding the sign bit. */
/* NOTE(review): heavily elided extract — the mode switch labels, shift
   computations and several branches are missing, so the per-mode
   correspondence below cannot be fully verified from this view.  Visible
   logic: build the sign-bit constant as a (lo, hi) HOST_WIDE_INT pair,
   optionally invert it, lower it into the FP mode and replicate via
   ix86_build_const_vector.  */
19655 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19657 machine_mode vec_mode, imode;
19658 HOST_WIDE_INT hi, lo;
19663 /* Find the sign bit, sign extended to 2*HWI. */
19673 mode = GET_MODE_INNER (mode);
/* 32-bit elements: sign bit is 0x80000000.  */
19675 lo = 0x80000000, hi = lo < 0;
19685 mode = GET_MODE_INNER (mode);
19687 if (HOST_BITS_PER_WIDE_INT >= 64)
19688 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19690 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19695 vec_mode = VOIDmode;
19696 if (HOST_BITS_PER_WIDE_INT >= 64)
19699 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19706 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19710 lo = ~lo, hi = ~hi;
19716 mask = immed_double_const (lo, hi, imode);
19718 vec = gen_rtvec (2, v, mask);
19719 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19720 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19727 gcc_unreachable ();
/* INVERT: complement the mask so it selects everything but the sign bit.  */
19731 lo = ~lo, hi = ~hi;
19733 /* Force this value into the low part of a fp vector constant. */
19734 mask = immed_double_const (lo, hi, imode);
19735 mask = gen_lowpart (mode, mask);
19737 if (vec_mode == VOIDmode)
19738 return force_reg (mode, mask);
19740 v = ix86_build_const_vector (vec_mode, vect, mask);
19741 return force_reg (vec_mode, v);
19744 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): elided extract — the TFmode/SFmode/DFmode vmode
   assignments, dst/src setup and the !use_sse path are missing from this
   view.  Visible logic: for SSE, ABS/NEG become bitwise ops with a
   sign-bit mask; the emitted pattern carries a USE of the mask and, in the
   non-vector case, a FLAGS_REG clobber.  */
19747 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19750 rtx mask, set, dst, src;
19751 bool use_sse = false;
19752 bool vector_mode = VECTOR_MODE_P (mode);
19753 machine_mode vmode = mode;
19757 else if (mode == TFmode)
19759 else if (TARGET_SSE_MATH)
19761 use_sse = SSE_FLOAT_MODE_P (mode);
19762 if (mode == SFmode)
19764 else if (mode == DFmode)
19768 /* NEG and ABS performed with SSE use bitwise mask operations.
19769 Create the appropriate mask now. */
/* ABS uses the inverted mask (clear sign bit); NEG uses the sign bit.  */
19771 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19778 set = gen_rtx_fmt_e (code, mode, src);
19779 set = gen_rtx_SET (VOIDmode, dst, set);
19786 use = gen_rtx_USE (VOIDmode, mask);
19788 par = gen_rtvec (2, set, use);
19791 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19792 par = gen_rtvec (3, set, use, clob);
19794 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19800 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* NOTE(review): elided extract — op0/op1 assignments, the vmode choice for
   TFmode and several braces are missing from this view.  Two strategies:
   a cheaper *_const pattern when the magnitude operand is a literal
   constant, else the general *_var pattern with both mask and inverted
   mask.  */
19803 ix86_expand_copysign (rtx operands[])
19805 machine_mode mode, vmode;
19806 rtx dest, op0, op1, mask, nmask;
19808 dest = operands[0];
19812 mode = GET_MODE (dest);
19814 if (mode == SFmode)
19816 else if (mode == DFmode)
19821 if (GET_CODE (op0) == CONST_DOUBLE)
19823 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* The sign of a constant magnitude is irrelevant; canonicalize to abs.  */
19825 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19826 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19828 if (mode == SFmode || mode == DFmode)
19830 if (op0 == CONST0_RTX (mode))
19831 op0 = CONST0_RTX (vmode)
19834 rtx v = ix86_build_const_vector (vmode, false, op0);
19836 op0 = force_reg (vmode, v);
19839 else if (op0 != CONST0_RTX (mode))
19840 op0 = force_reg (mode, op0);
19842 mask = ix86_build_signbit_mask (vmode, 0, 0);
19844 if (mode == SFmode)
19845 copysign_insn = gen_copysignsf3_const;
19846 else if (mode == DFmode)
19847 copysign_insn = gen_copysigndf3_const;
19849 copysign_insn = gen_copysigntf3_const;
19851 emit_insn (copysign_insn (dest, op0, op1, mask));
19855 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19857 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19858 mask = ix86_build_signbit_mask (vmode, 0, 0);
19860 if (mode == SFmode)
19861 copysign_insn = gen_copysignsf3_var;
19862 else if (mode == DFmode)
19863 copysign_insn = gen_copysigndf3_var;
19865 copysign_insn = gen_copysigntf3_var;
19867 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19871 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19872 be a constant, and so has already been expanded into a vector constant. */
/* NOTE(review): elided extract — the op0 assignment and some braces are
   missing from this view.  dest = (op1 & sign-mask) | |const|.  */
19875 ix86_split_copysign_const (rtx operands[])
19877 machine_mode mode, vmode;
19878 rtx dest, op0, mask, x;
19880 dest = operands[0];
19882 mask = operands[3];
19884 mode = GET_MODE (dest);
19885 vmode = GET_MODE (mask);
19887 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19888 x = gen_rtx_AND (vmode, dest, mask);
19889 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* ORing with zero would be a no-op, so skip it for a zero magnitude.  */
19891 if (op0 != CONST0_RTX (vmode))
19893 x = gen_rtx_IOR (vmode, dest, op0);
19894 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19898 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19899 so we have to do two masks. */
/* NOTE(review): elided extract — op0/op1 assignments, an early return and
   several braces are missing from this view.  The alternatives mirror the
   constraint alternatives of the copysign*_var insn patterns: which of
   dest/scratch aliases mask/nmask/op0/op1 decides the AND/ANDN ordering.  */
19902 ix86_split_copysign_var (rtx operands[])
19904 machine_mode mode, vmode;
19905 rtx dest, scratch, op0, op1, mask, nmask, x;
19907 dest = operands[0];
19908 scratch = operands[1];
19911 nmask = operands[4];
19912 mask = operands[5];
19914 mode = GET_MODE (dest);
19915 vmode = GET_MODE (mask);
19917 if (rtx_equal_p (op0, op1))
19919 /* Shouldn't happen often (it's useless, obviously), but when it does
19920 we'd generate incorrect code if we continue below. */
19921 emit_move_insn (dest, op0);
19925 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19927 gcc_assert (REGNO (op1) == REGNO (scratch));
19929 x = gen_rtx_AND (vmode, scratch, mask);
19930 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19933 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19934 x = gen_rtx_NOT (vmode, dest);
19935 x = gen_rtx_AND (vmode, x, op0);
19936 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19940 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19942 x = gen_rtx_AND (vmode, scratch, mask);
19944 else /* alternative 2,4 */
19946 gcc_assert (REGNO (mask) == REGNO (scratch));
19947 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19948 x = gen_rtx_AND (vmode, scratch, op1);
19950 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19952 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19954 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19955 x = gen_rtx_AND (vmode, dest, nmask);
19957 else /* alternative 3,4 */
19959 gcc_assert (REGNO (nmask) == REGNO (dest));
19961 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19962 x = gen_rtx_AND (vmode, dest, op0);
19964 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign bits and magnitude bits.  */
19967 x = gen_rtx_IOR (vmode, dest, scratch);
19968 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19971 /* Return TRUE or FALSE depending on whether the first SET in INSN
19972 has source and destination with matching CC modes, and that the
19973 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): elided extract — the switch(set_mode) labels and the
   return statements inside the per-mode cases are missing from this
   view.  */
19976 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19979 machine_mode set_mode;
19981 set = PATTERN (insn);
19982 if (GET_CODE (set) == PARALLEL)
19983 set = XVECEXP (set, 0, 0);
19984 gcc_assert (GET_CODE (set) == SET);
19985 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19987 set_mode = GET_MODE (SET_DEST (set));
/* CCNO requires a compare against zero unless full CC is requested.  */
19991 if (req_mode != CCNOmode
19992 && (req_mode != CCmode
19993 || XEXP (SET_SRC (set), 1) != const0_rtx))
19997 if (req_mode == CCGCmode)
20001 if (req_mode == CCGOCmode || req_mode == CCNOmode)
20005 if (req_mode == CCZmode)
20015 if (set_mode != req_mode)
20020 gcc_unreachable ();
20023 return GET_MODE (SET_SRC (set)) == set_mode;
20026 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* NOTE(review): elided extract — local rtx declarations are missing from
   this view.  Emits (set flags (compare op0 op1)) in the CC mode chosen by
   SELECT_CC_MODE and returns the comparison rtx for the flags consumer.  */
20029 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
20031 machine_mode cmpmode;
20034 cmpmode = SELECT_CC_MODE (code, op0, op1);
20035 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
20037 /* This is very simple, but making the interface the same as in the
20038 FP case makes the rest of the code easier. */
20039 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
20040 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
20042 /* Return the test that should be put into the flags user, i.e.
20043 the bcc, scc, or cmov instruction. */
20044 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
20047 /* Figure out whether to use ordered or unordered fp comparisons.
20048 Return the appropriate mode to use. */
/* The comparison code parameter is currently unused: under IEEE math every
   FP compare is made non-trapping (CCFPUmode), per the ??? note below.  */
20051 ix86_fp_compare_mode (enum rtx_code)
20053 /* ??? In order to make all comparisons reversible, we do all comparisons
20054 non-trapping when compiling for IEEE. Once gcc is able to distinguish
20055 all forms trapping and nontrapping comparisons, we can make inequality
20056 comparisons trapping again, since it results in better code when using
20057 FCOM based compares. */
20058 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode needed to represent comparison CODE of
   OP0 and OP1.  NOTE(review): elided extract — the switch statement head,
   the returned CC modes for each case and the default case body are
   missing from this view; only the case labels and guards are visible.  */
20062 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
20064 machine_mode mode = GET_MODE (op0);
20066 if (SCALAR_FLOAT_MODE_P (mode))
20068 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20069 return ix86_fp_compare_mode (code);
20074 /* Only zero flag is needed. */
20075 case EQ: /* ZF=0 */
20076 case NE: /* ZF!=0 */
20078 /* Codes needing carry flag. */
20079 case GEU: /* CF=0 */
20080 case LTU: /* CF=1 */
20081 /* Detect overflow checks. They need just the carry flag. */
20082 if (GET_CODE (op0) == PLUS
20083 && rtx_equal_p (op1, XEXP (op0, 0)))
20087 case GTU: /* CF=0 & ZF=0 */
20088 case LEU: /* CF=1 | ZF=1 */
20090 /* Codes possibly doable only with sign flag when
20091 comparing against zero. */
20092 case GE: /* SF=OF or SF=0 */
20093 case LT: /* SF<>OF or SF=1 */
20094 if (op1 == const0_rtx)
20097 /* For other cases Carry flag is not required. */
20099 /* Codes doable only with sign flag when comparing
20100 against zero, but we miss jump instruction for it
20101 so we need to use relational tests against overflow
20102 that thus needs to be zero. */
20103 case GT: /* ZF=0 & SF=OF */
20104 case LE: /* ZF=1 | SF<>OF */
20105 if (op1 == const0_rtx)
20109 /* strcmp pattern do (use flags) and combine may ask us for proper
20114 gcc_unreachable ();
20118 /* Return the fixed registers used for condition codes. */
/* NOTE(review): body is entirely elided in this extract (original lines
   ~20122-20127) — only the signature is visible; presumably it stores the
   flags register numbers through P1/P2.  Confirm against the full source.  */
20121 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
20128 /* If two condition code modes are compatible, return a condition code
20129 mode which is compatible with both. Otherwise, return
/* NOTE(review): elided extract — the m1 == m2 fast path, the returned
   modes and the per-mode switch body are missing from this view.  Visible
   pairings: CCGC/CCGOC combine, and CCZ combines with either.  */
20132 static machine_mode
20133 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
20138 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20141 if ((m1 == CCGCmode && m2 == CCGOCmode)
20142 || (m1 == CCGOCmode && m2 == CCGCmode))
20145 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20147 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20153 gcc_unreachable ();
20183 /* These are only compatible with themselves, which we already
20190 /* Return a comparison we can do and that it is equivalent to
20191 swap_condition (code) apart possibly from orderedness.
20192 But, never change orderedness if TARGET_IEEE_FP, returning
20193 UNKNOWN in that case if necessary. */
/* NOTE(review): elided extract — the switch head and braces are missing
   from this view.  Swapping GT<->UNLT etc. flips orderedness, which IEEE
   math must not do, hence the UNKNOWN fallbacks.  */
20195 static enum rtx_code
20196 ix86_fp_swap_condition (enum rtx_code code)
20200 case GT: /* GTU - CF=0 & ZF=0 */
20201 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20202 case GE: /* GEU - CF=0 */
20203 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20204 case UNLT: /* LTU - CF=1 */
20205 return TARGET_IEEE_FP ? UNKNOWN : GT;
20206 case UNLE: /* LEU - CF=1 | ZF=1 */
20207 return TARGET_IEEE_FP ? UNKNOWN : GE;
20209 return swap_condition (code);
20213 /* Return cost of comparison CODE using the best strategy for performance.
20214 All following functions do use number of instructions as a cost metrics.
20215 In future this should be tweaked to compute bytes for optimize_size and
20216 take into account performance of various instructions on various CPUs. */
/* NOTE(review): elided extract — the switch over CODE that sets arith_cost
   for most comparison codes, and the IX86_FPCMP_ARITH return, are missing
   from this view.  */
20219 ix86_fp_comparison_cost (enum rtx_code code)
20223 /* The cost of code using bit-twiddling on %ah. */
20240 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20244 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20247 gcc_unreachable ();
20250 switch (ix86_fp_comparison_strategy (code))
20252 case IX86_FPCMP_COMI:
20253 return arith_cost > 4 ? 3 : 2;
20254 case IX86_FPCMP_SAHF:
20255 return arith_cost > 4 ? 4 : 3;
20261 /* Return strategy to use for floating-point. We assume that fcomi is always
20262 preferrable where available, since that is also true when looking at size
20263 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20265 enum ix86_fpcmp_strategy
20266 ix86_fp_comparison_strategy (enum rtx_code)
20268 /* Do fcomi/sahf based test when profitable. */
/* First choice: fcomi (condition guarding this return is elided).  */
20271 return IX86_FPCMP_COMI;
/* Second choice: fnstsw+sahf when SAHF exists and is either fast or we
   are optimizing for size.  */
20273 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20274 return IX86_FPCMP_SAHF;
/* Fallback: bit-twiddling arithmetic on the FP status word.  */
20276 return IX86_FPCMP_ARITH;
20279 /* Swap, force into registers, or otherwise massage the two operands
20280 to a fp comparison. The operands are updated in place; the new
20281 comparison code is returned. */
/* NOTE(review): this excerpt is elided -- several guarding conditions
   and else-branches are missing from the visible text.  */
20283 static enum rtx_code
20284 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20286 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20287 rtx op0 = *pop0, op1 = *pop1;
20288 machine_mode op_mode = GET_MODE (op0);
/* True when the comparison will be done with SSE scalar math.  */
20289 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20291 /* All of the unordered compare instructions only work on registers.
20292 The same is true of the fcomi compare instructions. The XFmode
20293 compare instructions require registers except when comparing
20294 against zero or when converting operand 1 from fixed point to
20298 && (fpcmp_mode == CCFPUmode
20299 || (op_mode == XFmode
20300 && ! (standard_80387_constant_p (op0) == 1
20301 || standard_80387_constant_p (op1) == 1)
20302 && GET_CODE (op1) != FLOAT)
20303 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
/* Register-only cases: force both operands into registers.  */
20305 op0 = force_reg (op_mode, op0);
20306 op1 = force_reg (op_mode, op1);
20310 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20311 things around if they appear profitable, otherwise force op0
20312 into a register. */
20314 if (standard_80387_constant_p (op0) == 0
20316 && ! (standard_80387_constant_p (op1) == 0
/* Swapping is only legal if the swapped condition is expressible
   (UNKNOWN means IEEE orderedness would be violated).  */
20319 enum rtx_code new_code = ix86_fp_swap_condition (code);
20320 if (new_code != UNKNOWN)
20322 std::swap (op0, op1);
20328 op0 = force_reg (op_mode, op0);
20330 if (CONSTANT_P (op1))
/* Non-standard 387 constants must live in memory; standard ones
   (handled in elided code) can be loaded directly.  */
20332 int tmp = standard_80387_constant_p (op1);
20334 op1 = validize_mem (force_const_mem (op_mode, op1));
20338 op1 = force_reg (op_mode, op1);
20341 op1 = force_reg (op_mode, op1);
20345 /* Try to rearrange the comparison to make it cheaper. */
20346 if (ix86_fp_comparison_cost (code)
20347 > ix86_fp_comparison_cost (swap_condition (code))
20348 && (REG_P (op1) || can_create_pseudo_p ()))
20350 std::swap (op0, op1);
20351 code = swap_condition (code);
20353 op0 = force_reg (op_mode, op0);
20361 /* Convert comparison codes we use to represent FP comparison to integer
20362 code that will result in proper branch. Return UNKNOWN if no such code
20366 ix86_fp_compare_code_to_integer (enum rtx_code code)
20395 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): excerpt is elided; switch-case labels inside the ARITH
   bit-twiddling section and some emitted moves are missing.  Returns
   the flags-user comparison rtx (see the final gen_rtx_fmt_ee).  */
20398 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20400 machine_mode fpcmp_mode, intcmp_mode;
20403 fpcmp_mode = ix86_fp_compare_mode (code);
/* Massage operands (may swap them and change CODE).  */
20404 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20406 /* Do fcomi/sahf based test when profitable. */
20407 switch (ix86_fp_comparison_strategy (code))
20409 case IX86_FPCMP_COMI:
/* fcomi sets EFLAGS directly: emit flags = compare (op0, op1).  */
20410 intcmp_mode = fpcmp_mode;
20411 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20412 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20417 case IX86_FPCMP_SAHF:
/* fnstsw+sahf: same flags result, but needs a scratch HImode reg
   (clobbered in the parallel below).  */
20418 intcmp_mode = fpcmp_mode;
20419 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20420 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20424 scratch = gen_reg_rtx (HImode);
20425 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20426 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20429 case IX86_FPCMP_ARITH:
20430 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20431 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20432 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW)
20434 scratch = gen_reg_rtx (HImode);
20435 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20437 /* In the unordered case, we have to check C2 for NaN's, which
20438 doesn't happen to work out to anything nice combination-wise.
20439 So do some bit twiddling on the value we've got in AH to come
20440 up with an appropriate set of condition codes. */
20442 intcmp_mode = CCNOmode;
/* Masks 0x45/0x44/0x40/0x05/0x04/0x01 select C0/C2/C3 status-word
   bits; CCmode is used where a real compare (not test) is emitted.
   The case labels for each CODE are elided in this excerpt.  */
20447 if (code == GT || !TARGET_IEEE_FP)
20449 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20454 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20455 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20456 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20457 intcmp_mode = CCmode;
20463 if (code == LT && TARGET_IEEE_FP)
20465 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20466 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20467 intcmp_mode = CCmode;
20472 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20478 if (code == GE || !TARGET_IEEE_FP)
20480 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20485 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20486 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20492 if (code == LE && TARGET_IEEE_FP)
20494 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20495 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20496 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20497 intcmp_mode = CCmode;
20502 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20508 if (code == EQ && TARGET_IEEE_FP)
20510 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20511 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20512 intcmp_mode = CCmode;
20517 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20523 if (code == NE && TARGET_IEEE_FP)
20525 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20526 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20532 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20542 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20547 gcc_unreachable ();
20555 /* Return the test that should be put into the flags user, i.e.
20556 the bcc, scc, or cmov instruction. */
20557 return gen_rtx_fmt_ee (code, VOIDmode,
20558 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of OP0 with OP1 under CODE, dispatching to the
   FP or integer expander; returns the comparison rtx.  (Function
   header lines are elided in this excerpt.)  */
20563 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
/* Already a CC-mode operand: just wrap it in the comparison code.  */
20567 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20568 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20570 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
/* Decimal float is not handled by this path.  */
20572 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20573 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20576 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL comparing OP0 with OP1 under
   CODE.  Double-word (DI/TI) modes are split into word-sized
   compare+branch sequences.  (Header and some case labels are elided
   in this excerpt.)  */
20582 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20584 machine_mode mode = GET_MODE (op0);
/* Simple case: expand the compare and emit pc = cond ? label : pc.  */
20596 tmp = ix86_expand_compare (code, op0, op1);
20597 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20598 gen_rtx_LABEL_REF (VOIDmode, label),
20600 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20607 /* Expand DImode branch into multiple compare+branch. */
20610 rtx_code_label *label2;
20611 enum rtx_code code1, code2, code3;
20612 machine_mode submode;
/* Canonicalize: constant goes on the right.  */
20614 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20616 std::swap (op0, op1);
20617 code = swap_condition (code);
/* Split both operands into low/high word halves.  */
20620 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20621 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20623 submode = mode == DImode ? SImode : DImode;
20625 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20626 avoid two branches. This costs one extra insn, so disable when
20627 optimizing for size. */
20629 if ((code == EQ || code == NE)
20630 && (!optimize_insn_for_size_p ()
20631 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR each half against the corresponding constant/operand, then
   OR the results and branch on the combined value vs zero.  */
20636 if (hi[1] != const0_rtx)
20637 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20638 NULL_RTX, 0, OPTAB_WIDEN);
20641 if (lo[1] != const0_rtx)
20642 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20643 NULL_RTX, 0, OPTAB_WIDEN);
20645 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20646 NULL_RTX, 0, OPTAB_WIDEN);
20648 ix86_expand_branch (code, tmp, const0_rtx, label);
20652 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20653 op1 is a constant and the low word is zero, then we can just
20654 examine the high word. Similarly for low word -1 and
20655 less-or-equal-than or greater-than. */
20657 if (CONST_INT_P (hi[1]))
20660 case LT: case LTU: case GE: case GEU:
20661 if (lo[1] == const0_rtx)
20663 ix86_expand_branch (code, hi[0], hi[1], label);
20667 case LE: case LEU: case GT: case GTU:
20668 if (lo[1] == constm1_rtx)
20670 ix86_expand_branch (code, hi[0], hi[1], label);
20678 /* Otherwise, we need two or three jumps. */
20680 label2 = gen_label_rtx ();
/* code1/code2 test the high word; code3 decides on the low word
   (unsigned, since the low word carries no sign).  */
20683 code2 = swap_condition (code);
20684 code3 = unsigned_condition (code);
20688 case LT: case GT: case LTU: case GTU:
20691 case LE: code1 = LT; code2 = GT; break;
20692 case GE: code1 = GT; code2 = LT; break;
20693 case LEU: code1 = LTU; code2 = GTU; break;
20694 case GEU: code1 = GTU; code2 = LTU; break;
20696 case EQ: code1 = UNKNOWN; code2 = NE; break;
20697 case NE: code2 = UNKNOWN; break;
20700 gcc_unreachable ();
20705 * if (hi(a) < hi(b)) goto true;
20706 * if (hi(a) > hi(b)) goto false;
20707 * if (lo(a) < lo(b)) goto true;
/* UNKNOWN suppresses the corresponding branch.  */
20711 if (code1 != UNKNOWN)
20712 ix86_expand_branch (code1, hi[0], hi[1], label);
20713 if (code2 != UNKNOWN)
20714 ix86_expand_branch (code2, hi[0], hi[1], label2);
20716 ix86_expand_branch (code3, lo[0], lo[1], label);
20718 if (code2 != UNKNOWN)
20719 emit_label (label2);
/* Default case: only CC-mode operands are expected here.  */
20724 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20729 /* Split branch based on floating point condition. */
20731 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20732 rtx target1, rtx target2, rtx tmp)
/* Canonicalize so the fall-through (pc) target is TARGET2; the
   condition is reversed with unordered-awareness to compensate.  */
20737 if (target2 != pc_rtx)
20739 std::swap (target1, target2);
20740 code = reverse_condition_maybe_unordered (code);
20743 condition = ix86_expand_fp_compare (code, op1, op2,
/* Emit the conditional jump and attach the branch probability note
   when one was recorded by the splitter.  */
20746 i = emit_jump_insn (gen_rtx_SET
20748 gen_rtx_IF_THEN_ELSE (VOIDmode,
20749 condition, target1, target2)));
20750 if (split_branch_probability >= 0)
20751 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Expand a setcc: DEST (QImode) = (OP0 CODE OP1).  (Header lines are
   elided in this excerpt.)  */
20755 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20759 gcc_assert (GET_MODE (dest) == QImode);
20761 ret = ix86_expand_compare (code, op0, op1);
/* The comparison rtx is built in VOIDmode; retarget it to QImode so
   it can be stored directly into DEST.  */
20762 PUT_MODE (ret, QImode);
20763 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20766 /* Expand comparison setting or clearing carry flag. Return true when
20767 successful and set pop for the operation. */
/* NOTE(review): excerpt is elided; several case labels and early
   returns are missing from the visible text.  */
20769 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20771 machine_mode mode =
20772 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20774 /* Do not handle double-mode compares that go through special path. */
20775 if (mode == (TARGET_64BIT ? TImode : DImode))
20778 if (SCALAR_FLOAT_MODE_P (mode))
20781 rtx_insn *compare_seq;
20783 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20785 /* Shortcut: following common codes never translate
20786 into carry flag compares. */
20787 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20788 || code == ORDERED || code == UNORDERED)
20791 /* These comparisons require zero flag; swap operands so they won't. */
20792 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20793 && !TARGET_IEEE_FP)
20795 std::swap (op0, op1);
20796 code = swap_condition (code);
20799 /* Try to expand the comparison and verify that we end up with
20800 carry flag based comparison. This fails to be true only when
20801 we decide to expand comparison using arithmetic that is not
20802 too common scenario. */
20804 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20805 compare_seq = get_insns ();
/* Map the FP condition back to an integer code when the compare
   landed in an FP flags mode.  */
20808 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20809 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20810 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20812 code = GET_CODE (compare_op);
/* Only LTU/GEU are pure carry-flag tests.  */
20814 if (code != LTU && code != GEU)
20817 emit_insn (compare_seq);
20822 if (!INTEGRAL_MODE_P (mode))
20831 /* Convert a==0 into (unsigned)a<1. */
20834 if (op1 != const0_rtx)
20837 code = (code == EQ ? LTU : GEU);
20840 /* Convert a>b into b<a or a>=b-1. */
20843 if (CONST_INT_P (op1))
20845 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20846 /* Bail out on overflow. We still can swap operands but that
20847 would force loading of the constant into register. */
20848 if (op1 == const0_rtx
20849 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20851 code = (code == GTU ? GEU : LTU);
20855 std::swap (op0, op1);
20856 code = (code == GTU ? LTU : GEU);
20860 /* Convert a>=0 into (unsigned)a<0x80000000. */
20863 if (mode == DImode || op1 != const0_rtx)
20865 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20866 code = (code == LT ? GEU : LTU);
20870 if (mode == DImode || op1 != constm1_rtx)
20872 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20873 code = (code == LE ? GEU : LTU);
20879 /* Swapping operands may cause constant to appear as first operand. */
20880 if (!nonimmediate_operand (op0, VOIDmode))
20882 if (!can_create_pseudo_p ())
20884 op0 = force_reg (mode, op0);
/* Expand the final compare and verify it is carry-flag based.  */
20886 *pop = ix86_expand_compare (code, op0, op1);
20887 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1]
   (a comparison) ? operands[2] : operands[3].  Tries branchless
   sbb/setcc/lea sequences for constant arms before falling back to
   cmov.  (NOTE(review): excerpt is elided -- return statements, brace
   lines and several conditions are missing from the visible text.)  */
20892 ix86_expand_int_movcc (rtx operands[])
20894 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20895 rtx_insn *compare_seq;
20897 machine_mode mode = GET_MODE (operands[0]);
20898 bool sign_bit_compare_p = false;
20899 rtx op0 = XEXP (operands[1], 0);
20900 rtx op1 = XEXP (operands[1], 1);
/* Double-word compares go through a special path (elided).  */
20902 if (GET_MODE (op0) == TImode
20903 || (GET_MODE (op0) == DImode
20908 compare_op = ix86_expand_compare (code, op0, op1);
20909 compare_seq = get_insns ();
20912 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 / x>-1 / x<=-1 are sign-bit tests, doable by shifts.  */
20914 if ((op1 == const0_rtx && (code == GE || code == LT))
20915 || (op1 == constm1_rtx && (code == GT || code == LE)))
20916 sign_bit_compare_p = true;
20918 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20919 HImode insns, we'd be swallowed in word prefix ops. */
20921 if ((mode != HImode || TARGET_FAST_PREFIX)
20922 && (mode != (TARGET_64BIT ? TImode : DImode))
20923 && CONST_INT_P (operands[2])
20924 && CONST_INT_P (operands[3]))
20926 rtx out = operands[0];
20927 HOST_WIDE_INT ct = INTVAL (operands[2]);
20928 HOST_WIDE_INT cf = INTVAL (operands[3]);
20929 HOST_WIDE_INT diff;
20932 /* Sign bit compares are better done using shifts than we do by using
20934 if (sign_bit_compare_p
20935 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20937 /* Detect overlap between destination and compare sources. */
20940 if (!sign_bit_compare_p)
20943 bool fpcmp = false;
20945 compare_code = GET_CODE (compare_op);
20947 flags = XEXP (compare_op, 0);
20949 if (GET_MODE (flags) == CCFPmode
20950 || GET_MODE (flags) == CCFPUmode)
20954 = ix86_fp_compare_code_to_integer (compare_code);
20957 /* To simplify rest of code, restrict to the GEU case. */
20958 if (compare_code == LTU)
20960 std::swap (ct, cf);
20961 compare_code = reverse_condition (compare_code);
20962 code = reverse_condition (code);
/* FP flags need unordered-aware reversal.  */
20967 PUT_CODE (compare_op,
20968 reverse_condition_maybe_unordered
20969 (GET_CODE (compare_op)));
20971 PUT_CODE (compare_op,
20972 reverse_condition (GET_CODE (compare_op)));
20976 if (reg_overlap_mentioned_p (out, op0)
20977 || reg_overlap_mentioned_p (out, op1))
20978 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb idiom).  */
20980 if (mode == DImode)
20981 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20983 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20984 flags, compare_op));
20988 if (code == GT || code == GE)
20989 code = reverse_condition (code);
20992 std::swap (ct, cf);
20995 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
/* From here TMP holds 0/-1; adjust it towards ct/cf with cheap
   arithmetic depending on DIFF = ct - cf (cases partly elided).  */
21008 tmp = expand_simple_binop (mode, PLUS,
21010 copy_rtx (tmp), 1, OPTAB_DIRECT);
21021 tmp = expand_simple_binop (mode, IOR,
21023 copy_rtx (tmp), 1, OPTAB_DIRECT);
21025 else if (diff == -1 && ct)
21035 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
21037 tmp = expand_simple_binop (mode, PLUS,
21038 copy_rtx (tmp), GEN_INT (cf),
21039 copy_rtx (tmp), 1, OPTAB_DIRECT);
21047 * andl cf - ct, dest
21057 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
21060 tmp = expand_simple_binop (mode, AND,
21062 gen_int_mode (cf - ct, mode),
21063 copy_rtx (tmp), 1, OPTAB_DIRECT);
21065 tmp = expand_simple_binop (mode, PLUS,
21066 copy_rtx (tmp), GEN_INT (ct),
21067 copy_rtx (tmp), 1, OPTAB_DIRECT);
21070 if (!rtx_equal_p (tmp, out))
21071 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
/* diff < 0: try reversing the condition to make diff positive.  */
21078 machine_mode cmp_mode = GET_MODE (op0);
21079 enum rtx_code new_code;
21081 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21083 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21085 /* We may be reversing unordered compare to normal compare, that
21086 is not valid in general (we may convert non-trapping condition
21087 to trapping one), however on i386 we currently emit all
21088 comparisons unordered. */
21089 new_code = reverse_condition_maybe_unordered (code);
21092 new_code = ix86_reverse_condition (code, cmp_mode);
21093 if (new_code != UNKNOWN)
21095 std::swap (ct, cf);
21101 compare_code = UNKNOWN;
21102 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
21103 && CONST_INT_P (op1))
21105 if (op1 == const0_rtx
21106 && (code == LT || code == GE))
21107 compare_code = code;
21108 else if (op1 == constm1_rtx)
21112 else if (code == GT)
21117 /* Optimize dest = (op0 < 0) ? -1 : cf. */
21118 if (compare_code != UNKNOWN
21119 && GET_MODE (op0) == GET_MODE (out)
21120 && (cf == -1 || ct == -1))
21122 /* If lea code below could be used, only optimize
21123 if it results in a 2 insn sequence. */
21125 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
21126 || diff == 3 || diff == 5 || diff == 9)
21127 || (compare_code == LT && ct == -1)
21128 || (compare_code == GE && cf == -1))
21131 * notl op1 (if necessary)
21139 code = reverse_condition (code);
21142 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21144 out = expand_simple_binop (mode, IOR,
21146 out, 1, OPTAB_DIRECT);
21147 if (out != operands[0])
21148 emit_move_insn (operands[0], out);
/* LEA-based sequence: setcc then scale/offset with lea when DIFF
   is a valid lea multiplier (1,2,3,4,5,8,9).  */
21155 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21156 || diff == 3 || diff == 5 || diff == 9)
21157 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21159 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21165 * lea cf(dest*(ct-cf)),dest
21169 * This also catches the degenerate setcc-only case.
21175 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21178 /* On x86_64 the lea instruction operates on Pmode, so we need
21179 to get arithmetics done in proper mode to match. */
21181 tmp = copy_rtx (out);
21185 out1 = copy_rtx (out);
21186 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21190 tmp = gen_rtx_PLUS (mode, tmp, out1);
21196 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21199 if (!rtx_equal_p (tmp, out))
21202 out = force_operand (tmp, copy_rtx (out));
21204 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21206 if (!rtx_equal_p (out, operands[0]))
21207 emit_move_insn (operands[0], copy_rtx (out));
21213 * General case: Jumpful:
21214 * xorl dest,dest cmpl op1, op2
21215 * cmpl op1, op2 movl ct, dest
21216 * setcc dest jcc 1f
21217 * decl dest movl cf, dest
21218 * andl (cf-ct),dest 1:
21221 * Size 20. Size 14.
21223 * This is reasonably steep, but branch mispredict costs are
21224 * high on modern cpus, so consider failing only if optimizing
21228 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21229 && BRANCH_COST (optimize_insn_for_speed_p (),
21234 machine_mode cmp_mode = GET_MODE (op0);
21235 enum rtx_code new_code;
21237 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21239 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21241 /* We may be reversing unordered compare to normal compare,
21242 that is not valid in general (we may convert non-trapping
21243 condition to trapping one), however on i386 we currently
21244 emit all comparisons unordered. */
21245 new_code = reverse_condition_maybe_unordered (code);
21249 new_code = ix86_reverse_condition (code, cmp_mode);
21250 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21251 compare_code = reverse_condition (compare_code);
21254 if (new_code != UNKNOWN)
21262 if (compare_code != UNKNOWN)
21264 /* notl op1 (if needed)
21269 For x < 0 (resp. x <= -1) there will be no notl,
21270 so if possible swap the constants to get rid of the
21272 True/false will be -1/0 while code below (store flag
21273 followed by decrement) is 0/-1, so the constants need
21274 to be exchanged once more. */
21276 if (compare_code == GE || !cf)
21278 code = reverse_condition (code);
21282 std::swap (ct, cf);
21284 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21288 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21290 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21292 copy_rtx (out), 1, OPTAB_DIRECT);
/* out = (out & (cf - ct)) + ct yields ct/cf from the 0/-1 mask.  */
21295 out = expand_simple_binop (mode, AND, copy_rtx (out),
21296 gen_int_mode (cf - ct, mode),
21297 copy_rtx (out), 1, OPTAB_DIRECT);
21299 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21300 copy_rtx (out), 1, OPTAB_DIRECT);
21301 if (!rtx_equal_p (out, operands[0]))
21302 emit_move_insn (operands[0], copy_rtx (out));
21308 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21310 /* Try a few things more with specific constants and a variable. */
21313 rtx var, orig_out, out, tmp;
21315 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21318 /* If one of the two operands is an interesting constant, load a
21319 constant with the above and mask it in with a logical operation. */
21321 if (CONST_INT_P (operands[2]))
21324 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21325 operands[3] = constm1_rtx, op = and_optab;
21326 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21327 operands[3] = const0_rtx, op = ior_optab;
21331 else if (CONST_INT_P (operands[3]))
21334 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21335 operands[2] = constm1_rtx, op = and_optab;
21336 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21337 operands[2] = const0_rtx, op = ior_optab;
21344 orig_out = operands[0];
21345 tmp = gen_reg_rtx (mode);
21348 /* Recurse to get the constant loaded. */
21349 if (ix86_expand_int_movcc (operands) == 0)
21352 /* Mask in the interesting variable. */
21353 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21355 if (!rtx_equal_p (out, orig_out))
21356 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21362 * For comparison with above,
/* Final fallback: a real cmov.  Operands are legalized below.  */
21372 if (! nonimmediate_operand (operands[2], mode))
21373 operands[2] = force_reg (mode, operands[2]);
21374 if (! nonimmediate_operand (operands[3], mode))
21375 operands[3] = force_reg (mode, operands[3]);
21377 if (! register_operand (operands[2], VOIDmode)
21379 || ! register_operand (operands[3], VOIDmode)))
21380 operands[2] = force_reg (mode, operands[2]);
21383 && ! register_operand (operands[3], VOIDmode))
21384 operands[3] = force_reg (mode, operands[3]);
21386 emit_insn (compare_seq);
21387 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21388 gen_rtx_IF_THEN_ELSE (mode,
21389 compare_op, operands[2],
21394 /* Swap, force into registers, or otherwise massage the two operands
21395 to an sse comparison with a mask result. Thus we differ a bit from
21396 ix86_prepare_fp_compare_args which expects to produce a flags result.
21398 The DEST operand exists to help determine whether to commute commutative
21399 operators. The POP0/POP1 operands are updated in place. The new
21400 comparison code is returned, or UNKNOWN if not implementable. */
21402 static enum rtx_code
21403 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21404 rtx *pop0, rtx *pop1)
/* NOTE(review): the switch-case labels for CODE are elided in this
   excerpt; comments describe the visible branches only.  */
21410 /* AVX supports all the needed comparisons. */
21413 /* We have no LTGT as an operator. We could implement it with
21414 NE & ORDERED, but this requires an extra temporary. It's
21415 not clear that it's worth it. */
21422 /* These are supported directly. */
21429 /* AVX has 3 operand comparisons, no need to swap anything. */
21432 /* For commutative operators, try to canonicalize the destination
21433 operand to be first in the comparison - this helps reload to
21434 avoid extra moves. */
21435 if (!dest || !rtx_equal_p (dest, *pop1))
21443 /* These are not supported directly before AVX, and furthermore
21444 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21445 comparison operands to transform into something that is
/* Swap operands and the code together so the comparison meaning is
   preserved.  */
21447 std::swap (*pop0, *pop1);
21448 code = swap_condition (code);
21452 gcc_unreachable ();
21458 /* Detect conditional moves that exactly match min/max operational
21459 semantics. Note that this is IEEE safe, as long as we don't
21460 interchange the operands.
21462 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21463 and TRUE if the operation is successful and instructions are emitted. */
21466 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21467 rtx cmp_op1, rtx if_true, rtx if_false)
/* UNGE is handled by swapping the arms (turning it into the LT
   shape); other code handling is elided in this excerpt.  */
21475 else if (code == UNGE)
21476 std::swap (if_true, if_false);
/* The cmov matches min/max only if its arms are exactly the compare
   operands (in either order -- the order decides min vs max).  */
21480 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21482 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21487 mode = GET_MODE (dest);
21489 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21490 but MODE may be a vector mode and thus not appropriate. */
21491 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-strict path: use the UNSPEC min/max, which keeps operand
   order (and hence NaN/signed-zero behavior) fixed.  */
21493 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21496 if_true = force_reg (mode, if_true);
21497 v = gen_rtvec (2, if_true, if_false);
21498 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX rtx.  */
21502 code = is_min ? SMIN : SMAX;
21503 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21506 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21510 /* Expand an sse vector comparison. Return the register with the result. */
21513 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21514 rtx op_true, rtx op_false)
21516 machine_mode mode = GET_MODE (dest);
21517 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21519 /* In general case result of comparison can differ from operands' type. */
21520 machine_mode cmp_mode;
21522 /* In AVX512F the result of comparison is an integer mask. */
21523 bool maskcmp = false;
/* 512-bit operands compare into a k-mask whose integer mode has one
   bit per element (the MASKCMP assignment is elided here).  */
21526 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21528 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21529 gcc_assert (cmp_mode != BLKmode);
21534 cmp_mode = cmp_ops_mode;
21537 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21538 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21539 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
/* Use a fresh register when DEST overlaps an input of the later
   blend, or (elided condition) when modes disagree.  */
21542 || reg_overlap_mentioned_p (dest, op_true)
21543 || reg_overlap_mentioned_p (dest, op_false)
21544 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21546 /* Compare patterns for int modes are unspec in AVX512F only. */
21547 if (maskcmp && (code == GT || code == EQ))
21549 rtx (*gen)(rtx, rtx, rtx);
21551 switch (cmp_ops_mode)
21554 gcc_assert (TARGET_AVX512BW);
21555 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21558 gcc_assert (TARGET_AVX512BW);
21559 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21562 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21565 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21573 emit_insn (gen (dest, cmp_op0, cmp_op1));
/* Generic path: emit the comparison rtx directly.  */
21577 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21579 if (cmp_mode != mode && !maskcmp)
21581 x = force_reg (cmp_ops_mode, x);
21582 convert_move (dest, x, false);
21585 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21590 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21591 operations. This is used for both scalar and vector conditional moves. */
21594 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21596 machine_mode mode = GET_MODE (dest);
21597 machine_mode cmpmode = GET_MODE (cmp);
21599 /* In AVX512F the result of comparison is an integer mask. */
21600 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
/* Special case: all-ones true arm and zero false arm -- the compare
   mask itself is the answer.  */
21604 if (vector_all_ones_operand (op_true, mode)
21605 && rtx_equal_p (op_false, CONST0_RTX (mode))
21608 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
/* false == 0: dest = cmp & true.  */
21610 else if (op_false == CONST0_RTX (mode)
21613 op_true = force_reg (mode, op_true);
21614 x = gen_rtx_AND (mode, cmp, op_true);
21615 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* true == 0: dest = ~cmp & false.  */
21617 else if (op_true == CONST0_RTX (mode)
21620 op_false = force_reg (mode, op_false);
21621 x = gen_rtx_NOT (mode, cmp);
21622 x = gen_rtx_AND (mode, x, op_false);
21623 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* integral, true == -1: dest = cmp | false.  */
21625 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21628 op_false = force_reg (mode, op_false);
21629 x = gen_rtx_IOR (mode, cmp, op_false);
21630 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* XOP: a single vpcmov does the whole select.  */
21632 else if (TARGET_XOP
21635 op_true = force_reg (mode, op_true);
21637 if (!nonimmediate_operand (op_false, mode))
21638 op_false = force_reg (mode, op_false);
21640 emit_insn (gen_rtx_SET (mode, dest,
21641 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* SSE4.1/AVX/AVX-512 blend path: pick a mode-specific blend
   builder; byte-element modes are punned to V16QI/V32QI.  (The
   switch header and some case labels are elided here.)  */
21647 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21650 if (!nonimmediate_operand (op_true, mode))
21651 op_true = force_reg (mode, op_true);
21653 op_false = force_reg (mode, op_false);
21659 gen = gen_sse4_1_blendvps;
21663 gen = gen_sse4_1_blendvpd;
21671 gen = gen_sse4_1_pblendvb;
21672 if (mode != V16QImode)
21673 d = gen_reg_rtx (V16QImode);
21674 op_false = gen_lowpart (V16QImode, op_false);
21675 op_true = gen_lowpart (V16QImode, op_true);
21676 cmp = gen_lowpart (V16QImode, cmp);
21681 gen = gen_avx_blendvps256;
21685 gen = gen_avx_blendvpd256;
21693 gen = gen_avx2_pblendvb;
21694 if (mode != V32QImode)
21695 d = gen_reg_rtx (V32QImode);
21696 op_false = gen_lowpart (V32QImode, op_false);
21697 op_true = gen_lowpart (V32QImode, op_true);
21698 cmp = gen_lowpart (V32QImode, cmp);
21703 gen = gen_avx512bw_blendmv64qi;
21706 gen = gen_avx512bw_blendmv32hi;
21709 gen = gen_avx512f_blendmv16si;
21712 gen = gen_avx512f_blendmv8di;
21715 gen = gen_avx512f_blendmv8df;
21718 gen = gen_avx512f_blendmv16sf;
/* Blend selects TRUE where the mask is set; copy back if a temp
   register D in a punned mode was used.  */
21727 emit_insn (gen (d, op_false, op_true, cmp));
21729 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* Fallback: dest = (true & cmp) | (~cmp & false).  */
21733 op_true = force_reg (mode, op_true);
21735 t2 = gen_reg_rtx (mode);
21737 t3 = gen_reg_rtx (mode);
21741 x = gen_rtx_AND (mode, op_true, cmp);
21742 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21744 x = gen_rtx_NOT (mode, cmp);
21745 x = gen_rtx_AND (mode, x, op_false);
21746 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21748 x = gen_rtx_IOR (mode, t3, t2);
21749 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21754 /* Expand a floating-point conditional move. Return true if successful. */
21757 ix86_expand_fp_movcc (rtx operands[])
21759 machine_mode mode = GET_MODE (operands[0]);
21760 enum rtx_code code = GET_CODE (operands[1]);
21761 rtx tmp, compare_op;
21762 rtx op0 = XEXP (operands[1], 0);
21763 rtx op1 = XEXP (operands[1], 1);
/* SSE scalar path: use mask-based selects instead of fcmov.  */
21765 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21767 machine_mode cmode;
21769 /* Since we've no cmove for sse registers, don't force bad register
21770 allocation just to gain access to it. Deny movcc when the
21771 comparison mode doesn't match the move mode. */
21772 cmode = GET_MODE (op0);
21773 if (cmode == VOIDmode)
21774 cmode = GET_MODE (op1);
21778 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21779 if (code == UNKNOWN)
/* First try the min/max special case, then the generic
   compare-and-blend sequence.  */
21782 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21783 operands[2], operands[3]))
21786 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21787 operands[2], operands[3]);
21788 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
/* Double-word integer compares are rejected (condition partly
   elided in this excerpt).  */
21792 if (GET_MODE (op0) == TImode
21793 || (GET_MODE (op0) == DImode
21797 /* The floating point conditional move instructions don't directly
21798 support conditions resulting from a signed integer comparison. */
21800 compare_op = ix86_expand_compare (code, op0, op1);
21801 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition via setcc, then test that QI value
   against zero -- this yields an fcmov-compatible condition.  */
21803 tmp = gen_reg_rtx (QImode);
21804 ix86_expand_setcc (tmp, code, op0, op1);
21806 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21809 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21810 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21811 operands[2], operands[3])));
/* NOTE(review): embedded original line numbers are non-contiguous;
   case labels and returns between the visible lines are missing from
   this extract.  */
21816 /* Expand a floating-point vector conditional move; a vcond operation
21817 rather than a movcc operation. */
21820 ix86_expand_fp_vcond (rtx operands[])
21822 enum rtx_code code = GET_CODE (operands[3]);
/* Canonicalize the comparison arguments (operands[4]/[5]) for SSE;
   code becomes UNKNOWN when the comparison can't be handled.  */
21825 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21826 &operands[4], &operands[5]);
21827 if (code == UNKNOWN)
/* Dispatch on the original comparison code; the visible arms build
   unordered/ordered comparisons from two SSE compares combined with a
   binop (presumably LTGT/UNEQ handling -- case labels not visible).  */
21830 switch (GET_CODE (operands[3]))
21833 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21834 operands[5], operands[0], operands[0]);
21835 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21836 operands[5], operands[1], operands[2]);
21840 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21841 operands[5], operands[0], operands[0]);
21842 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21843 operands[5], operands[1], operands[2]);
21847 gcc_unreachable ();
/* Combine the two partial compares, then select with movcc.  */
21849 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21851 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Fallback path: try min/max first, else a single compare + movcc.  */
21855 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21856 operands[5], operands[1], operands[2]))
21859 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21860 operands[1], operands[2]);
21861 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* NOTE(review): embedded original line numbers are non-contiguous;
   several statements between the visible lines are missing from this
   extract.  Comments describe only the visible code.  */
21865 /* Expand a signed/unsigned integral vector conditional move. */
21868 ix86_expand_int_vcond (rtx operands[])
21870 machine_mode data_mode = GET_MODE (operands[0]);
21871 machine_mode mode = GET_MODE (operands[4]);
21872 enum rtx_code code = GET_CODE (operands[3]);
21873 bool negate = false;
21876 cop0 = operands[4];
21877 cop1 = operands[5];
21879 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21880 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21881 if ((code == LT || code == GE)
21882 && data_mode == mode
21883 && cop1 == CONST0_RTX (mode)
21884 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21885 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21886 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21887 && (GET_MODE_SIZE (data_mode) == 16
21888 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21890 rtx negop = operands[2 - (code == LT)];
21891 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
/* x < 0 ? 1 : 0 -> logical shift of the sign bit to bit 0.  */
21892 if (negop == CONST1_RTX (data_mode))
21894 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21895 operands[0], 1, OPTAB_DIRECT);
21896 if (res != operands[0])
21897 emit_move_insn (operands[0], res);
/* x < 0 ? -1 : 0 -> arithmetic shift replicating the sign bit
   (no DImode arithmetic shift here, hence the mode check).  */
21900 else if (GET_MODE_INNER (data_mode) != DImode
21901 && vector_all_ones_operand (negop, data_mode))
21903 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21904 operands[0], 0, OPTAB_DIRECT);
21905 if (res != operands[0])
21906 emit_move_insn (operands[0], res);
/* Force operands into registers as required by the patterns.  */
21911 if (!nonimmediate_operand (cop1, mode))
21912 cop1 = force_reg (mode, cop1);
21913 if (!general_operand (operands[1], data_mode))
21914 operands[1] = force_reg (data_mode, operands[1]);
21915 if (!general_operand (operands[2], data_mode))
21916 operands[2] = force_reg (data_mode, operands[2]);
21918 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21920 && (mode == V16QImode || mode == V8HImode
21921 || mode == V4SImode || mode == V2DImode))
21925 /* Canonicalize the comparison to EQ, GT, GTU. */
21936 code = reverse_condition (code);
21942 code = reverse_condition (code);
21948 std::swap (cop0, cop1);
21949 code = swap_condition (code);
21953 gcc_unreachable ();
21956 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21957 if (mode == V2DImode)
21962 /* SSE4.1 supports EQ. */
21963 if (!TARGET_SSE4_1)
21969 /* SSE4.2 supports GT/GTU. */
21970 if (!TARGET_SSE4_2)
21975 gcc_unreachable ();
21979 /* Unsigned parallel compare is not supported by the hardware.
21980 Play some tricks to turn this into a signed comparison
21984 cop0 = force_reg (mode, cop0);
21996 rtx (*gen_sub3) (rtx, rtx, rtx);
/* Select the per-mode vector subtraction generator.  */
22000 case V16SImode: gen_sub3 = gen_subv16si3; break;
22001 case V8DImode: gen_sub3 = gen_subv8di3; break;
22002 case V8SImode: gen_sub3 = gen_subv8si3; break;
22003 case V4DImode: gen_sub3 = gen_subv4di3; break;
22004 case V4SImode: gen_sub3 = gen_subv4si3; break;
22005 case V2DImode: gen_sub3 = gen_subv2di3; break;
22007 gcc_unreachable ();
22009 /* Subtract (-(INT MAX) - 1) from both operands to make
22011 mask = ix86_build_signbit_mask (mode, true, false);
22012 t1 = gen_reg_rtx (mode);
22013 emit_insn (gen_sub3 (t1, cop0, mask));
22015 t2 = gen_reg_rtx (mode);
22016 emit_insn (gen_sub3 (t2, cop1, mask));
22030 /* Perform a parallel unsigned saturating subtraction. */
22031 x = gen_reg_rtx (mode);
22032 emit_insn (gen_rtx_SET (VOIDmode, x,
22033 gen_rtx_US_MINUS (mode, cop0, cop1)));
22036 cop1 = CONST0_RTX (mode);
22042 gcc_unreachable ();
22047 /* Allow the comparison to be done in one mode, but the movcc to
22048 happen in another mode. */
22049 if (data_mode == mode)
22051 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
22052 operands[1+negate], operands[2-negate]);
22056 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
22057 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
22058 operands[1+negate], operands[2-negate]);
22059 if (GET_MODE (x) == mode)
22060 x = gen_lowpart (data_mode, x);
/* NEGATE tracks whether the comparison was inverted above; it swaps
   the two value arms so the final select is still correct.  */
22063 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
22064 operands[2-negate]);
22068 /* AVX512F does support 64-byte integer vector operations,
22069 thus the longest vector we are faced with is V64QImode. */
22070 #define MAX_VECT_LEN 64
/* Descriptor for a (constant) vector permutation expansion.  */
22072 struct expand_vec_perm_d
/* Destination and the one or two source operands.  */
22074 rtx target, op0, op1;
/* Requested element selection, one byte index per result element.  */
22075 unsigned char perm[MAX_VECT_LEN];
/* Vector mode of the operation.  */
22076 machine_mode vmode;
/* Number of elements in the vectors (<= MAX_VECT_LEN).  */
22077 unsigned char nelt;
/* True when op0 == op1 (single-input shuffle).  */
22078 bool one_operand_p;
/* Try to expand a variable permutation with the AVX-512 VPERMI2 family.
   NOTE(review): switch/case labels and returns between the embedded
   original line numbers are missing from this extract; each visible
   `gen = ...` assignment corresponds to one vector mode, guarded by
   the ISA feature it requires.  Returns false (presumably) when no
   suitable instruction is available -- confirm against full source.  */
22083 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
22084 struct expand_vec_perm_d *d)
22086 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22087 expander, so args are either in d, or in op0, op1 etc. */
22088 machine_mode mode = GET_MODE (d ? d->op0 : op0);
22089 machine_mode maskmode = mode;
22090 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22095 if (TARGET_AVX512VL && TARGET_AVX512BW)
22096 gen = gen_avx512vl_vpermi2varv8hi3;
22099 if (TARGET_AVX512VL && TARGET_AVX512BW)
22100 gen = gen_avx512vl_vpermi2varv16hi3;
22103 if (TARGET_AVX512VBMI)
22104 gen = gen_avx512bw_vpermi2varv64qi3;
22107 if (TARGET_AVX512BW)
22108 gen = gen_avx512bw_vpermi2varv32hi3;
22111 if (TARGET_AVX512VL)
22112 gen = gen_avx512vl_vpermi2varv4si3;
22115 if (TARGET_AVX512VL)
22116 gen = gen_avx512vl_vpermi2varv8si3;
22119 if (TARGET_AVX512F)
22120 gen = gen_avx512f_vpermi2varv16si3;
/* For float modes the mask register is the same-width integer mode.  */
22123 if (TARGET_AVX512VL)
22125 gen = gen_avx512vl_vpermi2varv4sf3;
22126 maskmode = V4SImode;
22130 if (TARGET_AVX512VL)
22132 gen = gen_avx512vl_vpermi2varv8sf3;
22133 maskmode = V8SImode;
22137 if (TARGET_AVX512F)
22139 gen = gen_avx512f_vpermi2varv16sf3;
22140 maskmode = V16SImode;
22144 if (TARGET_AVX512VL)
22145 gen = gen_avx512vl_vpermi2varv2di3;
22148 if (TARGET_AVX512VL)
22149 gen = gen_avx512vl_vpermi2varv4di3;
22152 if (TARGET_AVX512F)
22153 gen = gen_avx512f_vpermi2varv8di3;
22156 if (TARGET_AVX512VL)
22158 gen = gen_avx512vl_vpermi2varv2df3;
22159 maskmode = V2DImode;
22163 if (TARGET_AVX512VL)
22165 gen = gen_avx512vl_vpermi2varv4df3;
22166 maskmode = V4DImode;
22170 if (TARGET_AVX512F)
22172 gen = gen_avx512f_vpermi2varv8df3;
22173 maskmode = V8DImode;
22183 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22184 expander, so args are either in d, or in op0, op1 etc. */
22188 target = d->target;
/* Const-expander path: build the mask as a CONST_VECTOR from d->perm.  */
22191 for (int i = 0; i < d->nelt; ++i)
22192 vec[i] = GEN_INT (d->perm[i]);
22193 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22196 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
/* NOTE(review): the embedded original line numbers below are not
   contiguous -- mode checks, braces and else-branches between the
   visible lines are missing from this extract.  Comments describe
   only the visible code.  */
22200 /* Expand a variable vector permutation. */
22203 ix86_expand_vec_perm (rtx operands[])
22205 rtx target = operands[0];
22206 rtx op0 = operands[1];
22207 rtx op1 = operands[2];
22208 rtx mask = operands[3];
22209 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22210 machine_mode mode = GET_MODE (op0);
22211 machine_mode maskmode = GET_MODE (mask);
22213 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22215 /* Number of elements in the vector. */
22216 w = GET_MODE_NUNITS (mode);
22217 e = GET_MODE_UNIT_SIZE (mode);
22218 gcc_assert (w <= 64);
/* First choice: a single VPERMI2 instruction when available.  */
22220 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22225 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22227 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22228 an constant shuffle operand. With a tiny bit of effort we can
22229 use VPERMD instead. A re-interpretation stall for V4DFmode is
22230 unfortunate but there's no avoiding it.
22231 Similarly for V16HImode we don't have instructions for variable
22232 shuffling, while for V32QImode we can use after preparing suitable
22233 masks vpshufb; vpshufb; vpermq; vpor. */
22235 if (mode == V16HImode)
22237 maskmode = mode = V32QImode;
22243 maskmode = mode = V8SImode;
22247 t1 = gen_reg_rtx (maskmode);
22249 /* Replicate the low bits of the V4DImode mask into V8SImode:
22251 t1 = { A A B B C C D D }. */
22252 for (i = 0; i < w / 2; ++i)
22253 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22254 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22255 vt = force_reg (maskmode, vt);
22256 mask = gen_lowpart (maskmode, mask);
22257 if (maskmode == V8SImode)
22258 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22260 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22262 /* Multiply the shuffle indicies by two. */
22263 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22266 /* Add one to the odd shuffle indicies:
22267 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22268 for (i = 0; i < w / 2; ++i)
22270 vec[i * 2] = const0_rtx;
22271 vec[i * 2 + 1] = const1_rtx;
22273 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22274 vt = validize_mem (force_const_mem (maskmode, vt));
22275 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22278 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22279 operands[3] = mask = t1;
22280 target = gen_reg_rtx (mode);
22281 op0 = gen_lowpart (mode, op0);
22282 op1 = gen_lowpart (mode, op1);
22288 /* The VPERMD and VPERMPS instructions already properly ignore
22289 the high bits of the shuffle elements. No need for us to
22290 perform an AND ourselves. */
22291 if (one_operand_shuffle)
22293 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22294 if (target != operands[0])
22295 emit_move_insn (operands[0],
22296 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-operand V8SI case: permute both inputs, then (presumably)
   merge -- the merge code is not visible in this extract.  */
22300 t1 = gen_reg_rtx (V8SImode);
22301 t2 = gen_reg_rtx (V8SImode);
22302 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22303 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22309 mask = gen_lowpart (V8SImode, mask);
22310 if (one_operand_shuffle)
22311 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22314 t1 = gen_reg_rtx (V8SFmode);
22315 t2 = gen_reg_rtx (V8SFmode);
22316 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22317 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22323 /* By combining the two 128-bit input vectors into one 256-bit
22324 input vector, we can use VPERMD and VPERMPS for the full
22325 two-operand shuffle. */
22326 t1 = gen_reg_rtx (V8SImode);
22327 t2 = gen_reg_rtx (V8SImode);
22328 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22329 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22330 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22331 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22335 t1 = gen_reg_rtx (V8SFmode);
22336 t2 = gen_reg_rtx (V8SImode);
22337 mask = gen_lowpart (V4SImode, mask);
22338 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22339 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22340 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22341 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* V32QI path: vpshufb can only shuffle within a 128-bit lane, so
   build two adjusted masks and combine lane-local shuffles.  */
22345 t1 = gen_reg_rtx (V32QImode);
22346 t2 = gen_reg_rtx (V32QImode);
22347 t3 = gen_reg_rtx (V32QImode);
22348 vt2 = GEN_INT (-128);
22349 for (i = 0; i < 32; i++)
22351 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22352 vt = force_reg (V32QImode, vt);
22353 for (i = 0; i < 32; i++)
22354 vec[i] = i < 16 ? vt2 : const0_rtx;
22355 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22356 vt2 = force_reg (V32QImode, vt2);
22357 /* From mask create two adjusted masks, which contain the same
22358 bits as mask in the low 7 bits of each vector element.
22359 The first mask will have the most significant bit clear
22360 if it requests element from the same 128-bit lane
22361 and MSB set if it requests element from the other 128-bit lane.
22362 The second mask will have the opposite values of the MSB,
22363 and additionally will have its 128-bit lanes swapped.
22364 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22365 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22366 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22367 stands for other 12 bytes. */
22368 /* The bit whether element is from the same lane or the other
22369 lane is bit 4, so shift it up by 3 to the MSB position. */
22370 t5 = gen_reg_rtx (V4DImode);
22371 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22373 /* Clear MSB bits from the mask just in case it had them set. */
22374 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22375 /* After this t1 will have MSB set for elements from other lane. */
22376 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22377 /* Clear bits other than MSB. */
22378 emit_insn (gen_andv32qi3 (t1, t1, vt));
22379 /* Or in the lower bits from mask into t3. */
22380 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22381 /* And invert MSB bits in t1, so MSB is set for elements from the same
22383 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22384 /* Swap 128-bit lanes in t3. */
22385 t6 = gen_reg_rtx (V4DImode);
22386 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22387 const2_rtx, GEN_INT (3),
22388 const0_rtx, const1_rtx));
22389 /* And or in the lower bits from mask into t1. */
22390 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22391 if (one_operand_shuffle)
22393 /* Each of these shuffles will put 0s in places where
22394 element from the other 128-bit lane is needed, otherwise
22395 will shuffle in the requested value. */
22396 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22397 gen_lowpart (V32QImode, t6)));
22398 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22399 /* For t3 the 128-bit lanes are swapped again. */
22400 t7 = gen_reg_rtx (V4DImode);
22401 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22402 const2_rtx, GEN_INT (3),
22403 const0_rtx, const1_rtx));
22404 /* And oring both together leads to the result. */
22405 emit_insn (gen_iorv32qi3 (target, t1,
22406 gen_lowpart (V32QImode, t7)));
22407 if (target != operands[0])
22408 emit_move_insn (operands[0],
22409 gen_lowpart (GET_MODE (operands[0]), target));
22413 t4 = gen_reg_rtx (V32QImode);
22414 /* Similarly to the above one_operand_shuffle code,
22415 just for repeated twice for each operand. merge_two:
22416 code will merge the two results together. */
22417 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22418 gen_lowpart (V32QImode, t6)));
22419 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22420 gen_lowpart (V32QImode, t6)));
22421 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22422 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22423 t7 = gen_reg_rtx (V4DImode);
22424 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22425 const2_rtx, GEN_INT (3),
22426 const0_rtx, const1_rtx));
22427 t8 = gen_reg_rtx (V4DImode);
22428 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22429 const2_rtx, GEN_INT (3),
22430 const0_rtx, const1_rtx));
22431 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22432 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* 128-bit-or-smaller vectors from here on.  */
22438 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22445 /* The XOP VPPERM insn supports three inputs. By ignoring the
22446 one_operand_shuffle special case, we avoid creating another
22447 set of constant vectors in memory. */
22448 one_operand_shuffle = false;
22450 /* mask = mask & {2*w-1, ...} */
22451 vt = GEN_INT (2*w - 1);
22455 /* mask = mask & {w-1, ...} */
22456 vt = GEN_INT (w - 1);
22459 for (i = 0; i < w; i++)
22461 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22462 mask = expand_simple_binop (maskmode, AND, mask, vt,
22463 NULL_RTX, 0, OPTAB_DIRECT);
22465 /* For non-QImode operations, convert the word permutation control
22466 into a byte permutation control. */
22467 if (mode != V16QImode)
22469 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22470 GEN_INT (exact_log2 (e)),
22471 NULL_RTX, 0, OPTAB_DIRECT);
22473 /* Convert mask to vector of chars. */
22474 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22476 /* Replicate each of the input bytes into byte positions:
22477 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22478 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22479 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22480 for (i = 0; i < 16; ++i)
22481 vec[i] = GEN_INT (i/e * e);
22482 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22483 vt = validize_mem (force_const_mem (V16QImode, vt));
22485 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22487 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22489 /* Convert it into the byte positions by doing
22490 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22491 for (i = 0; i < 16; ++i)
22492 vec[i] = GEN_INT (i % e);
22493 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22494 vt = validize_mem (force_const_mem (V16QImode, vt));
22495 emit_insn (gen_addv16qi3 (mask, mask, vt));
22498 /* The actual shuffle operations all operate on V16QImode. */
22499 op0 = gen_lowpart (V16QImode, op0);
22500 op1 = gen_lowpart (V16QImode, op1);
22504 if (GET_MODE (target) != V16QImode)
22505 target = gen_reg_rtx (V16QImode);
22506 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22507 if (target != operands[0])
22508 emit_move_insn (operands[0],
22509 gen_lowpart (GET_MODE (operands[0]), target));
22511 else if (one_operand_shuffle)
22513 if (GET_MODE (target) != V16QImode)
22514 target = gen_reg_rtx (V16QImode);
22515 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22516 if (target != operands[0])
22517 emit_move_insn (operands[0],
22518 gen_lowpart (GET_MODE (operands[0]), target));
22525 /* Shuffle the two input vectors independently. */
22526 t1 = gen_reg_rtx (V16QImode);
22527 t2 = gen_reg_rtx (V16QImode);
22528 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22529 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22532 /* Then merge them together. The key is whether any given control
22533 element contained a bit set that indicates the second word. */
22534 mask = operands[3];
22536 if (maskmode == V2DImode && !TARGET_SSE4_1)
22538 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22539 more shuffle to convert the V2DI input mask into a V4SI
22540 input mask. At which point the masking that expand_int_vcond
22541 will work as desired. */
22542 rtx t3 = gen_reg_rtx (V4SImode);
22543 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22544 const0_rtx, const0_rtx,
22545 const2_rtx, const2_rtx));
22547 maskmode = V4SImode;
22551 for (i = 0; i < w; i++)
22553 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22554 vt = force_reg (maskmode, vt);
22555 mask = expand_simple_binop (maskmode, AND, mask, vt,
22556 NULL_RTX, 0, OPTAB_DIRECT);
/* Merge t1/t2 with an integer vcond keyed on the select bit.  */
22558 if (GET_MODE (target) != mode)
22559 target = gen_reg_rtx (mode);
22561 xops[1] = gen_lowpart (mode, t2);
22562 xops[2] = gen_lowpart (mode, t1);
22563 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22566 ok = ix86_expand_int_vcond (xops);
22568 if (target != operands[0])
22569 emit_move_insn (operands[0],
22570 gen_lowpart (GET_MODE (operands[0]), target));
/* NOTE(review): embedded original line numbers are non-contiguous;
   switch labels and `if (unsigned_p)`/`else` lines between the
   visible pairs of extend generators are missing from this extract.  */
22574 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22575 true if we should do zero extension, else sign extension. HIGH_P is
22576 true if we want the N/2 high elements, else the low elements. */
22579 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22581 machine_mode imode = GET_MODE (src);
/* Extend-instruction path: pick a zero/sign extend generator and,
   for >=256-bit sources, an extractor for the requested half.  */
22586 rtx (*unpack)(rtx, rtx);
22587 rtx (*extract)(rtx, rtx) = NULL;
22588 machine_mode halfmode = BLKmode;
22594 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22596 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22597 halfmode = V32QImode;
22599 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22603 unpack = gen_avx2_zero_extendv16qiv16hi2;
22605 unpack = gen_avx2_sign_extendv16qiv16hi2;
22606 halfmode = V16QImode;
22608 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22612 unpack = gen_avx512f_zero_extendv16hiv16si2;
22614 unpack = gen_avx512f_sign_extendv16hiv16si2;
22615 halfmode = V16HImode;
22617 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22621 unpack = gen_avx2_zero_extendv8hiv8si2;
22623 unpack = gen_avx2_sign_extendv8hiv8si2;
22624 halfmode = V8HImode;
22626 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22630 unpack = gen_avx512f_zero_extendv8siv8di2;
22632 unpack = gen_avx512f_sign_extendv8siv8di2;
22633 halfmode = V8SImode;
22635 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22639 unpack = gen_avx2_zero_extendv4siv4di2;
22641 unpack = gen_avx2_sign_extendv4siv4di2;
22642 halfmode = V4SImode;
22644 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22648 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22650 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22654 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22656 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22660 unpack = gen_sse4_1_zero_extendv2siv2di2;
22662 unpack = gen_sse4_1_sign_extendv2siv2di2;
22665 gcc_unreachable ();
/* Wide (>=256-bit) source: extract the requested half first.  */
22668 if (GET_MODE_SIZE (imode) >= 32)
22670 tmp = gen_reg_rtx (halfmode);
22671 emit_insn (extract (tmp, src));
22675 /* Shift higher 8 bytes to lower 8 bytes. */
22676 tmp = gen_reg_rtx (V1TImode);
22677 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22679 tmp = gen_lowpart (imode, tmp);
22684 emit_insn (unpack (dest, tmp));
/* Interleave fallback (presumably pre-SSE4.1 -- guard not visible):
   use punpckh/punpckl with a zero or sign vector as the other arm.  */
22688 rtx (*unpack)(rtx, rtx, rtx);
22694 unpack = gen_vec_interleave_highv16qi;
22696 unpack = gen_vec_interleave_lowv16qi;
22700 unpack = gen_vec_interleave_highv8hi;
22702 unpack = gen_vec_interleave_lowv8hi;
22706 unpack = gen_vec_interleave_highv4si;
22708 unpack = gen_vec_interleave_lowv4si;
22711 gcc_unreachable ();
/* Second interleave operand: zero for zero-extension, otherwise a
   sign mask (0 > src) so each element is paired with its sign.  */
22715 tmp = force_reg (imode, CONST0_RTX (imode));
22717 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22718 src, pc_rtx, pc_rtx);
22720 rtx tmp2 = gen_reg_rtx (imode);
22721 emit_insn (unpack (tmp2, src, tmp));
22722 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
/* NOTE(review): embedded original line numbers are non-contiguous;
   switch labels, returns and some statements are missing from this
   extract.  */
22726 /* Expand conditional increment or decrement using adb/sbb instructions.
22727 The default case using setcc followed by the conditional move can be
22728 done by generic code. */
22730 ix86_expand_int_addcc (rtx operands[])
22732 enum rtx_code code = GET_CODE (operands[1]);
22734 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22736 rtx val = const0_rtx;
22737 bool fpcmp = false;
22739 rtx op0 = XEXP (operands[1], 0);
22740 rtx op1 = XEXP (operands[1], 1);
/* Only +1/-1 adjustments map onto adc/sbb with a 0 addend.  */
22742 if (operands[3] != const1_rtx
22743 && operands[3] != constm1_rtx)
22745 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22747 code = GET_CODE (compare_op);
22749 flags = XEXP (compare_op, 0);
22751 if (GET_MODE (flags) == CCFPmode
22752 || GET_MODE (flags) == CCFPUmode)
22755 code = ix86_fp_compare_code_to_integer (code);
/* Invert the condition in place when needed; FP compares must use
   the unordered-aware reversal.  */
22762 PUT_CODE (compare_op,
22763 reverse_condition_maybe_unordered
22764 (GET_CODE (compare_op)));
22766 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22769 mode = GET_MODE (operands[0]);
22771 /* Construct either adc or sbb insn. */
22772 if ((code == LTU) == (operands[3] == constm1_rtx))
22777 insn = gen_subqi3_carry;
22780 insn = gen_subhi3_carry;
22783 insn = gen_subsi3_carry;
22786 insn = gen_subdi3_carry;
22789 gcc_unreachable ();
22797 insn = gen_addqi3_carry;
22800 insn = gen_addhi3_carry;
22803 insn = gen_addsi3_carry;
22806 insn = gen_adddi3_carry;
22809 gcc_unreachable ();
22812 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
/* NOTE(review): embedded original line numbers are non-contiguous;
   braces, returns and some conditions between the visible lines are
   missing from this extract.  Comments describe only the visible
   code.  */
22818 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22819 but works for floating pointer parameters and nonoffsetable memories.
22820 For pushes, it returns just stack offsets; the values will be saved
22821 in the right order. Maximally three parts are generated. */
22824 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
/* Number of word-sized parts; XFmode is special-cased to 3.  */
22829 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22831 size = (GET_MODE_SIZE (mode) + 4) / 8;
22833 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22834 gcc_assert (size >= 2 && size <= 4);
22836 /* Optimize constant pool reference to immediates. This is used by fp
22837 moves, that force all constants to memory to allow combining. */
22838 if (MEM_P (operand) && MEM_READONLY_P (operand))
22840 rtx tmp = maybe_get_pool_constant (operand);
22845 if (MEM_P (operand) && !offsettable_memref_p (operand))
22847 /* The only non-offsetable memories we handle are pushes. */
22848 int ok = push_operand (operand, VOIDmode);
/* Pushes: return the same word_mode rtx for every part; the stack
   pointer decrements supply the distinct addresses.  */
22852 operand = copy_rtx (operand);
22853 PUT_MODE (operand, word_mode);
22854 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22858 if (GET_CODE (operand) == CONST_VECTOR)
22860 machine_mode imode = int_mode_for_mode (mode);
22861 /* Caution: if we looked through a constant pool memory above,
22862 the operand may actually have a different mode now. That's
22863 ok, since we want to pun this all the way back to an integer. */
22864 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22865 gcc_assert (operand != NULL);
22871 if (mode == DImode)
22872 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
/* 32-bit target path (guard not visible): split into SImode words.  */
22877 if (REG_P (operand))
22879 gcc_assert (reload_completed);
22880 for (i = 0; i < size; i++)
22881 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22883 else if (offsettable_memref_p (operand))
22885 operand = adjust_address (operand, SImode, 0);
22886 parts[0] = operand;
22887 for (i = 1; i < size; i++)
22888 parts[i] = adjust_address (operand, SImode, 4 * i);
22890 else if (GET_CODE (operand) == CONST_DOUBLE)
22895 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Convert FP constant to its target word image and emit each
   32-bit word as an immediate.  */
22899 real_to_target (l, &r, mode);
22900 parts[3] = gen_int_mode (l[3], SImode);
22901 parts[2] = gen_int_mode (l[2], SImode);
22904 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22905 long double may not be 80-bit. */
22906 real_to_target (l, &r, mode);
22907 parts[2] = gen_int_mode (l[2], SImode);
22910 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22913 gcc_unreachable ();
22915 parts[1] = gen_int_mode (l[1], SImode);
22916 parts[0] = gen_int_mode (l[0], SImode);
22919 gcc_unreachable ();
/* 64-bit target path: split into DImode plus an upper part.  */
22924 if (mode == TImode)
22925 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22926 if (mode == XFmode || mode == TFmode)
22928 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22929 if (REG_P (operand))
22931 gcc_assert (reload_completed);
22932 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22933 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22935 else if (offsettable_memref_p (operand))
22937 operand = adjust_address (operand, DImode, 0);
22938 parts[0] = operand;
22939 parts[1] = adjust_address (operand, upper_mode, 8);
22941 else if (GET_CODE (operand) == CONST_DOUBLE)
22946 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22947 real_to_target (l, &r, mode);
22949 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22950 if (HOST_BITS_PER_WIDE_INT >= 64)
22953 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22954 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22957 parts[0] = immed_double_const (l[0], l[1], DImode);
22959 if (upper_mode == SImode)
22960 parts[1] = gen_int_mode (l[2], SImode);
22961 else if (HOST_BITS_PER_WIDE_INT >= 64)
22964 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22965 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22968 parts[1] = immed_double_const (l[2], l[3], DImode);
22971 gcc_unreachable ();
22978 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22979 Return false when normal moves are needed; true when all required
22980 insns have been emitted. Operands 2-4 contain the input values
22981 int the correct order; operands 5-7 contain the output values. */
22984 ix86_split_long_move (rtx operands[])
22989 int collisions = 0;
22990 machine_mode mode = GET_MODE (operands[0]);
22991 bool collisionparts[4];
22993 /* The DFmode expanders may ask us to move double.
22994 For 64bit target this is single move. By hiding the fact
22995 here we simplify i386.md splitters. */
22996 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22998 /* Optimize constant pool reference to immediates. This is used by
22999 fp moves, that force all constants to memory to allow combining. */
23001 if (MEM_P (operands[1])
23002 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
23003 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
23004 operands[1] = get_pool_constant (XEXP (operands[1], 0));
23005 if (push_operand (operands[0], VOIDmode))
23007 operands[0] = copy_rtx (operands[0]);
23008 PUT_MODE (operands[0], word_mode);
23011 operands[0] = gen_lowpart (DImode, operands[0]);
23012 operands[1] = gen_lowpart (DImode, operands[1]);
23013 emit_move_insn (operands[0], operands[1]);
23017 /* The only non-offsettable memory we handle is push. */
23018 if (push_operand (operands[0], VOIDmode))
23021 gcc_assert (!MEM_P (operands[0])
23022 || offsettable_memref_p (operands[0]));
23024 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
23025 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
23027 /* When emitting push, take care for source operands on the stack. */
23028 if (push && MEM_P (operands[1])
23029 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
23031 rtx src_base = XEXP (part[1][nparts - 1], 0);
23033 /* Compensate for the stack decrement by 4. */
23034 if (!TARGET_64BIT && nparts == 3
23035 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
23036 src_base = plus_constant (Pmode, src_base, 4);
23038 /* src_base refers to the stack pointer and is
23039 automatically decreased by emitted push. */
23040 for (i = 0; i < nparts; i++)
23041 part[1][i] = change_address (part[1][i],
23042 GET_MODE (part[1][i]), src_base);
23045 /* We need to do copy in the right order in case an address register
23046 of the source overlaps the destination. */
23047 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
23051 for (i = 0; i < nparts; i++)
23054 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
23055 if (collisionparts[i])
23059 /* Collision in the middle part can be handled by reordering. */
23060 if (collisions == 1 && nparts == 3 && collisionparts [1])
23062 std::swap (part[0][1], part[0][2]);
23063 std::swap (part[1][1], part[1][2]);
23065 else if (collisions == 1
23067 && (collisionparts [1] || collisionparts [2]))
23069 if (collisionparts [1])
23071 std::swap (part[0][1], part[0][2]);
23072 std::swap (part[1][1], part[1][2]);
23076 std::swap (part[0][2], part[0][3]);
23077 std::swap (part[1][2], part[1][3]);
23081 /* If there are more collisions, we can't handle it by reordering.
23082 Do an lea to the last part and use only one colliding move. */
23083 else if (collisions > 1)
23085 rtx base, addr, tls_base = NULL_RTX;
23089 base = part[0][nparts - 1];
23091 /* Handle the case when the last part isn't valid for lea.
23092 Happens in 64-bit mode storing the 12-byte XFmode. */
23093 if (GET_MODE (base) != Pmode)
23094 base = gen_rtx_REG (Pmode, REGNO (base));
23096 addr = XEXP (part[1][0], 0);
23097 if (TARGET_TLS_DIRECT_SEG_REFS)
23099 struct ix86_address parts;
23100 int ok = ix86_decompose_address (addr, &parts);
23102 if (parts.seg == DEFAULT_TLS_SEG_REG)
23104 /* It is not valid to use %gs: or %fs: in
23105 lea though, so we need to remove it from the
23106 address used for lea and add it to each individual
23107 memory loads instead. */
23108 addr = copy_rtx (addr);
23110 while (GET_CODE (*x) == PLUS)
23112 for (i = 0; i < 2; i++)
23114 rtx u = XEXP (*x, i);
23115 if (GET_CODE (u) == ZERO_EXTEND)
23117 if (GET_CODE (u) == UNSPEC
23118 && XINT (u, 1) == UNSPEC_TP)
23120 tls_base = XEXP (*x, i);
23121 *x = XEXP (*x, 1 - i);
23129 gcc_assert (tls_base);
23132 emit_insn (gen_rtx_SET (VOIDmode, base, addr));
23134 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
23135 part[1][0] = replace_equiv_address (part[1][0], base);
23136 for (i = 1; i < nparts; i++)
23139 base = copy_rtx (base);
23140 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
23141 part[1][i] = replace_equiv_address (part[1][i], tmp);
23152 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
23153 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
23154 stack_pointer_rtx, GEN_INT (-4)));
23155 emit_move_insn (part[0][2], part[1][2]);
23157 else if (nparts == 4)
23159 emit_move_insn (part[0][3], part[1][3]);
23160 emit_move_insn (part[0][2], part[1][2]);
23165 /* In 64bit mode we don't have 32bit push available. In case this is
23166 register, it is OK - we will just use larger counterpart. We also
23167 retype memory - these comes from attempt to avoid REX prefix on
23168 moving of second half of TFmode value. */
23169 if (GET_MODE (part[1][1]) == SImode)
23171 switch (GET_CODE (part[1][1]))
23174 part[1][1] = adjust_address (part[1][1], DImode, 0);
23178 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23182 gcc_unreachable ();
23185 if (GET_MODE (part[1][0]) == SImode)
23186 part[1][0] = part[1][1];
23189 emit_move_insn (part[0][1], part[1][1]);
23190 emit_move_insn (part[0][0], part[1][0]);
23194 /* Choose correct order to not overwrite the source before it is copied. */
23195 if ((REG_P (part[0][0])
23196 && REG_P (part[1][1])
23197 && (REGNO (part[0][0]) == REGNO (part[1][1])
23199 && REGNO (part[0][0]) == REGNO (part[1][2]))
23201 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23203 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23205 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23207 operands[2 + i] = part[0][j];
23208 operands[6 + i] = part[1][j];
23213 for (i = 0; i < nparts; i++)
23215 operands[2 + i] = part[0][i];
23216 operands[6 + i] = part[1][i];
23220 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23221 if (optimize_insn_for_size_p ())
23223 for (j = 0; j < nparts - 1; j++)
23224 if (CONST_INT_P (operands[6 + j])
23225 && operands[6 + j] != const0_rtx
23226 && REG_P (operands[2 + j]))
23227 for (i = j; i < nparts - 1; i++)
23228 if (CONST_INT_P (operands[7 + i])
23229 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23230 operands[7 + i] = operands[2 + j];
23233 for (i = 0; i < nparts; i++)
23234 emit_move_insn (operands[2 + i], operands[6 + i]);
23239 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23240 left shift by a constant, either using a single shift or
23241 a sequence of add instructions. */
23244 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23246 rtx (*insn)(rtx, rtx, rtx);
23249 || (count * ix86_cost->add <= ix86_cost->shift_const
23250 && !optimize_insn_for_size_p ()))
23252 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23253 while (count-- > 0)
23254 emit_insn (insn (operand, operand, operand));
23258 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23259 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (operands[0] = operands[1] << operands[2])
   into half-word operations.  SCRATCH, when nonnull and TARGET_CMOVE,
   provides a spare register for the variable-count fixup.
   NOTE(review): the embedded listing numbers jump (e.g. 23265, 23274 are
   absent), so braces/else lines of the original are missing from this
   extract.  */
23264 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23266 rtx (*gen_ashl3)(rtx, rtx, rtx);
23267 rtx (*gen_shld)(rtx, rtx, rtx);
23268 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23270 rtx low[2], high[2];
/* Constant shift count: split operands into low/high halves and mask
   the count to the mode's bit width.  */
23273 if (CONST_INT_P (operands[2]))
23275 split_double_mode (mode, operands, 2, low, high);
23276 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count >= half width: low half becomes zero, high half gets the old
   low half shifted by the remainder.  */
23278 if (count >= half_width)
23280 emit_move_insn (high[0], low[1]);
23281 emit_move_insn (low[0], const0_rtx);
23283 if (count > half_width)
23284 ix86_expand_ashl_const (high[0], count - half_width, mode);
/* Count < half width: use SHLD to shift bits from the low half into
   the high half, then shift the low half.  */
23288 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23290 if (!rtx_equal_p (operands[0], operands[1]))
23291 emit_move_insn (operands[0], operands[1]);
23293 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23294 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
23299 split_double_mode (mode, operands, 1, low, high);
23301 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
/* Special-case 1 << N.  */
23303 if (operands[1] == const1_rtx)
23305 /* Assuming we've chosen a QImode capable registers, then 1 << N
23306 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23307 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23309 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Test the half-width bit of the count; setcc 1 into exactly one of
   the two halves (low if the bit is clear, high if set).  */
23311 ix86_expand_clear (low[0]);
23312 ix86_expand_clear (high[0]);
23313 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23315 d = gen_lowpart (QImode, low[0]);
23316 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23317 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23318 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23320 d = gen_lowpart (QImode, high[0]);
23321 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23322 s = gen_rtx_NE (QImode, flags, const0_rtx);
23323 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23326 /* Otherwise, we can get the same results by manually performing
23327 a bit extract operation on bit 5/6, and then performing the two
23328 shifts. The two methods of getting 0/1 into low/high are exactly
23329 the same size. Avoiding the shift in the bit extract case helps
23330 pentium4 a bit; no one else seems to care much either way. */
23333 machine_mode half_mode;
23334 rtx (*gen_lshr3)(rtx, rtx, rtx);
23335 rtx (*gen_and3)(rtx, rtx, rtx);
23336 rtx (*gen_xor3)(rtx, rtx, rtx);
23337 HOST_WIDE_INT bits;
/* Select the half-word mode and matching insn generators.  */
23340 if (mode == DImode)
23342 half_mode = SImode;
23343 gen_lshr3 = gen_lshrsi3;
23344 gen_and3 = gen_andsi3;
23345 gen_xor3 = gen_xorsi3;
23350 half_mode = DImode;
23351 gen_lshr3 = gen_lshrdi3;
23352 gen_and3 = gen_anddi3;
23353 gen_xor3 = gen_xordi3;
/* NOTE(review): the assignments of `x` and `bits` for this path sit on
   lines missing from this extract — confirm against the full file.  */
23357 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23358 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23360 x = gen_lowpart (half_mode, operands[2]);
23361 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract the half-width selector bit into high, its complement into low.  */
23363 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23364 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23365 emit_move_insn (low[0], high[0]);
23366 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
/* Shift both halves by the (masked) variable count.  */
23369 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23370 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
/* Special-case -1 << N: no SHLD needed since ones shift into ones.  */
23374 if (operands[1] == constm1_rtx)
23376 /* For -1 << N, we can avoid the shld instruction, because we
23377 know that we're shifting 0...31/63 ones into a -1. */
23378 emit_move_insn (low[0], constm1_rtx);
23379 if (optimize_insn_for_size_p ())
23380 emit_move_insn (high[0], low[0])
23382 emit_move_insn (high[0], constm1_rtx);
/* General variable-count path: SHLD plus low shift ...  */
23386 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23388 if (!rtx_equal_p (operands[0], operands[1]))
23389 emit_move_insn (operands[0], operands[1]);
23391 split_double_mode (mode, operands, 1, low, high);
23392 emit_insn (gen_shld (high[0], low[0], operands[2]));
23395 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
/* ... then fix up the case count >= half_width, with cmov when a
   scratch register is available, otherwise with a branchy pattern.  */
23397 if (TARGET_CMOVE && scratch)
23399 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23400 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23402 ix86_expand_clear (scratch);
23403 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23407 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23408 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23410 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into half-word operations.
   SCRATCH, when nonnull and TARGET_CMOVE, enables the cmov-based fixup
   for variable counts.  NOTE(review): the embedded numbering has gaps,
   so brace/else lines of the original are absent from this extract.  */
23415 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23417 rtx (*gen_ashr3)(rtx, rtx, rtx)
23418 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23419 rtx (*gen_shrd)(rtx, rtx, rtx);
23420 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23422 rtx low[2], high[2];
/* Constant shift count.  */
23425 if (CONST_INT_P (operands[2]))
23427 split_double_mode (mode, operands, 2, low, high);
23428 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Shift by width-1: both halves become the sign mask.  */
23430 if (count == GET_MODE_BITSIZE (mode) - 1)
23432 emit_move_insn (high[0], high[1]);
23433 emit_insn (gen_ashr3 (high[0], high[0],
23434 GEN_INT (half_width - 1)));
23435 emit_move_insn (low[0], high[0]);
/* Count >= half width: low gets high >> (count-half), high gets the
   sign-extension of the old high half.  */
23438 else if (count >= half_width)
23440 emit_move_insn (low[0], high[1]);
23441 emit_move_insn (high[0], low[0]);
23442 emit_insn (gen_ashr3 (high[0], high[0],
23443 GEN_INT (half_width - 1)));
23445 if (count > half_width)
23446 emit_insn (gen_ashr3 (low[0], low[0],
23447 GEN_INT (count - half_width)));
/* Count < half width: SHRD moves bits from high into low, then the
   high half is arithmetically shifted.  */
23451 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23453 if (!rtx_equal_p (operands[0], operands[1]))
23454 emit_move_insn (operands[0], operands[1]);
23456 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23457 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
/* Variable shift count: SHRD + ASHR, then fix up count >= half_width
   with cmov (using SCRATCH to hold the sign mask) or a branch.  */
23462 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23464 if (!rtx_equal_p (operands[0], operands[1]))
23465 emit_move_insn (operands[0], operands[1]);
23467 split_double_mode (mode, operands, 1, low, high);
23469 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23470 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23472 if (TARGET_CMOVE && scratch)
23474 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23475 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23477 emit_move_insn (scratch, high[0]);
23478 emit_insn (gen_ashr3 (scratch, scratch,
23479 GEN_INT (half_width - 1)));
23480 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23485 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23486 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23488 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into half-word operations.
   Mirrors ix86_split_ashr but zero-fills instead of sign-extending.
   NOTE(review): brace/else lines of the original are absent from this
   extract (embedded numbering has gaps).  */
23494 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23496 rtx (*gen_lshr3)(rtx, rtx, rtx)
23497 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23498 rtx (*gen_shrd)(rtx, rtx, rtx);
23499 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23501 rtx low[2], high[2];
/* Constant shift count.  */
23504 if (CONST_INT_P (operands[2]))
23506 split_double_mode (mode, operands, 2, low, high);
23507 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count >= half width: high half becomes zero.  */
23509 if (count >= half_width)
23511 emit_move_insn (low[0], high[1]);
23512 ix86_expand_clear (high[0]);
23514 if (count > half_width)
23515 emit_insn (gen_lshr3 (low[0], low[0],
23516 GEN_INT (count - half_width)));
/* Count < half width: SHRD + logical shift of the high half.  */
23520 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23522 if (!rtx_equal_p (operands[0], operands[1]))
23523 emit_move_insn (operands[0], operands[1]);
23525 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23526 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
/* Variable shift count: SHRD + LSHR, then fix up count >= half_width
   with cmov (scratch cleared to provide the zero) or a branch.  */
23531 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23533 if (!rtx_equal_p (operands[0], operands[1]))
23534 emit_move_insn (operands[0], operands[1]);
23536 split_double_mode (mode, operands, 1, low, high);
23538 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23539 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23541 if (TARGET_CMOVE && scratch)
23543 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23544 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23546 ix86_expand_clear (scratch);
23547 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23552 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23553 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23555 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23560 /* Predict just emitted jump instruction to be taken with probability PROB. */
23562 predict_jump (int prob)
23564 rtx insn = get_last_insn ();
23565 gcc_assert (JUMP_P (insn));
23566 add_int_reg_note (insn, REG_BR_PROB, prob);
23569 /* Helper function for the string operations below. Dest VARIABLE whether
23570 it is aligned to VALUE bytes. If true, jump to the label. */
23571 static rtx_code_label *
23572 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23574 rtx_code_label *label = gen_label_rtx ();
23575 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23576 if (GET_MODE (variable) == DImode)
23577 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23579 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23580 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23583 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23585 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23589 /* Adjust COUNTER by the VALUE. */
23591 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23593 rtx (*gen_add)(rtx, rtx, rtx)
23594 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23596 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23599 /* Zero extend possibly SImode EXP to Pmode register. */
23601 ix86_zero_extend_to_Pmode (rtx exp)
23603 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23606 /* Divide COUNTREG by SCALE. */
23608 scale_counter (rtx countreg, int scale)
23614 if (CONST_INT_P (countreg))
23615 return GEN_INT (INTVAL (countreg) / scale);
23616 gcc_assert (REG_P (countreg));
23618 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23619 GEN_INT (exact_log2 (scale)),
23620 NULL, 1, OPTAB_DIRECT);
23624 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23625 DImode for constant loop counts. */
23627 static machine_mode
23628 counter_mode (rtx count_exp)
23630 if (GET_MODE (count_exp) != VOIDmode)
23631 return GET_MODE (count_exp);
23632 if (!CONST_INT_P (count_exp))
23634 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23639 /* Copy the address to a Pmode register. This is used for x32 to
23640 truncate DImode TLS address to a SImode register. */
23643 ix86_copy_addr_to_reg (rtx addr)
23646 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23648 reg = copy_addr_to_reg (addr);
23649 REG_POINTER (reg) = 1;
23654 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23655 reg = copy_to_mode_reg (DImode, addr);
23656 REG_POINTER (reg) = 1;
23657 return gen_rtx_SUBREG (SImode, reg, 0);
23661 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
23662 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
23663 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23664 memory by VALUE (supposed to be in MODE).
23666 The size is rounded down to whole number of chunk size moved at once.
23667 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): brace lines and some statements are absent from this
   extract (embedded numbering has gaps).  */
23671 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23672 rtx destptr, rtx srcptr, rtx value,
23673 rtx count, machine_mode mode, int unroll,
23674 int expected_size, bool issetmem)
23676 rtx_code_label *out_label, *top_label;
23678 machine_mode iter_mode = counter_mode (count);
23679 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23680 rtx piece_size = GEN_INT (piece_size_n);
23681 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23685 top_label = gen_label_rtx ();
23686 out_label = gen_label_rtx ();
23687 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the chunk size.  */
23689 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23690 NULL, 1, OPTAB_DIRECT);
23691 /* Those two should combine. */
23692 if (piece_size == const1_rtx)
/* Skip the loop entirely when nothing would be copied.  */
23694 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23696 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23698 emit_move_insn (iter, const0_rtx);
23700 emit_label (top_label);
23702 tmp = convert_modes (Pmode, iter_mode, iter, true);
23704 /* This assert could be relaxed - in this case we'll need to compute
23705 smallest power of two, containing in PIECE_SIZE_N and pass it to
23707 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
/* Address the current chunk as base + iter.  */
23708 destmem = offset_address (destmem, tmp, piece_size_n);
23709 destmem = adjust_address (destmem, mode, 0);
23713 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23714 srcmem = adjust_address (srcmem, mode, 0);
23716 /* When unrolling for chips that reorder memory reads and writes,
23717 we can save registers by using single temporary.
23718 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" below deliberately disables this branch;
   the all-loads-then-all-stores variant further down is used instead.  */
23719 if (!TARGET_64BIT && 0)
23721 for (i = 0; i < unroll; i++)
23726 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23728 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23730 emit_move_insn (destmem, srcmem);
/* Copy path: load all UNROLL chunks into temporaries first, then store
   them, to decouple the reads from the writes.  */
23736 gcc_assert (unroll <= 4);
23737 for (i = 0; i < unroll; i++)
23739 tmpreg[i] = gen_reg_rtx (mode);
23743 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23745 emit_move_insn (tmpreg[i], srcmem);
23747 for (i = 0; i < unroll; i++)
23752 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23754 emit_move_insn (destmem, tmpreg[i]);
/* Set path: store VALUE into each chunk.  */
23759 for (i = 0; i < unroll; i++)
23763 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23764 emit_move_insn (destmem, value);
/* iter += piece_size; loop while iter < size.  */
23767 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23768 true, OPTAB_LIB_WIDEN);
23770 emit_move_insn (iter, tmp);
23772 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive a branch probability from the expected iteration count.  */
23774 if (expected_size != -1)
23776 expected_size /= GET_MODE_SIZE (mode) * unroll;
23777 if (expected_size == 0)
23779 else if (expected_size > REG_BR_PROB_BASE)
23780 predict_jump (REG_BR_PROB_BASE - 1);
23782 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23785 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied/set region.  */
23786 iter = ix86_zero_extend_to_Pmode (iter);
23787 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23788 true, OPTAB_LIB_WIDEN);
23789 if (tmp != destptr)
23790 emit_move_insn (destptr, tmp);
23793 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23794 true, OPTAB_LIB_WIDEN);
23796 emit_move_insn (srcptr, tmp);
23798 emit_label (out_label);
23801 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23802 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23803 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23804 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
23805 ORIG_VALUE is the original value passed to memset to fill the memory with.
23806 Other arguments have same meaning as for previous function. */
/* NOTE(review): brace lines are absent from this extract (embedded
   numbering has gaps).  */
23809 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23810 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23812 machine_mode mode, bool issetmem)
23817 HOST_WIDE_INT rounded_count;
23819 /* If possible, it is shorter to use rep movs.
23820 TODO: Maybe it is better to move this logic to decide_alg. */
/* Use a wider element when a QImode count is known to be a multiple
   of 4 (and the fill value, if any, is zero).  */
23821 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23822 && (!issetmem || orig_value == const0_rtx))
/* Rewrite DESTMEM as a BLKmode access through DESTPTR.  */
23825 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23826 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* COUNTREG = byte count divided by the element size, in Pmode.  */
23828 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23829 GET_MODE_SIZE (mode)));
/* DESTEXP expresses the final destination pointer for the rep insn.  */
23830 if (mode != QImode)
23832 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23833 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23834 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23837 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Record the known store size (rounded down to the element size) on
   the MEM, or drop a stale size.  */
23838 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23840 rounded_count = (INTVAL (count)
23841 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23842 destmem = shallow_copy_rtx (destmem);
23843 set_mem_size (destmem, rounded_count);
23845 else if (MEM_SIZE_KNOWN_P (destmem))
23846 clear_mem_size (destmem);
/* Setmem case: emit rep stos with the promoted fill value.  */
23850 value = force_reg (mode, gen_lowpart (mode, value));
23851 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Movmem case: mirror the address/size bookkeeping for the source,
   then emit rep mov.  */
23855 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23856 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23857 if (mode != QImode)
23859 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23860 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23861 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23864 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23865 if (CONST_INT_P (count))
23867 rounded_count = (INTVAL (count)
23868 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23869 srcmem = shallow_copy_rtx (srcmem);
23870 set_mem_size (srcmem, rounded_count);
23874 if (MEM_SIZE_KNOWN_P (srcmem))
23875 clear_mem_size (srcmem);
23877 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23882 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23884 SRC is passed by pointer to be updated on return.
23885 Return value is updated DST. */
/* NOTE(review): brace lines are absent from this extract (embedded
   numbering has gaps).  */
23887 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23888 HOST_WIDE_INT size_to_move)
23890 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23891 enum insn_code code;
23892 machine_mode move_mode;
23895 /* Find the widest mode in which we could perform moves.
23896 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23897 it until move of such size is supported. */
23898 piece_size = 1 << floor_log2 (size_to_move);
23899 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23900 code = optab_handler (mov_optab, move_mode);
23901 while (code == CODE_FOR_nothing && piece_size > 1)
23904 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23905 code = optab_handler (mov_optab, move_mode);
23908 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23909 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23910 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23912 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23913 move_mode = mode_for_vector (word_mode, nunits);
23914 code = optab_handler (mov_optab, move_mode);
/* No vector move available: fall back to word-sized pieces.  */
23915 if (code == CODE_FOR_nothing)
23917 move_mode = word_mode;
23918 piece_size = GET_MODE_SIZE (move_mode);
23919 code = optab_handler (mov_optab, move_mode);
23922 gcc_assert (code != CODE_FOR_nothing);
23924 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23925 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23927 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23928 gcc_assert (size_to_move % piece_size == 0);
23929 adjust = GEN_INT (piece_size);
23930 for (i = 0; i < size_to_move; i += piece_size)
23932 /* We move from memory to memory, so we'll need to do it via
23933 a temporary register. */
23934 tempreg = gen_reg_rtx (move_mode);
23935 emit_insn (GEN_FCN (code) (tempreg, src));
23936 emit_insn (GEN_FCN (code) (dst, tempreg));
/* Bump both pointers past the piece just copied.  */
23938 emit_move_insn (destptr,
23939 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23940 emit_move_insn (srcptr,
23941 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23943 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23945 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23949 /* Update DST and SRC rtx. */
23954 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* NOTE(review): brace/else lines are absent from this extract (embedded
   numbering has gaps).  */
23956 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23957 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit one move per set bit of the remainder, largest
   piece first.  */
23960 if (CONST_INT_P (count))
23962 HOST_WIDE_INT countval = INTVAL (count);
23963 HOST_WIDE_INT epilogue_size = countval % max_size;
23966 /* For now MAX_SIZE should be a power of 2. This assert could be
23967 relaxed, but it'll require a bit more complicated epilogue
23969 gcc_assert ((max_size & (max_size - 1)) == 0);
23970 for (i = max_size; i >= 1; i >>= 1)
23972 if (epilogue_size & i)
23973 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Large variable remainder: mask the count and fall back to a byte
   copy loop.  */
23979 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23980 count, 1, OPTAB_DIRECT);
23981 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23982 count, QImode, 1, 4, false);
23986 /* When there are stringops, we can cheaply increase dest and src pointers.
23987 Otherwise we save code size by maintaining offset (zero is readily
23988 available from preceding rep operation) and using x86 addressing modes.
/* Variable remainder with single-insn stringops: test each size bit of
   COUNT and emit one auto-incrementing strmov per set bit.  */
23990 if (TARGET_SINGLE_STRINGOP)
23994 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23995 src = change_address (srcmem, SImode, srcptr);
23996 dest = change_address (destmem, SImode, destptr);
23997 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23998 emit_label (label);
23999 LABEL_NUSES (label) = 1;
24003 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24004 src = change_address (srcmem, HImode, srcptr);
24005 dest = change_address (destmem, HImode, destptr);
24006 emit_insn (gen_strmov (destptr, dest, srcptr, src));
24007 emit_label (label);
24008 LABEL_NUSES (label) = 1;
24012 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24013 src = change_address (srcmem, QImode, srcptr);
24014 dest = change_address (destmem, QImode, destptr);
24015 emit_insn (gen_strmov (destptr, dest, srcptr, src));
24016 emit_label (label);
24017 LABEL_NUSES (label) = 1;
/* Without single-insn stringops: keep a running OFFSET register and
   use plain moves with base+offset addressing instead.  */
24022 rtx offset = force_reg (Pmode, const0_rtx);
24027 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24028 src = change_address (srcmem, SImode, srcptr);
24029 dest = change_address (destmem, SImode, destptr);
24030 emit_move_insn (dest, src);
24031 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
24032 true, OPTAB_LIB_WIDEN);
24034 emit_move_insn (offset, tmp);
24035 emit_label (label);
24036 LABEL_NUSES (label) = 1;
24040 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24041 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
24042 src = change_address (srcmem, HImode, tmp);
24043 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
24044 dest = change_address (destmem, HImode, tmp);
24045 emit_move_insn (dest, src);
24046 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
24047 true, OPTAB_LIB_WIDEN);
24049 emit_move_insn (offset, tmp);
24050 emit_label (label);
24051 LABEL_NUSES (label) = 1;
24055 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24056 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
24057 src = change_address (srcmem, QImode, tmp);
24058 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
24059 dest = change_address (destmem, QImode, tmp);
24060 emit_move_insn (dest, src);
24061 emit_label (label);
24062 LABEL_NUSES (label) = 1;
24067 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
24068 with value PROMOTED_VAL.
24069 DESTPTR is advanced past the filled region as stores are emitted.
24070 Return value is updated DST. */
/* NOTE(review): brace lines are absent from this extract (embedded
   numbering has gaps).  */
24072 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
24073 HOST_WIDE_INT size_to_move)
24075 rtx dst = destmem, adjust;
24076 enum insn_code code;
24077 machine_mode move_mode;
24080 /* Find the widest mode in which we could perform moves.
24081 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
24082 it until move of such size is supported. */
/* The store width is taken from PROMOTED_VAL's mode, narrowed when
   SIZE_TO_MOVE is smaller than that width.  */
24083 move_mode = GET_MODE (promoted_val);
24084 if (move_mode == VOIDmode)
24085 move_mode = QImode;
24086 if (size_to_move < GET_MODE_SIZE (move_mode))
24088 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
24089 promoted_val = gen_lowpart (move_mode, promoted_val);
24091 piece_size = GET_MODE_SIZE (move_mode);
24092 code = optab_handler (mov_optab, move_mode);
24093 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
24095 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
24097 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
24098 gcc_assert (size_to_move % piece_size == 0);
24099 adjust = GEN_INT (piece_size);
24100 for (i = 0; i < size_to_move; i += piece_size)
/* Word-sized or smaller pieces use the strset pattern, which also
   advances DESTPTR; wider (vector) pieces store directly and DESTPTR
   is advanced by an explicit add.  */
24102 if (piece_size <= GET_MODE_SIZE (word_mode))
24104 emit_insn (gen_strset (destptr, dst, promoted_val));
24105 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24110 emit_insn (GEN_FCN (code) (dst, promoted_val));
24112 emit_move_insn (destptr,
24113 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
24115 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24119 /* Update DST rtx. */
24122 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
24124 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
24125 rtx count, int max_size)
24128 expand_simple_binop (counter_mode (count), AND, count,
24129 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
24130 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
24131 gen_lowpart (QImode, value), count, QImode,
24132 1, max_size / 2, true);
24135 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* VALUE is the promoted fill value; VEC_VALUE, when nonnull, is a wider
   vector-promoted copy used for pieces larger than VALUE's mode.
   NOTE(review): brace/else lines are absent from this extract (embedded
   numbering has gaps).  */
24137 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
24138 rtx count, int max_size)
/* Constant count: emit one store per set bit of the remainder, largest
   piece first, preferring VEC_VALUE for wide pieces.  */
24142 if (CONST_INT_P (count))
24144 HOST_WIDE_INT countval = INTVAL (count);
24145 HOST_WIDE_INT epilogue_size = countval % max_size;
24148 /* For now MAX_SIZE should be a power of 2. This assert could be
24149 relaxed, but it'll require a bit more complicated epilogue
24151 gcc_assert ((max_size & (max_size - 1)) == 0);
24152 for (i = max_size; i >= 1; i >>= 1)
24154 if (epilogue_size & i)
24156 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24157 destmem = emit_memset (destmem, destptr, vec_value, i);
24159 destmem = emit_memset (destmem, destptr, value, i);
/* Large variable remainder: fall back to the byte loop.  */
24166 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable remainder: test each size bit of COUNT and emit the matching
   strset stores, largest group first.  16-byte group is two DImode or
   four SImode stores depending on target width.  */
24171 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
24174 dest = change_address (destmem, DImode, destptr);
24175 emit_insn (gen_strset (destptr, dest, value));
24176 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24177 emit_insn (gen_strset (destptr, dest, value));
24181 dest = change_address (destmem, SImode, destptr);
24182 emit_insn (gen_strset (destptr, dest, value));
24183 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24184 emit_insn (gen_strset (destptr, dest, value));
24185 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24186 emit_insn (gen_strset (destptr, dest, value));
24187 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24188 emit_insn (gen_strset (destptr, dest, value));
24190 emit_label (label);
24191 LABEL_NUSES (label) = 1;
24195 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24198 dest = change_address (destmem, DImode, destptr);
24199 emit_insn (gen_strset (destptr, dest, value));
24203 dest = change_address (destmem, SImode, destptr);
24204 emit_insn (gen_strset (destptr, dest, value));
24205 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24206 emit_insn (gen_strset (destptr, dest, value));
24208 emit_label (label);
24209 LABEL_NUSES (label) = 1;
24213 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24214 dest = change_address (destmem, SImode, destptr);
24215 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24216 emit_label (label);
24217 LABEL_NUSES (label) = 1;
24221 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24222 dest = change_address (destmem, HImode, destptr);
24223 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24224 emit_label (label);
24225 LABEL_NUSES (label) = 1;
24229 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24230 dest = change_address (destmem, QImode, destptr);
24231 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24232 emit_label (label);
24233 LABEL_NUSES (label) = 1;
24237 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
24238 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
24239 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24241 Return value is updated DESTMEM. */
24243 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24244 rtx destptr, rtx srcptr, rtx value,
24245 rtx vec_value, rtx count, int align,
24246 int desired_alignment, bool issetmem)
/* Walk the power-of-two sizes below DESIRED_ALIGNMENT.  For each size I,
   ix86_expand_aligntest emits a runtime test of the I bit of DESTPTR and a
   conditional copy/set of I bytes, so after the whole chain DESTPTR is
   DESIRED_ALIGNMENT-aligned.  */
24249 for (i = 1; i < desired_alignment; i <<= 1)
24253 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
/* For memset, prefer the wide vector value once I exceeds the size of the
   scalar promoted VALUE; otherwise use the scalar store.  */
24256 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24257 destmem = emit_memset (destmem, destptr, vec_value, i);
24259 destmem = emit_memset (destmem, destptr, value, i);
24262 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Account for the I bytes just handled before closing the conditional.  */
24263 ix86_adjust_counter (count, i);
24264 emit_label (label);
24265 LABEL_NUSES (label) = 1;
/* After the test, the low bits up to I are known clear, i.e. the
   destination is now I*2-byte aligned.  */
24266 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24272 /* Test if COUNT&SIZE is nonzero and if so, expand movmem
24273 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24274 and jump to DONE_LABEL. */
24276 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24277 rtx destptr, rtx srcptr,
24278 rtx value, rtx vec_value,
24279 rtx count, int size,
24280 rtx done_label, bool issetmem)
/* LABEL is reached when the SIZE bit of COUNT is clear, i.e. this routine
   has nothing to do for the current block.  */
24282 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24283 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24287 /* If we do not have vector value to copy, we must reduce size. */
24292 if (GET_MODE (value) == VOIDmode && size > 8)
24294 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24295 mode = GET_MODE (value);
24298 mode = GET_MODE (vec_value), value = vec_value;
24302 /* Choose appropriate vector mode. */
24304 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24305 else if (size >= 16)
24306 mode = TARGET_SSE ? V16QImode : DImode;
24307 srcmem = change_address (srcmem, mode, srcptr);
24309 destmem = change_address (destmem, mode, destptr);
24310 modesize = GEN_INT (GET_MODE_SIZE (mode));
24311 gcc_assert (GET_MODE_SIZE (mode) <= size);
/* First pass: handle the leading SIZE bytes of the block, MODE-sized
   chunks at a time.  */
24312 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24315 emit_move_insn (destmem, gen_lowpart (mode, value));
24318 emit_move_insn (destmem, srcmem);
24319 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24321 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* Rebase addresses to COUNT - 2*SIZE (relative to the already-advanced
   pointers) so the second pass ends exactly at byte COUNT; for COUNT in
   SIZE..2*SIZE-1 the two passes overlap in the middle.  */
24324 destmem = offset_address (destmem, count, 1);
24325 destmem = offset_address (destmem, GEN_INT (-2 * size),
24326 GET_MODE_SIZE (mode));
24329 srcmem = offset_address (srcmem, count, 1);
24330 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24331 GET_MODE_SIZE (mode));
/* Second pass: handle the trailing SIZE bytes of the block.  */
24333 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24336 emit_move_insn (destmem, gen_lowpart (mode, value));
24339 emit_move_insn (destmem, srcmem);
24340 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24342 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24344 emit_jump_insn (gen_jump (done_label));
24347 emit_label (label);
24348 LABEL_NUSES (label) = 1;
24351 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
24352 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
24353 bytes and last SIZE bytes adjusting DESTPTR/SRCPTR/COUNT in a way we can
24354 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24355 DONE_LABEL is a label after the whole copying sequence. The label is created
24356 on demand if *DONE_LABEL is NULL.
24357 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24358 bounds after the initial copies.
24360 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24361 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
24362 we will dispatch to a library call for large blocks.
24364 In pseudocode we do:
24368 Assume that SIZE is 4. Bigger sizes are handled analogously
24371 copy 4 bytes from SRCPTR to DESTPTR
24372 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24377 copy 1 byte from SRCPTR to DESTPTR
24380 copy 2 bytes from SRCPTR to DESTPTR
24381 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24386 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24387 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24389 OLD_DESPTR = DESTPTR;
24390 Align DESTPTR up to DESIRED_ALIGN
24391 SRCPTR += DESTPTR - OLD_DESTPTR
24392 COUNT -= DEST_PTR - OLD_DESTPTR
24394 Round COUNT down to multiple of SIZE
24395 << optional caller supplied zero size guard is here >>
24396 << optional caller supplied dynamic check is here >>
24397 << caller supplied main copy loop is here >>
24402 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24403 rtx *destptr, rtx *srcptr,
24405 rtx value, rtx vec_value,
24407 rtx_code_label **done_label,
24411 unsigned HOST_WIDE_INT *min_size,
24412 bool dynamic_check,
24415 rtx_code_label *loop_label = NULL, *label;
24418 int prolog_size = 0;
24421 /* Chose proper value to copy. */
24422 if (issetmem && VECTOR_MODE_P (mode))
24423 mode_value = vec_value;
24425 mode_value = value;
24426 gcc_assert (GET_MODE_SIZE (mode) <= size);
24428 /* See if block is big or small, handle small blocks. */
24429 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24432 loop_label = gen_label_rtx ();
24435 *done_label = gen_label_rtx ();
/* Blocks of at least SIZE bytes are handled by the main path; jump there.  */
24437 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24441 /* Handle sizes > 3. */
/* Each halving of SIZE2 dispatches blocks of SIZE2..2*SIZE2-1 bytes to
   expand_small_movmem_or_setmem, which jumps to *DONE_LABEL when done.  */
24442 for (;size2 > 2; size2 >>= 1)
24443 expand_small_movmem_or_setmem (destmem, srcmem,
24447 size2, *done_label, issetmem);
24448 /* Nothing to copy? Jump to DONE_LABEL if so */
24449 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24452 /* Do a byte copy. */
24453 destmem = change_address (destmem, QImode, *destptr);
24455 emit_move_insn (destmem, gen_lowpart (QImode, value));
24458 srcmem = change_address (srcmem, QImode, *srcptr);
24459 emit_move_insn (destmem, srcmem);
24462 /* Handle sizes 2 and 3. */
/* A 2-byte store at offset 0 plus a 2-byte store ending at COUNT covers
   both the 2- and 3-byte cases (they overlap for COUNT == 3).  */
24463 label = ix86_expand_aligntest (*count, 2, false);
24464 destmem = change_address (destmem, HImode, *destptr);
24465 destmem = offset_address (destmem, *count, 1);
24466 destmem = offset_address (destmem, GEN_INT (-2), 2);
24468 emit_move_insn (destmem, gen_lowpart (HImode, value));
24471 srcmem = change_address (srcmem, HImode, *srcptr);
24472 srcmem = offset_address (srcmem, *count, 1);
24473 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24474 emit_move_insn (destmem, srcmem);
24477 emit_label (label);
24478 LABEL_NUSES (label) = 1;
24479 emit_jump_insn (gen_jump (*done_label));
/* If COUNT is a compile-time constant, the small-block path above was not
   emitted; the block must be known to be at least SIZE bytes.  */
24483 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24484 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24486 /* Start memcpy for COUNT >= SIZE. */
24489 emit_label (loop_label);
24490 LABEL_NUSES (loop_label) = 1;
24493 /* Copy first desired_align bytes. */
24495 srcmem = change_address (srcmem, mode, *srcptr);
24496 destmem = change_address (destmem, mode, *destptr);
24497 modesize = GEN_INT (GET_MODE_SIZE (mode));
/* Emit enough possibly-misaligned MODE moves to cover the alignment gap;
   PROLOG_SIZE tracks how many bytes the prologue handled.  */
24498 for (n = 0; prolog_size < desired_align - align; n++)
24501 emit_move_insn (destmem, mode_value);
24504 emit_move_insn (destmem, srcmem);
24505 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24507 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24508 prolog_size += GET_MODE_SIZE (mode);
24512 /* Copy last SIZE bytes. */
24513 destmem = offset_address (destmem, *count, 1);
24514 destmem = offset_address (destmem,
24515 GEN_INT (-size - prolog_size),
24518 emit_move_insn (destmem, mode_value);
24521 srcmem = offset_address (srcmem, *count, 1);
24522 srcmem = offset_address (srcmem,
24523 GEN_INT (-size - prolog_size),
24525 emit_move_insn (destmem, srcmem);
/* Remaining MODE-sized chunks of the SIZE-byte tail (n starts at 1 because
   the first chunk was emitted just above).  */
24527 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24529 destmem = offset_address (destmem, modesize, 1);
24531 emit_move_insn (destmem, mode_value);
24534 srcmem = offset_address (srcmem, modesize, 1);
24535 emit_move_insn (destmem, srcmem);
24539 /* Align destination. */
24540 if (desired_align > 1 && desired_align > align)
24542 rtx saveddest = *destptr;
24544 gcc_assert (desired_align <= size);
24545 /* Align destptr up, place it to new register. */
/* (DESTPTR + PROLOG_SIZE) & -DESIRED_ALIGN rounds the pointer up past the
   bytes already written to the next DESIRED_ALIGN boundary.  */
24546 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24547 GEN_INT (prolog_size),
24548 NULL_RTX, 1, OPTAB_DIRECT);
/* Preserve the REG_POINTER marking on the freshly computed pointer.  */
24549 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24550 REG_POINTER (*destptr) = 1;
24551 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24552 GEN_INT (-desired_align),
24553 *destptr, 1, OPTAB_DIRECT);
24554 /* See how many bytes we skipped. */
24555 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24557 saveddest, 1, OPTAB_DIRECT);
24558 /* Adjust srcptr and count. */
/* SAVEDDEST here holds OLD_DESTPTR - DESTPTR (a non-positive delta), so
   MINUS advances SRCPTR and PLUS shrinks COUNT by the skipped bytes.  */
24560 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24561 saveddest, *srcptr, 1, OPTAB_DIRECT);
24562 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24563 saveddest, *count, 1, OPTAB_DIRECT);
24564 /* We copied at most size + prolog_size. */
24565 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24566 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24570 /* Our loops always round down the block size, but for dispatch to
24571 library we need precise value. */
24573 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24574 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24578 gcc_assert (prolog_size == 0);
24579 /* Decrease count, so we won't end up copying last word twice. */
24580 if (!CONST_INT_P (*count))
24581 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24582 constm1_rtx, *count, 1, OPTAB_DIRECT);
24584 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24586 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24591 /* This function is like the previous one, except here we know how many bytes
24592 need to be copied. That allows us to update alignment not only of DST, which
24593 is returned, but also of SRC, which is passed as a pointer for that
24596 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24597 rtx srcreg, rtx value, rtx vec_value,
24598 int desired_align, int align_bytes,
24602 rtx orig_dst = dst;
24603 rtx orig_src = NULL;
24604 int piece_size = 1;
24605 int copied_bytes = 0;
24609 gcc_assert (srcp != NULL);
/* ALIGN_BYTES is a known constant, so emit exactly the pieces named by its
   set bits (1, 2, 4, ...) with no runtime tests, smallest piece first.  */
24614 for (piece_size = 1;
24615 piece_size <= desired_align && copied_bytes < align_bytes;
24618 if (align_bytes & piece_size)
/* For memset, use the vector value once the piece exceeds the scalar
   promoted VALUE; otherwise the scalar store suffices.  */
24622 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24623 dst = emit_memset (dst, destreg, vec_value, piece_size);
24625 dst = emit_memset (dst, destreg, value, piece_size);
24628 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24629 copied_bytes += piece_size;
/* The destination is now DESIRED_ALIGN-aligned; record that, and shrink the
   known size by the bytes the prologue consumed.  */
24632 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24633 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24634 if (MEM_SIZE_KNOWN_P (orig_dst))
24635 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
/* For memmov, derive what alignment the source gained: find the largest
   power of two on which SRC's misalignment agrees with ALIGN_BYTES.  */
24639 int src_align_bytes = get_mem_align_offset (src, desired_align
24641 if (src_align_bytes >= 0)
24642 src_align_bytes = desired_align - src_align_bytes;
24643 if (src_align_bytes >= 0)
24645 unsigned int src_align;
24646 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24648 if ((src_align_bytes & (src_align - 1))
24649 == (align_bytes & (src_align - 1)))
24652 if (src_align > (unsigned int) desired_align)
24653 src_align = desired_align;
24654 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24655 set_mem_align (src, src_align * BITS_PER_UNIT);
24657 if (MEM_SIZE_KNOWN_P (orig_src))
24658 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24665 /* Return true if ALG can be used in current context.
24666 Assume we expand memset if MEMSET is true. */
24668 alg_usable_p (enum stringop_alg alg, bool memset)
24670 if (alg == no_stringop)
/* The vector loop needs at least SSE (or AVX) vector moves.  */
24672 if (alg == vector_loop)
24673 return TARGET_SSE || TARGET_AVX;
24674 /* Algorithms using the rep prefix want at least edi and ecx;
24675 additionally, memset wants eax and memcpy wants esi. Don't
24676 consider such algorithms if the user has appropriated those
24677 registers for their own purposes. */
24678 if (alg == rep_prefix_1_byte
24679 || alg == rep_prefix_4_byte
24680 || alg == rep_prefix_8_byte)
24681 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24682 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24686 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24687 static enum stringop_alg
24688 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24689 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24690 bool memset, bool zero_memset, int *dynamic_check, bool *noalign,
24693 const struct stringop_algs *algs;
24694 bool optimize_for_speed;
24696 const struct processor_costs *cost;
24698 bool any_alg_usable_p = false;
/* -1 means "no runtime dynamic check / library dispatch requested".  */
24701 *dynamic_check = -1;
24703 /* Even if the string operation call is cold, we still might spend a lot
24704 of time processing large blocks. */
24705 if (optimize_function_for_size_p (cfun)
24706 || (optimize_insn_for_size_p ()
24708 || (expected_size != -1 && expected_size < 256))))
24709 optimize_for_speed = false;
24711 optimize_for_speed = true;
/* Pick the per-CPU cost tables; memset and memcpy have separate tables,
   each with 32-bit and 64-bit variants.  */
24713 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24715 algs = &cost->memset[TARGET_64BIT != 0];
24717 algs = &cost->memcpy[TARGET_64BIT != 0];
24719 /* See maximal size for user defined algorithm. */
24720 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24722 enum stringop_alg candidate = algs->size[i].alg;
24723 bool usable = alg_usable_p (candidate, memset);
24724 any_alg_usable_p |= usable;
24726 if (candidate != libcall && candidate && usable)
24727 max = algs->size[i].max;
24730 /* If expected size is not known but max size is small enough
24731 so inline version is a win, set expected size into
24733 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24734 && expected_size == -1)
24735 expected_size = min_size / 2 + max_size / 2;
24737 /* If user specified the algorithm, honor it if possible. */
24738 if (ix86_stringop_alg != no_stringop
24739 && alg_usable_p (ix86_stringop_alg, memset))
24740 return ix86_stringop_alg;
24741 /* rep; movq or rep; movl is the smallest variant. */
24742 else if (!optimize_for_speed)
/* The 4-byte rep variant requires a count that is a known multiple of 4
   and, for memset, a zeroing store; otherwise fall back to byte ops.  */
24745 if (!count || (count & 3) || (memset && !zero_memset))
24746 return alg_usable_p (rep_prefix_1_byte, memset)
24747 ? rep_prefix_1_byte : loop_1_byte;
24749 return alg_usable_p (rep_prefix_4_byte, memset)
24750 ? rep_prefix_4_byte : loop;
24752 /* Very tiny blocks are best handled via the loop, REP is expensive to
24754 else if (expected_size != -1 && expected_size < 4)
24755 return loop_1_byte;
24756 else if (expected_size != -1)
/* Scan the size-bucketed table for the first entry covering
   EXPECTED_SIZE whose algorithm is actually usable.  */
24758 enum stringop_alg alg = libcall;
24759 bool alg_noalign = false;
24760 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24762 /* We get here if the algorithms that were not libcall-based
24763 were rep-prefix based and we are unable to use rep prefixes
24764 based on global register usage. Break out of the loop and
24765 use the heuristic below. */
24766 if (algs->size[i].max == 0)
24768 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24770 enum stringop_alg candidate = algs->size[i].alg;
24772 if (candidate != libcall && alg_usable_p (candidate, memset))
24775 alg_noalign = algs->size[i].noalign;
24777 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24778 last non-libcall inline algorithm. */
24779 if (TARGET_INLINE_ALL_STRINGOPS)
24781 /* When the current size is best to be copied by a libcall,
24782 but we are still forced to inline, run the heuristic below
24783 that will pick code for medium sized blocks. */
24784 if (alg != libcall)
24786 *noalign = alg_noalign;
24789 else if (!any_alg_usable_p)
24792 else if (alg_usable_p (candidate, memset))
24794 *noalign = algs->size[i].noalign;
24800 /* When asked to inline the call anyway, try to pick meaningful choice.
24801 We look for maximal size of block that is faster to copy by hand and
24802 take blocks of at most of that size guessing that average size will
24803 be roughly half of the block.
24805 If this turns out to be bad, we might simply specify the preferred
24806 choice in ix86_costs. */
24807 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24808 && (algs->unknown_size == libcall
24809 || !alg_usable_p (algs->unknown_size, memset)))
24811 enum stringop_alg alg;
24812 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
24814 /* If there aren't any usable algorithms or if recursing already,
24815 then recursing on smaller sizes or same size isn't going to
24816 find anything. Just return the simple byte-at-a-time copy loop. */
24817 if (!any_alg_usable_p || recur)
24819 /* Pick something reasonable. */
24820 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
24821 *dynamic_check = 128;
24822 return loop_1_byte;
/* Recurse once (RECUR = true) with a synthetic expected size so the
   table scan above picks an inline algorithm for medium blocks.  */
24824 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
24825 zero_memset, dynamic_check, noalign, true);
24826 gcc_assert (*dynamic_check == -1);
24827 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24828 *dynamic_check = max;
24830 gcc_assert (alg != libcall);
24833 return (alg_usable_p (algs->unknown_size, memset)
24834 ? algs->unknown_size : libcall);
24837 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24838 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24840 decide_alignment (int align,
24841 enum stringop_alg alg,
24843 machine_mode move_mode)
24845 int desired_align = 0;
24847 gcc_assert (alg != no_stringop);
24849 if (alg == libcall)
24851 if (move_mode == VOIDmode)
/* By default, align the destination to the size of the move chunk.  */
24854 desired_align = GET_MODE_SIZE (move_mode);
24855 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
24856 copying whole cacheline at once. */
24857 if (TARGET_PENTIUMPRO
24858 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
/* Never ask for less alignment than the operand already has, and do not
   bother aligning blocks expected to be under 4 bytes.  */
24863 if (desired_align < align)
24864 desired_align = align;
24865 if (expected_size != -1 && expected_size < 4)
24866 desired_align = align;
24868 return desired_align;
24872 /* Helper function for memcpy. For QImode value 0xXY produce
24873 0xXYXYXYXY of wide specified by MODE. This is essentially
24874 a * 0x10101010, but we can do slightly better than
24875 synth_mult by unwinding the sequence by hand on CPUs with
24878 promote_duplicated_reg (machine_mode mode, rtx val)
24880 machine_mode valmode = GET_MODE (val);
/* Number of shift+or steps needed: 3 for DImode (8/16/32), 2 for SImode.  */
24882 int nops = mode == DImode ? 3 : 2;
24884 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24885 if (val == const0_rtx)
24886 return copy_to_mode_reg (mode, CONST0_RTX (mode));
/* Constant byte: replicate it at compile time and load the result.  */
24887 if (CONST_INT_P (val))
24889 HOST_WIDE_INT v = INTVAL (val) & 255;
/* Two-step 16-bit shift avoids an out-of-range shift on 32-bit hosts.  */
24893 if (mode == DImode)
24894 v |= (v << 16) << 16;
24895 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24898 if (valmode == VOIDmode)
24900 if (valmode != QImode)
24901 val = gen_lowpart (QImode, val);
24902 if (mode == QImode)
24904 if (!TARGET_PARTIAL_REG_STALL)
/* Cost model: compare a single multiply by 0x01010101 against NOPS
   shift+or pairs; use the multiply when it is no more expensive.  */
24906 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24907 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24908 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24909 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24911 rtx reg = convert_modes (mode, QImode, val, true);
24912 tmp = promote_duplicated_reg (mode, const1_rtx);
24913 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24918 rtx reg = convert_modes (mode, QImode, val, true);
/* Without partial-register stalls, an insv of the low byte into bits
   8..15 is cheaper than the first shift+or step.  */
24920 if (!TARGET_PARTIAL_REG_STALL)
24921 if (mode == SImode)
24922 emit_insn (gen_movsi_insv_1 (reg, reg));
24924 emit_insn (gen_movdi_insv_1 (reg, reg));
24927 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24928 NULL, 1, OPTAB_DIRECT);
24930 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Double the replicated width: 2 bytes -> 4 bytes.  */
24932 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24933 NULL, 1, OPTAB_DIRECT);
24934 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): a shift by 32 only makes sense when MODE is DImode, so
   this guard looks like it should exclude SImode — confirm the condition
   against the unabridged source.  */
24935 if (mode == SImode)
24937 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24938 NULL, 1, OPTAB_DIRECT);
24939 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24944 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24945 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24946 alignment from ALIGN to DESIRED_ALIGN. */
24948 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
/* Pick the widest replication actually required: DImode when chunks or the
   alignment gap exceed 4 bytes, then SImode, then HImode; a 1-byte need
   uses VAL unchanged.  */
24954 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24955 promoted_val = promote_duplicated_reg (DImode, val);
24956 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24957 promoted_val = promote_duplicated_reg (SImode, val);
24958 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24959 promoted_val = promote_duplicated_reg (HImode, val);
24961 promoted_val = val;
24963 return promoted_val;
24966 /* Expand string move (memcpy) ot store (memset) operation. Use i386 string
24967 operations when profitable. The code depends upon architecture, block size
24968 and alignment, but always has one of the following overall structures:
24970 Aligned move sequence:
24972 1) Prologue guard: Conditional that jumps up to epilogues for small
24973 blocks that can be handled by epilogue alone. This is faster
24974 but also needed for correctness, since prologue assume the block
24975 is larger than the desired alignment.
24977 Optional dynamic check for size and libcall for large
24978 blocks is emitted here too, with -minline-stringops-dynamically.
24980 2) Prologue: copy first few bytes in order to get destination
24981 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24982 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24983 copied. We emit either a jump tree on power of two sized
24984 blocks, or a byte loop.
24986 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24987 with specified algorithm.
24989 4) Epilogue: code copying tail of the block that is too small to be
24990 handled by main body (or up to size guarded by prologue guard).
24992 Misaligned move sequence
24994 1) missaligned move prologue/epilogue containing:
24995 a) Prologue handling small memory blocks and jumping to done_label
24996 (skipped if blocks are known to be large enough)
24997 b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24998 needed by single possibly misaligned move
24999 (skipped if alignment is not needed)
25000 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
25002 2) Zero size guard dispatching to done_label, if needed
25004 3) dispatch to library call, if needed,
25006 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
25007 with specified algorithm. */
25009 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
25010 rtx align_exp, rtx expected_align_exp,
25011 rtx expected_size_exp, rtx min_size_exp,
25012 rtx max_size_exp, rtx probable_max_size_exp,
25017 rtx_code_label *label = NULL;
25019 rtx_code_label *jump_around_label = NULL;
25020 HOST_WIDE_INT align = 1;
25021 unsigned HOST_WIDE_INT count = 0;
25022 HOST_WIDE_INT expected_size = -1;
25023 int size_needed = 0, epilogue_size_needed;
25024 int desired_align = 0, align_bytes = 0;
25025 enum stringop_alg alg;
25026 rtx promoted_val = NULL;
25027 rtx vec_promoted_val = NULL;
25028 bool force_loopy_epilogue = false;
25030 bool need_zero_guard = false;
25032 machine_mode move_mode = VOIDmode;
25033 int unroll_factor = 1;
25034 /* TODO: Once value ranges are available, fill in proper data. */
25035 unsigned HOST_WIDE_INT min_size = 0;
25036 unsigned HOST_WIDE_INT max_size = -1;
25037 unsigned HOST_WIDE_INT probable_max_size = -1;
25038 bool misaligned_prologue_used = false;
25040 if (CONST_INT_P (align_exp))
25041 align = INTVAL (align_exp);
25042 /* i386 can do misaligned access on reasonably increased cost. */
25043 if (CONST_INT_P (expected_align_exp)
25044 && INTVAL (expected_align_exp) > align)
25045 align = INTVAL (expected_align_exp);
25046 /* ALIGN is the minimum of destination and source alignment, but we care here
25047 just about destination alignment. */
25049 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
25050 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
25052 if (CONST_INT_P (count_exp))
25054 min_size = max_size = probable_max_size = count = expected_size
25055 = INTVAL (count_exp);
25056 /* When COUNT is 0, there is nothing to do. */
25063 min_size = INTVAL (min_size_exp);
25065 max_size = INTVAL (max_size_exp);
25066 if (probable_max_size_exp)
25067 probable_max_size = INTVAL (probable_max_size_exp);
25068 if (CONST_INT_P (expected_size_exp))
25069 expected_size = INTVAL (expected_size_exp);
25072 /* Make sure we don't need to care about overflow later on. */
25073 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
25076 /* Step 0: Decide on preferred algorithm, desired alignment and
25077 size of chunks to be copied by main loop. */
25078 alg = decide_alg (count, expected_size, min_size, probable_max_size,
25080 issetmem && val_exp == const0_rtx,
25081 &dynamic_check, &noalign, false);
25082 if (alg == libcall)
25084 gcc_assert (alg != no_stringop);
25086 /* For now vector-version of memset is generated only for memory zeroing, as
25087 creating of promoted vector value is very cheap in this case. */
25088 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
25089 alg = unrolled_loop;
25092 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
25093 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
25095 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
25098 move_mode = word_mode;
25104 gcc_unreachable ();
25106 need_zero_guard = true;
25107 move_mode = QImode;
25110 need_zero_guard = true;
25112 case unrolled_loop:
25113 need_zero_guard = true;
25114 unroll_factor = (TARGET_64BIT ? 4 : 2);
25117 need_zero_guard = true;
25119 /* Find the widest supported mode. */
25120 move_mode = word_mode;
25121 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
25122 != CODE_FOR_nothing)
25123 move_mode = GET_MODE_WIDER_MODE (move_mode);
25125 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25126 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25127 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25129 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25130 move_mode = mode_for_vector (word_mode, nunits);
25131 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
25132 move_mode = word_mode;
25134 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
25136 case rep_prefix_8_byte:
25137 move_mode = DImode;
25139 case rep_prefix_4_byte:
25140 move_mode = SImode;
25142 case rep_prefix_1_byte:
25143 move_mode = QImode;
25146 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
25147 epilogue_size_needed = size_needed;
25149 /* If we are going to call any library calls conditionally, make sure any
25150 pending stack adjustment happen before the first conditional branch,
25151 otherwise they will be emitted before the library call only and won't
25152 happen from the other branches. */
25153 if (dynamic_check != -1)
25154 do_pending_stack_adjust ();
25156 desired_align = decide_alignment (align, alg, expected_size, move_mode);
25157 if (!TARGET_ALIGN_STRINGOPS || noalign)
25158 align = desired_align;
25160 /* Step 1: Prologue guard. */
25162 /* Alignment code needs count to be in register. */
25163 if (CONST_INT_P (count_exp) && desired_align > align)
25165 if (INTVAL (count_exp) > desired_align
25166 && INTVAL (count_exp) > size_needed)
25169 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
25170 if (align_bytes <= 0)
25173 align_bytes = desired_align - align_bytes;
25175 if (align_bytes == 0)
25176 count_exp = force_reg (counter_mode (count_exp), count_exp);
25178 gcc_assert (desired_align >= 1 && align >= 1);
25180 /* Misaligned move sequences handle both prologue and epilogue at once.
25181 Default code generation results in a smaller code for large alignments
25182 and also avoids redundant job when sizes are known precisely. */
25183 misaligned_prologue_used
25184 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25185 && MAX (desired_align, epilogue_size_needed) <= 32
25186 && desired_align <= epilogue_size_needed
25187 && ((desired_align > align && !align_bytes)
25188 || (!count && epilogue_size_needed > 1)));
25190 /* Do the cheap promotion to allow better CSE across the
25191 main loop and epilogue (ie one load of the big constant in the
25193 For now the misaligned move sequences do not have fast path
25194 without broadcasting. */
25195 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25197 if (alg == vector_loop)
25199 gcc_assert (val_exp == const0_rtx);
25200 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25201 promoted_val = promote_duplicated_reg_to_size (val_exp,
25202 GET_MODE_SIZE (word_mode),
25203 desired_align, align);
25207 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25208 desired_align, align);
25211 /* Misaligned move sequences handles both prologues and epilogues at once.
25212 Default code generation results in smaller code for large alignments and
25213 also avoids redundant job when sizes are known precisely. */
25214 if (misaligned_prologue_used)
25216 /* Misaligned move prologue handled small blocks by itself. */
25217 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25218 (dst, src, &destreg, &srcreg,
25219 move_mode, promoted_val, vec_promoted_val,
25221 &jump_around_label,
25222 desired_align < align
25223 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25224 desired_align, align, &min_size, dynamic_check, issetmem);
25226 src = change_address (src, BLKmode, srcreg);
25227 dst = change_address (dst, BLKmode, destreg);
25228 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25229 epilogue_size_needed = 0;
25230 if (need_zero_guard
25231 && min_size < (unsigned HOST_WIDE_INT) size_needed)
25233 /* It is possible that we copied enough so the main loop will not
25235 gcc_assert (size_needed > 1);
25236 if (jump_around_label == NULL_RTX)
25237 jump_around_label = gen_label_rtx ();
25238 emit_cmp_and_jump_insns (count_exp,
25239 GEN_INT (size_needed),
25240 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25241 if (expected_size == -1
25242 || expected_size < (desired_align - align) / 2 + size_needed)
25243 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25245 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25248 /* Ensure that alignment prologue won't copy past end of block. */
25249 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25251 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25252 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25253 Make sure it is power of 2. */
25254 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25256 /* To improve performance of small blocks, we jump around the VAL
25257 promoting mode. This mean that if the promoted VAL is not constant,
25258 we might not use it in the epilogue and have to use byte
25260 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25261 force_loopy_epilogue = true;
25262 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25263 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25265 /* If main algorithm works on QImode, no epilogue is needed.
25266 For small sizes just don't align anything. */
25267 if (size_needed == 1)
25268 desired_align = align;
25273 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25275 label = gen_label_rtx ();
25276 emit_cmp_and_jump_insns (count_exp,
25277 GEN_INT (epilogue_size_needed),
25278 LTU, 0, counter_mode (count_exp), 1, label);
25279 if (expected_size == -1 || expected_size < epilogue_size_needed)
25280 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25282 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25286 /* Emit code to decide on runtime whether library call or inline should be
25288 if (dynamic_check != -1)
25290 if (!issetmem && CONST_INT_P (count_exp))
25292 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25294 emit_block_move_via_libcall (dst, src, count_exp, false);
25295 count_exp = const0_rtx;
25301 rtx_code_label *hot_label = gen_label_rtx ();
25302 if (jump_around_label == NULL_RTX)
25303 jump_around_label = gen_label_rtx ();
25304 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25305 LEU, 0, counter_mode (count_exp),
25307 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25309 set_storage_via_libcall (dst, count_exp, val_exp, false);
25311 emit_block_move_via_libcall (dst, src, count_exp, false);
25312 emit_jump (jump_around_label);
25313 emit_label (hot_label);
25317 /* Step 2: Alignment prologue. */
25318 /* Do the expensive promotion once we branched off the small blocks. */
25319 if (issetmem && !promoted_val)
25320 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25321 desired_align, align);
25323 if (desired_align > align && !misaligned_prologue_used)
25325 if (align_bytes == 0)
25327 /* Except for the first move in prologue, we no longer know
25328 constant offset in aliasing info. It don't seems to worth
25329 the pain to maintain it for the first move, so throw away
25331 dst = change_address (dst, BLKmode, destreg);
25333 src = change_address (src, BLKmode, srcreg);
25334 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25335 promoted_val, vec_promoted_val,
25336 count_exp, align, desired_align,
25338 /* At most desired_align - align bytes are copied. */
25339 if (min_size < (unsigned)(desired_align - align))
25342 min_size -= desired_align - align;
25346 /* If we know how many bytes need to be stored before dst is
25347 sufficiently aligned, maintain aliasing info accurately. */
25348 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25356 count_exp = plus_constant (counter_mode (count_exp),
25357 count_exp, -align_bytes);
25358 count -= align_bytes;
25359 min_size -= align_bytes;
25360 max_size -= align_bytes;
25362 if (need_zero_guard
25363 && min_size < (unsigned HOST_WIDE_INT) size_needed
25364 && (count < (unsigned HOST_WIDE_INT) size_needed
25365 || (align_bytes == 0
25366 && count < ((unsigned HOST_WIDE_INT) size_needed
25367 + desired_align - align))))
25369 /* It is possible that we copied enough so the main loop will not
25371 gcc_assert (size_needed > 1);
25372 if (label == NULL_RTX)
25373 label = gen_label_rtx ();
25374 emit_cmp_and_jump_insns (count_exp,
25375 GEN_INT (size_needed),
25376 LTU, 0, counter_mode (count_exp), 1, label);
25377 if (expected_size == -1
25378 || expected_size < (desired_align - align) / 2 + size_needed)
25379 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25381 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25384 if (label && size_needed == 1)
25386 emit_label (label);
25387 LABEL_NUSES (label) = 1;
25389 epilogue_size_needed = 1;
25391 promoted_val = val_exp;
25393 else if (label == NULL_RTX && !misaligned_prologue_used)
25394 epilogue_size_needed = size_needed;
25396 /* Step 3: Main loop. */
25403 gcc_unreachable ();
25406 case unrolled_loop:
25407 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25408 count_exp, move_mode, unroll_factor,
25409 expected_size, issetmem);
25412 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25413 vec_promoted_val, count_exp, move_mode,
25414 unroll_factor, expected_size, issetmem);
25416 case rep_prefix_8_byte:
25417 case rep_prefix_4_byte:
25418 case rep_prefix_1_byte:
25419 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25420 val_exp, count_exp, move_mode, issetmem);
25423 /* Adjust properly the offset of src and dest memory for aliasing. */
25424 if (CONST_INT_P (count_exp))
25427 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25428 (count / size_needed) * size_needed);
25429 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25430 (count / size_needed) * size_needed);
25435 src = change_address (src, BLKmode, srcreg);
25436 dst = change_address (dst, BLKmode, destreg);
25439 /* Step 4: Epilogue to copy the remaining bytes. */
25443 /* When the main loop is done, COUNT_EXP might hold original count,
25444 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25445 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25446 bytes. Compensate if needed. */
25448 if (size_needed < epilogue_size_needed)
25451 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25452 GEN_INT (size_needed - 1), count_exp, 1,
25454 if (tmp != count_exp)
25455 emit_move_insn (count_exp, tmp);
25457 emit_label (label);
25458 LABEL_NUSES (label) = 1;
25461 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25463 if (force_loopy_epilogue)
25464 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25465 epilogue_size_needed);
25469 expand_setmem_epilogue (dst, destreg, promoted_val,
25470 vec_promoted_val, count_exp,
25471 epilogue_size_needed);
25473 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25474 epilogue_size_needed);
25477 if (jump_around_label)
25478 emit_label (jump_around_label);
25483 /* Expand the appropriate insns for doing strlen if not just doing
25486 out = result, initialized with the start address
25487 align_rtx = alignment of the address.
25488 scratch = scratch register, initialized with the start address when
25489 not aligned, otherwise undefined
25491 This is just the body. It needs the initializations mentioned above and
25492 some address computing at the end. These things are done in i386.md. */
/* Emit the body of an unrolled strlen sequence working on SImode chunks.
   OUT holds the start address on entry and is advanced to the address of
   the terminating zero byte (the length computation is done by the caller;
   see ix86_expand_strlen).  ALIGN_RTX is the known alignment of the
   address.  NOTE(review): the embedded original line numbers in this
   listing are discontinuous, so some statements/braces of this function
   are elided from view.  */
25495 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25499 rtx_code_label *align_2_label = NULL;
25500 rtx_code_label *align_3_label = NULL;
25501 rtx_code_label *align_4_label = gen_label_rtx ();
25502 rtx_code_label *end_0_label = gen_label_rtx ();
25504 rtx tmpreg = gen_reg_rtx (SImode);
25505 rtx scratch = gen_reg_rtx (SImode);
/* Only a compile-time constant alignment is usable below.  */
25509 if (CONST_INT_P (align_rtx))
25510 align = INTVAL (align_rtx);
25512 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25514 /* Is there a known alignment and is it less than 4? */
25517 rtx scratch1 = gen_reg_rtx (Pmode);
25518 emit_move_insn (scratch1, out);
25519 /* Is there a known alignment and is it not 2? */
25522 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25523 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25525 /* Leave just the 3 lower bits. */
25526 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25527 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on the misalignment (addr & 3): 0, 2, or >2; the remaining
   case (1) falls through to the byte-at-a-time checks below.  */
25529 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25530 Pmode, 1, align_4_label);
25531 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25532 Pmode, 1, align_2_label);
25533 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25534 Pmode, 1, align_3_label);
25538 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25539 check if is aligned to 4 - byte. */
25541 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25542 NULL_RTX, 0, OPTAB_WIDEN);
25544 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25545 Pmode, 1, align_4_label);
25548 mem = change_address (src, QImode, out);
25550 /* Now compare the bytes. */
25552 /* Compare the first n unaligned byte on a byte per byte basis. */
25553 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25554 QImode, 1, end_0_label);
25556 /* Increment the address. */
25557 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25559 /* Not needed with an alignment of 2 */
25562 emit_label (align_2_label);
25564 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25567 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25569 emit_label (align_3_label);
25572 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25575 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25578 /* Generate loop to check 4 bytes at a time. It is not a good idea to
25579 align this loop. It gives only huge programs, but does not help to
25581 emit_label (align_4_label);
25583 mem = change_address (src, SImode, out);
25584 emit_move_insn (scratch, mem);
25585 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25587 /* This formula yields a nonzero result iff one of the bytes is zero.
25588 This saves three branches inside loop and many cycles. */
/* tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080: a byte's
   0x80 bit survives exactly when that byte of SCRATCH is zero.  */
25590 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25591 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25592 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25593 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25594 gen_int_mode (0x80808080, SImode)));
25595 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free variant (uses conditional moves) to locate the zero byte
   within the four bytes just loaded.  */
25600 rtx reg = gen_reg_rtx (SImode);
25601 rtx reg2 = gen_reg_rtx (Pmode);
25602 emit_move_insn (reg, tmpreg);
25603 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25605 /* If zero is not in the first two bytes, move two bytes forward. */
25606 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25607 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25608 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25609 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25610 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25613 /* Emit lea manually to avoid clobbering of flags. */
25614 emit_insn (gen_rtx_SET (SImode, reg2,
25615 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25617 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25618 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25619 emit_insn (gen_rtx_SET (VOIDmode, out,
25620 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching variant of the same search (presumably used when cmov is
   unavailable or undesirable — the guarding condition is elided here).  */
25626 rtx_code_label *end_2_label = gen_label_rtx ();
25627 /* Is zero in the first two bytes? */
25629 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25630 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25631 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25632 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25633 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25635 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25636 JUMP_LABEL (tmp) = end_2_label;
25638 /* Not in the first two. Move two bytes forward. */
25639 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25640 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25642 emit_label (end_2_label);
25646 /* Avoid branch in fixing the byte. */
/* OUT now points 4 past the loaded word; subtract 3 plus borrow from the
   low-byte test so OUT ends up on the zero byte without a branch.  */
25647 tmpreg = gen_lowpart (QImode, tmpreg);
25648 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
25649 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25650 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25651 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25653 emit_label (end_0_label);
25656 /* Expand strlen. */
/* Expand strlen: either via the unrolled inline scanner
   (ix86_expand_strlensi_unroll_1) or via a repne-scasb sequence.
   OUT receives the length; SRC is the string MEM; EOSCHAR is the
   terminator; ALIGN is the known alignment.  NOTE(review): lines are
   elided from this listing (discontinuous embedded numbering), so the
   return statements and some braces are not visible.  */
25659 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25661 rtx addr, scratch1, scratch2, scratch3, scratch4;
25663 /* The generic case of strlen expander is long. Avoid it's
25664 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
/* Bail out when the unrolled path would be chosen below but inlining
   long sequences was not requested.  */
25666 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25667 && !TARGET_INLINE_ALL_STRINGOPS
25668 && !optimize_insn_for_size_p ()
25669 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25672 addr = force_reg (Pmode, XEXP (src, 0));
25673 scratch1 = gen_reg_rtx (Pmode);
/* Unrolled path: only for a NUL terminator, when optimizing for speed.  */
25675 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25676 && !optimize_insn_for_size_p ())
25678 /* Well it seems that some optimizer does not combine a call like
25679 foo(strlen(bar), strlen(bar));
25680 when the move and the subtraction is done here. It does calculate
25681 the length just once when these instructions are done inside of
25682 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25683 often used and I use one fewer register for the lifetime of
25684 output_strlen_unroll() this is better. */
25686 emit_move_insn (out, addr);
25688 ix86_expand_strlensi_unroll_1 (out, src, align);
25690 /* strlensi_unroll_1 returns the address of the zero at the end of
25691 the string, like memchr(), so compute the length by subtracting
25692 the start address. */
25693 emit_insn (ix86_gen_sub3 (out, out, addr));
/* scasb path: needs eax/ecx/edi, so give up if the user fixed them.  */
25699 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25700 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25703 scratch2 = gen_reg_rtx (Pmode);
25704 scratch3 = gen_reg_rtx (Pmode);
25705 scratch4 = force_reg (Pmode, constm1_rtx);
25707 emit_move_insn (scratch3, addr);
25708 eoschar = force_reg (QImode, eoschar);
25710 src = replace_equiv_address_nv (src, scratch3);
25712 /* If .md starts supporting :P, this can be done in .md. */
25713 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25714 scratch4), UNSPEC_SCAS);
25715 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~(count register) - 1: scas leaves the negated count + 2.  */
25716 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25717 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25722 /* For given symbol (function) construct code to compute address of its PLT
25723 entry in large x86-64 PIC model. */
/* Returns a fresh Pmode pseudo holding GOT base + PLTOFF(symbol).
   NOTE(review): the "static rtx" line and the final return are elided
   from this listing.  */
25725 construct_plt_address (rtx symbol)
/* Only valid for symbols in the large PIC code model on 64-bit ELF.  */
25729 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25730 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25731 gcc_assert (Pmode == DImode);
25733 tmp = gen_reg_rtx (Pmode);
25734 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25736 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25737 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call instruction.  RETVAL is the value-return rtx or NULL,
   FNADDR is the (MEM) callee address, CALLARG1/CALLARG2 are the argument
   count rtxes, POP is the number of bytes to pop (or const0_rtx), and
   SIBCALL says whether this is a sibling (tail) call.
   NOTE(review): this listing elides lines (discontinuous embedded
   numbering) — several conditions and braces are missing from view.  */
25742 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25744 rtx pop, bool sibcall)
25747 rtx use = NULL, call;
25748 unsigned int vec_len = 0;
/* A zero pop amount is represented as no pop at all.  */
25750 if (pop == const0_rtx)
25752 gcc_assert (!TARGET_64BIT || !pop);
25754 if (TARGET_MACHO && !TARGET_64BIT)
/* Darwin/32-bit: route symbol calls through the machopic stub.  */
25757 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25758 fnaddr = machopic_indirect_call_target (fnaddr);
25763 /* Static functions and indirect calls don't need the pic register. */
25766 || (ix86_cmodel == CM_LARGE_PIC
25767 && DEFAULT_ABI != MS_ABI))
25768 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25769 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
/* Record the PIC register as used so the call keeps it live.  */
25771 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25772 if (ix86_use_pseudo_pic_reg ())
25773 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25774 pic_offset_table_rtx);
25778 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25779 parameters passed in vector registers. */
25781 && (INTVAL (callarg2) > 0
25782 || (INTVAL (callarg2) == 0
25783 && (TARGET_SSE || !flag_skip_rax_setup))))
/* varargs ABI: AL carries the number of vector registers used.  */
25785 rtx al = gen_rtx_REG (QImode, AX_REG);
25786 emit_move_insn (al, callarg2);
25787 use_reg (&use, al);
/* Large PIC model: compute the PLT entry address explicitly.  */
25790 if (ix86_cmodel == CM_LARGE_PIC
25793 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25794 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25795 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25797 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25798 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
/* Force an address the call patterns can't take into a register.  */
25800 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25801 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25804 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25808 /* We should add bounds as destination register in case
25809 pointer with bounds may be returned. */
25810 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25812 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25813 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25814 if (GET_CODE (retval) == PARALLEL)
25816 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25817 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25818 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25819 retval = chkp_join_splitted_slot (retval, par);
25823 retval = gen_rtx_PARALLEL (VOIDmode,
25824 gen_rtvec (3, retval, b0, b1));
25825 chkp_put_regs_to_expr_list (retval);
25829 call = gen_rtx_SET (VOIDmode, retval, call);
25831 vec[vec_len++] = call;
/* Callee-pop: model the stack adjustment in the same PARALLEL.  */
25835 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25836 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25837 vec[vec_len++] = pop;
/* MS->SysV ABI transition: clobber the registers that are call-used in
   SysV but call-saved in the MS ABI (callarg2 == -2 marks the thunk).  */
25840 if (TARGET_64BIT_MS_ABI
25841 && (!callarg2 || INTVAL (callarg2) != -2))
25843 int const cregs_size
25844 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25847 for (i = 0; i < cregs_size; i++)
25849 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25850 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25852 clobber_reg (&use, gen_rtx_REG (mode, regno));
25857 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25858 call = emit_call_insn (call);
25860 CALL_INSN_FUNCTION_USAGE (call) = use;
25865 /* Output the assembly for a call instruction. */
/* Returns the assembler template for INSN calling CALL_OP; emits the
   template directly via output_asm_insn for the multi-part cases.
   NOTE(review): lines are elided from this listing; the return
   statements and some braces are not visible.  */
25868 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25870 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25871 bool seh_nop_p = false;
/* Sibling calls become jumps.  */
25874 if (SIBLING_CALL_P (insn))
25877 xasm = "%!jmp\t%P0";
25878 /* SEH epilogue detection requires the indirect branch case
25879 to include REX.W. */
25880 else if (TARGET_SEH)
25881 xasm = "%!rex.W jmp %A0";
25883 xasm = "%!jmp\t%A0";
25885 output_asm_insn (xasm, &call_op);
25889 /* SEH unwinding can require an extra nop to be emitted in several
25890 circumstances. Determine if we have one of those. */
/* Scan forward for a real insn; its absence (or an epilogue boundary)
   forces the extra nop after the call.  */
25895 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25897 /* If we get to another real insn, we don't need the nop. */
25901 /* If we get to the epilogue note, prevent a catch region from
25902 being adjacent to the standard epilogue sequence. If non-
25903 call-exceptions, we'll have done this during epilogue emission. */
25904 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25905 && !flag_non_call_exceptions
25906 && !can_throw_internal (insn))
25913 /* If we didn't find a real insn following the call, prevent the
25914 unwinder from looking into the next function. */
25920 xasm = "%!call\t%P0";
25922 xasm = "%!call\t%A0";
25924 output_asm_insn (xasm, &call_op);
25932 /* Clear stack slot assignments remembered from previous functions.
25933 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and initialize the per-function machine_function record.
   NOTE(review): the trailing "return f;" is elided from this listing.  */
25936 static struct machine_function *
25937 ix86_init_machine_status (void)
25939 struct machine_function *f;
/* GC-allocated and zero-initialized; only the non-zero defaults are
   set explicitly below.  */
25941 f = ggc_cleared_alloc<machine_function> ();
25942 f->use_fast_prologue_epilogue_nregs = -1;
25943 f->call_abi = ix86_abi;
25948 /* Return a MEM corresponding to a stack slot with mode MODE.
25949 Allocate a new slot if necessary.
25951 The RTL for a function can have several slots available: N is
25952 which slot to use. */
25955 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25957 struct stack_local_entry *s;
25959 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Reuse an existing slot with the same mode and slot number.  */
25961 for (s = ix86_stack_locals; s; s = s->next)
25962 if (s->mode == mode && s->n == n)
25963 return validize_mem (copy_rtx (s->rtl));
/* Otherwise allocate a fresh slot and push it on the per-function list.  */
25965 s = ggc_alloc<stack_local_entry> ()
25968 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25970 s->next = ix86_stack_locals;
25971 ix86_stack_locals = s;
25972 return validize_mem (copy_rtx (s->rtl));
/* Instantiate virtual registers in the RTL of every remembered stack
   slot (TARGET_INSTANTIATE_DECLS hook).  */
25976 ix86_instantiate_decls (void)
25978 struct stack_local_entry *s;
25980 for (s = ix86_stack_locals; s; s = s->next)
25981 if (s->rtl != NULL_RTX)
25982 instantiate_decl_rtl (s->rtl);
25985 /* Check whether x86 address PARTS is a pc-relative address. */
/* True when the decomposed address is a bare displacement that resolves
   to a %rip-relative reference (label, non-TLS symbol, or one of the
   pc-relative UNSPECs).  NOTE(review): the "return false" paths and
   some braces are elided from this listing.  */
25988 rip_relative_addr_p (struct ix86_address *parts)
25990 rtx base, index, disp;
25992 base = parts->base;
25993 index = parts->index;
25994 disp = parts->disp;
/* Only a displacement-only address can be %rip-relative.  */
25996 if (disp && !base && !index)
/* Strip a CONST wrapper and a constant offset added to the symbol.  */
26002 if (GET_CODE (disp) == CONST)
26003 symbol = XEXP (disp, 0);
26004 if (GET_CODE (symbol) == PLUS
26005 && CONST_INT_P (XEXP (symbol, 1)))
26006 symbol = XEXP (symbol, 0);
26008 if (GET_CODE (symbol) == LABEL_REF
26009 || (GET_CODE (symbol) == SYMBOL_REF
26010 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
26011 || (GET_CODE (symbol) == UNSPEC
26012 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
26013 || XINT (symbol, 1) == UNSPEC_PCREL
26014 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
26021 /* Calculate the length of the memory address in the instruction encoding.
26022 Includes addr32 prefix, does not include the one-byte modrm, opcode,
26023 or other prefixes. We never generate addr32 prefix for LEA insn. */
/* NOTE(review): lines are elided from this listing (discontinuous
   embedded numbering); the returns/accumulation into LEN are partly
   missing from view.  */
26026 memory_address_length (rtx addr, bool lea)
26028 struct ix86_address parts;
26029 rtx base, index, disp;
/* Autoincrement forms have no explicit address bytes to count.  */
26033 if (GET_CODE (addr) == PRE_DEC
26034 || GET_CODE (addr) == POST_INC
26035 || GET_CODE (addr) == PRE_MODIFY
26036 || GET_CODE (addr) == POST_MODIFY)
26039 ok = ix86_decompose_address (addr, &parts);
/* Start with the segment-override prefix, if any.  */
26042 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
26044 /* If this is not LEA instruction, add the length of addr32 prefix. */
26045 if (TARGET_64BIT && !lea
26046 && (SImode_address_operand (addr, VOIDmode)
26047 || (parts.base && GET_MODE (parts.base) == SImode)
26048 || (parts.index && GET_MODE (parts.index) == SImode)))
26052 index = parts.index;
/* Look through SUBREGs so REGNO checks below are valid.  */
26055 if (base && GET_CODE (base) == SUBREG)
26056 base = SUBREG_REG (base);
26057 if (index && GET_CODE (index) == SUBREG)
26058 index = SUBREG_REG (index);
26060 gcc_assert (base == NULL_RTX || REG_P (base));
26061 gcc_assert (index == NULL_RTX || REG_P (index));
26064 - esp as the base always wants an index,
26065 - ebp as the base always wants a displacement,
26066 - r12 as the base always wants an index,
26067 - r13 as the base always wants a displacement. */
26069 /* Register Indirect. */
26070 if (base && !index && !disp)
26072 /* esp (for its index) and ebp (for its displacement) need
26073 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
26075 if (base == arg_pointer_rtx
26076 || base == frame_pointer_rtx
26077 || REGNO (base) == SP_REG
26078 || REGNO (base) == BP_REG
26079 || REGNO (base) == R12_REG
26080 || REGNO (base) == R13_REG)
26084 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
26085 is not disp32, but disp32(%rip), so for disp32
26086 SIB byte is needed, unless print_operand_address
26087 optimizes it into disp32(%rip) or (%rip) is implied
26089 else if (disp && !base && !index)
26092 if (rip_relative_addr_p (&parts))
26097 /* Find the length of the displacement constant. */
/* Constraint K = signed 8-bit immediate, i.e. a disp8.  */
26100 if (base && satisfies_constraint_K (disp))
26105 /* ebp always wants a displacement. Similarly r13. */
26106 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
26109 /* An index requires the two-byte modrm form.... */
26111 /* ...like esp (or r12), which always wants an index. */
26112 || base == arg_pointer_rtx
26113 || base == frame_pointer_rtx
26114 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
26121 /* Compute default value for "length_immediate" attribute. When SHORTFORM
26122 is set, expect that insn have 8bit immediate alternative. */
/* Returns the byte length of INSN's immediate operand, if any.
   NOTE(review): the per-mode switch bodies and return statements are
   elided from this listing.  */
26124 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
26128 extract_insn_cached (insn);
/* Find the (single) constant operand of the insn.  */
26129 for (i = recog_data.n_operands - 1; i >= 0; --i)
26130 if (CONSTANT_P (recog_data.operand[i]))
26132 enum attr_mode mode = get_attr_mode (insn);
/* With a short-form alternative, an 8-bit-representable constant
   (after truncation to the insn's mode) takes only one byte.  */
26135 if (shortform && CONST_INT_P (recog_data.operand[i]))
26137 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
26144 ival = trunc_int_for_mode (ival, HImode);
26147 ival = trunc_int_for_mode (ival, SImode);
26152 if (IN_RANGE (ival, -128, 127))
26169 /* Immediates for DImode instructions are encoded
26170 as 32bit sign extended values. */
26175 fatal_insn ("unknown insn mode", insn);
26181 /* Compute default value for "length_address" attribute. */
/* Returns the encoded address length of INSN's memory operand (0 if
   none).  NOTE(review): some braces/returns are elided from this
   listing.  */
26183 ix86_attr_length_address_default (rtx_insn *insn)
/* LEA encodes its source address, so measure that directly.  */
26187 if (get_attr_type (insn) == TYPE_LEA)
26189 rtx set = PATTERN (insn), addr;
26191 if (GET_CODE (set) == PARALLEL)
26192 set = XVECEXP (set, 0, 0);
26194 gcc_assert (GET_CODE (set) == SET);
26196 addr = SET_SRC (set);
26198 return memory_address_length (addr, true);
26201 extract_insn_cached (insn);
26202 for (i = recog_data.n_operands - 1; i >= 0; --i)
26203 if (MEM_P (recog_data.operand[i]))
26205 constrain_operands_cached (insn, reload_completed);
26206 if (which_alternative != -1)
26208 const char *constraints = recog_data.constraints[i];
26209 int alt = which_alternative;
/* Advance to the constraint string of the chosen alternative,
   past any '='/'+' modifiers.  */
26211 while (*constraints == '=' || *constraints == '+')
26214 while (*constraints++ != ',')
26216 /* Skip ignored operands. */
26217 if (*constraints == 'X')
26220 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26225 /* Compute default value for "length_vex" attribute. It includes
26226 2 or 3 byte VEX prefix and 1 opcode byte. */
/* NOTE(review): the return statements (3 + 1 vs. 2 + 1 bytes) are
   elided from this listing.  */
26229 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26234 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
26235 byte VEX prefix. */
26236 if (!has_0f_opcode || has_vex_w)
26239 /* We can always use 2 byte VEX prefix in 32bit. */
26243 extract_insn_cached (insn);
/* In 64-bit mode, scan the operands for anything that forces the
   3-byte prefix (REX.W/REX.X/REX.B equivalents).  */
26245 for (i = recog_data.n_operands - 1; i >= 0; --i)
26246 if (REG_P (recog_data.operand[i]))
26248 /* REX.W bit uses 3 byte VEX prefix. */
26249 if (GET_MODE (recog_data.operand[i]) == DImode
26250 && GENERAL_REG_P (recog_data.operand[i]))
26255 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26256 if (MEM_P (recog_data.operand[i])
26257 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26264 /* Return the maximum number of instructions a cpu can issue. */
/* Scheduler hook: groups of case labels share a single issue-rate
   return value.  NOTE(review): the "return N;" lines (and some case
   labels, e.g. K6/K8) are elided from this listing, so the exact
   per-group values are not visible here.  */
26267 ix86_issue_rate (void)
/* First group: 2-wide in-order / narrow cores.  */
26271 case PROCESSOR_PENTIUM:
26272 case PROCESSOR_BONNELL:
26273 case PROCESSOR_SILVERMONT:
26274 case PROCESSOR_KNL:
26275 case PROCESSOR_INTEL:
26277 case PROCESSOR_BTVER2:
26278 case PROCESSOR_PENTIUM4:
26279 case PROCESSOR_NOCONA:
/* Second group: 3-wide cores.  */
26282 case PROCESSOR_PENTIUMPRO:
26283 case PROCESSOR_ATHLON:
26285 case PROCESSOR_AMDFAM10:
26286 case PROCESSOR_GENERIC:
26287 case PROCESSOR_BTVER1:
/* Third group: wide out-of-order cores.  */
26290 case PROCESSOR_BDVER1:
26291 case PROCESSOR_BDVER2:
26292 case PROCESSOR_BDVER3:
26293 case PROCESSOR_BDVER4:
26294 case PROCESSOR_CORE2:
26295 case PROCESSOR_NEHALEM:
26296 case PROCESSOR_SANDYBRIDGE:
26297 case PROCESSOR_HASWELL:
26305 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26306 by DEP_INSN and nothing set by DEP_INSN. */
26309 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26313 /* Simplify the test for uninteresting insns. */
26314 if (insn_type != TYPE_SETCC
26315 && insn_type != TYPE_ICMOV
26316 && insn_type != TYPE_FCMOV
26317 && insn_type != TYPE_IBR)
26320 if ((set = single_set (dep_insn)) != 0)
26322 set = SET_DEST (set);
26325 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26326 && XVECLEN (PATTERN (dep_insn), 0) == 2
26327 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26328 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26330 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26331 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26336 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26339 /* This test is true if the dependent insn reads the flags but
26340 not any other potentially set register. */
26341 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26344 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26350 /* Return true iff USE_INSN has a memory address with operands set by
/* (SET_INSN — i.e. an address-generation interlock between the two.)
   Scans USE_INSN's operands for a MEM whose address is modified by
   SET_INSN.  NOTE(review): braces and the final return are elided
   from this listing.  */
26354 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26357 extract_insn_cached (use_insn);
26358 for (i = recog_data.n_operands - 1; i >= 0; --i)
26359 if (MEM_P (recog_data.operand[i]))
26361 rtx addr = XEXP (recog_data.operand[i], 0);
26362 return modified_in_p (addr, set_insn) != 0;
26367 /* Helper function for exact_store_load_dependency.
26368 Return true if addr is found in insn. */
/* Recursive rtx walk comparing ADDR against every subexpression of
   INSN.  NOTE(review): the switch cases between GET_CODE and the
   format walk are elided from this listing.  */
26370 exact_dependency_1 (rtx addr, rtx insn)
26372 enum rtx_code code;
26373 const char *format_ptr;
26376 code = GET_CODE (insn);
26380 if (rtx_equal_p (addr, insn))
/* Generic walk over the rtx's operands, driven by its format string:
   'e' = subexpression, 'E' = vector of subexpressions.  */
26395 format_ptr = GET_RTX_FORMAT (code);
26396 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26398 switch (*format_ptr++)
26401 if (exact_dependency_1 (addr, XEXP (insn, i)))
26405 for (j = 0; j < XVECLEN (insn, i); j++)
26406 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26414 /* Return true if there exists exact dependency for store & load, i.e.
26415 the same memory address is used in them. */
/* Both insns must be single sets; STORE's destination must be a MEM,
   and that MEM must appear inside LOAD's source.  NOTE(review): the
   early "return false" paths are elided from this listing.  */
26417 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26421 set1 = single_set (store);
26424 if (!MEM_P (SET_DEST (set1)))
26426 set2 = single_set (load);
26429 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26435 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26437 enum attr_type insn_type, dep_insn_type;
26438 enum attr_memory memory;
26440 int dep_insn_code_number;
26442 /* Anti and output dependencies have zero cost on all CPUs. */
26443 if (REG_NOTE_KIND (link) != 0)
26446 dep_insn_code_number = recog_memoized (dep_insn);
26448 /* If we can't recognize the insns, we can't really do anything. */
26449 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26452 insn_type = get_attr_type (insn);
26453 dep_insn_type = get_attr_type (dep_insn);
26457 case PROCESSOR_PENTIUM:
26458 /* Address Generation Interlock adds a cycle of latency. */
26459 if (insn_type == TYPE_LEA)
26461 rtx addr = PATTERN (insn);
26463 if (GET_CODE (addr) == PARALLEL)
26464 addr = XVECEXP (addr, 0, 0);
26466 gcc_assert (GET_CODE (addr) == SET);
26468 addr = SET_SRC (addr);
26469 if (modified_in_p (addr, dep_insn))
26472 else if (ix86_agi_dependent (dep_insn, insn))
26475 /* ??? Compares pair with jump/setcc. */
26476 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26479 /* Floating point stores require value to be ready one cycle earlier. */
26480 if (insn_type == TYPE_FMOV
26481 && get_attr_memory (insn) == MEMORY_STORE
26482 && !ix86_agi_dependent (dep_insn, insn))
26486 case PROCESSOR_PENTIUMPRO:
26487 /* INT->FP conversion is expensive. */
26488 if (get_attr_fp_int_src (dep_insn))
26491 /* There is one cycle extra latency between an FP op and a store. */
26492 if (insn_type == TYPE_FMOV
26493 && (set = single_set (dep_insn)) != NULL_RTX
26494 && (set2 = single_set (insn)) != NULL_RTX
26495 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26496 && MEM_P (SET_DEST (set2)))
26499 memory = get_attr_memory (insn);
26501 /* Show ability of reorder buffer to hide latency of load by executing
26502 in parallel with previous instruction in case
26503 previous instruction is not needed to compute the address. */
26504 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26505 && !ix86_agi_dependent (dep_insn, insn))
26507 /* Claim moves to take one cycle, as core can issue one load
26508 at time and the next load can start cycle later. */
26509 if (dep_insn_type == TYPE_IMOV
26510 || dep_insn_type == TYPE_FMOV)
26518 /* The esp dependency is resolved before
26519 the instruction is really finished. */
26520 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26521 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26524 /* INT->FP conversion is expensive. */
26525 if (get_attr_fp_int_src (dep_insn))
26528 memory = get_attr_memory (insn);
26530 /* Show ability of reorder buffer to hide latency of load by executing
26531 in parallel with previous instruction in case
26532 previous instruction is not needed to compute the address. */
26533 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26534 && !ix86_agi_dependent (dep_insn, insn))
26536 /* Claim moves to take one cycle, as core can issue one load
26537 at time and the next load can start cycle later. */
26538 if (dep_insn_type == TYPE_IMOV
26539 || dep_insn_type == TYPE_FMOV)
26548 case PROCESSOR_AMDFAM10:
26549 case PROCESSOR_BDVER1:
26550 case PROCESSOR_BDVER2:
26551 case PROCESSOR_BDVER3:
26552 case PROCESSOR_BDVER4:
26553 case PROCESSOR_BTVER1:
26554 case PROCESSOR_BTVER2:
26555 case PROCESSOR_GENERIC:
26556 /* Stack engine allows to execute push&pop instructions in parallel. */
26557 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26558 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26562 case PROCESSOR_ATHLON:
26564 memory = get_attr_memory (insn);
26566 /* Show ability of reorder buffer to hide latency of load by executing
26567 in parallel with previous instruction in case
26568 previous instruction is not needed to compute the address. */
26569 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26570 && !ix86_agi_dependent (dep_insn, insn))
26572 enum attr_unit unit = get_attr_unit (insn);
26575 /* Because of the difference between the length of integer and
26576 floating unit pipeline preparation stages, the memory operands
26577 for floating point are cheaper.
26579 ??? For Athlon the difference is most probably 2. */
26580 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26583 loadcost = TARGET_ATHLON ? 2 : 0;
26585 if (cost >= loadcost)
26592 case PROCESSOR_CORE2:
26593 case PROCESSOR_NEHALEM:
26594 case PROCESSOR_SANDYBRIDGE:
26595 case PROCESSOR_HASWELL:
26596 /* Stack engine allows to execute push&pop instructions in parallel. */
26597 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26598 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26601 memory = get_attr_memory (insn);
26603 /* Show ability of reorder buffer to hide latency of load by executing
26604 in parallel with previous instruction in case
26605 previous instruction is not needed to compute the address. */
26606 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26607 && !ix86_agi_dependent (dep_insn, insn))
26616 case PROCESSOR_SILVERMONT:
26617 case PROCESSOR_KNL:
26618 case PROCESSOR_INTEL:
26619 if (!reload_completed)
26622 /* Increase cost of integer loads. */
26623 memory = get_attr_memory (dep_insn);
26624 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26626 enum attr_unit unit = get_attr_unit (dep_insn);
26627 if (unit == UNIT_INTEGER && cost == 1)
26629 if (memory == MEMORY_LOAD)
26633 /* Increase cost of ld/st for short int types only
26634 because of store forwarding issue. */
26635 rtx set = single_set (dep_insn);
26636 if (set && (GET_MODE (SET_DEST (set)) == QImode
26637 || GET_MODE (SET_DEST (set)) == HImode))
26639 /* Increase cost of store/load insn if exact
26640 dependence exists and it is load insn. */
26641 enum attr_memory insn_memory = get_attr_memory (insn);
26642 if (insn_memory == MEMORY_LOAD
26643 && exact_store_load_dependency (dep_insn, insn))
26657 /* How many alternative schedules to try. This should be as wide as the
26658 scheduling freedom in the DFA, but no wider. Making this value too
26659 large results in extra work for the scheduler. */
/* NOTE(review): this extract is missing interleaved lines (the embedded
   original line numbers jump) -- the switch header, braces and per-case
   return values are not visible here.  Verify against upstream before
   modifying. */
26662 ia32_multipass_dfa_lookahead (void)
26666 case PROCESSOR_PENTIUM:
26669 case PROCESSOR_PENTIUMPRO:
26673 case PROCESSOR_BDVER1:
26674 case PROCESSOR_BDVER2:
26675 case PROCESSOR_BDVER3:
26676 case PROCESSOR_BDVER4:
26677 /* We use lookahead value 4 for BD both before and after reload
26678 schedules. Plan is to have value 8 included for O3. */
26681 case PROCESSOR_CORE2:
26682 case PROCESSOR_NEHALEM:
26683 case PROCESSOR_SANDYBRIDGE:
26684 case PROCESSOR_HASWELL:
26685 case PROCESSOR_BONNELL:
26686 case PROCESSOR_SILVERMONT:
26687 case PROCESSOR_KNL:
26688 case PROCESSOR_INTEL:
26689 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26690 as many instructions can be executed on a cycle, i.e.,
26691 issue_rate. I wonder why tuning for many CPUs does not do this. */
26692 if (reload_completed)
26693 return ix86_issue_rate ();
26694 /* Don't use lookahead for pre-reload schedule to save compile time. */
26702 /* Return true if target platform supports macro-fusion. */
/* Implements the TARGET_SCHED_MACRO_FUSION_P hook; simply reports the
   tune flag.  NOTE(review): the declaration line and braces are missing
   from this extract. */
26705 ix86_macro_fusion_p ()
26707 return TARGET_FUSE_CMP_AND_BRANCH;
26710 /* Check whether current microarchitecture supports macro fusion
26711 for insn pair "CONDGEN + CONDJMP". Refer to
26712 "Intel Architectures Optimization Reference Manual". */
/* NOTE(review): several lines (early returns, braces, parts of the
   SO-flags condition) are missing from this extract; the visible logic
   filters by condgen type, rejects MEM-IMM compares and RIP-relative
   addresses, then checks the jump's condition code. */
26715 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26718 enum rtx_code ccode;
26719 rtx compare_set = NULL_RTX, test_if, cond;
26720 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26722 if (!any_condjump_p (condjmp))
26725 if (get_attr_type (condgen) != TYPE_TEST
26726 && get_attr_type (condgen) != TYPE_ICMP
26727 && get_attr_type (condgen) != TYPE_INCDEC
26728 && get_attr_type (condgen) != TYPE_ALU)
26731 compare_set = single_set (condgen);
26732 if (compare_set == NULL_RTX
26733 && !TARGET_FUSE_ALU_AND_BRANCH)
/* No single set: scan a PARALLEL for the COMPARE set and the ALU set. */
26736 if (compare_set == NULL_RTX)
26739 rtx pat = PATTERN (condgen);
26740 for (i = 0; i < XVECLEN (pat, 0); i++)
26741 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26743 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26744 if (GET_CODE (set_src) == COMPARE)
26745 compare_set = XVECEXP (pat, 0, i);
26747 alu_set = XVECEXP (pat, 0, i);
26750 if (compare_set == NULL_RTX)
26752 src = SET_SRC (compare_set);
26753 if (GET_CODE (src) != COMPARE)
26756 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26758 if ((MEM_P (XEXP (src, 0))
26759 && CONST_INT_P (XEXP (src, 1)))
26760 || (MEM_P (XEXP (src, 1))
26761 && CONST_INT_P (XEXP (src, 0))))
26764 /* No fusion for RIP-relative address. */
26765 if (MEM_P (XEXP (src, 0)))
26766 addr = XEXP (XEXP (src, 0), 0);
26767 else if (MEM_P (XEXP (src, 1)))
26768 addr = XEXP (XEXP (src, 1), 0);
26771 ix86_address parts;
26772 int ok = ix86_decompose_address (addr, &parts);
26775 if (rip_relative_addr_p (&parts))
26779 test_if = SET_SRC (pc_set (condjmp));
26780 cond = XEXP (test_if, 0);
26781 ccode = GET_CODE (cond);
26782 /* Check whether conditional jump uses Sign or Overflow Flags. */
26783 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26790 /* Return true for TYPE_TEST and TYPE_ICMP. */
26791 if (get_attr_type (condgen) == TYPE_TEST
26792 || get_attr_type (condgen) == TYPE_ICMP)
26795 /* The following is the case of macro-fusion for alu + jmp. */
26796 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26799 /* No fusion for alu op with memory destination operand. */
26800 dest = SET_DEST (alu_set);
26804 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26806 if (get_attr_type (condgen) == TYPE_INCDEC
26816 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26817 execution. It is applied if
26818 (1) IMUL instruction is on the top of list;
26819 (2) There exists the only producer of independent IMUL instruction in
26821 Return index of IMUL producer if it was found and -1 otherwise. */
/* NOTE(review): braces, `continue`s and the final return paths are not
   visible in this extract (embedded line numbers jump). */
26823 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26826 rtx set, insn1, insn2;
26827 sd_iterator_def sd_it;
26832 if (!TARGET_BONNELL)
26835 /* Check that IMUL instruction is on the top of ready list. */
26836 insn = ready[n_ready - 1];
26837 set = single_set (insn);
26840 if (!(GET_CODE (SET_SRC (set)) == MULT
26841 && GET_MODE (SET_SRC (set)) == SImode))
26844 /* Search for producer of independent IMUL instruction. */
26845 for (i = n_ready - 2; i >= 0; i--)
26848 if (!NONDEBUG_INSN_P (insn))
26850 /* Skip IMUL instruction. */
26851 insn2 = PATTERN (insn);
26852 if (GET_CODE (insn2) == PARALLEL)
26853 insn2 = XVECEXP (insn2, 0, 0);
26854 if (GET_CODE (insn2) == SET
26855 && GET_CODE (SET_SRC (insn2)) == MULT
26856 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk forward dependencies of the candidate producer looking for a
   dependent SImode MULT (the independent IMUL consumer). */
26859 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26862 con = DEP_CON (dep);
26863 if (!NONDEBUG_INSN_P (con))
26865 insn1 = PATTERN (con);
26866 if (GET_CODE (insn1) == PARALLEL)
26867 insn1 = XVECEXP (insn1, 0, 0);
26869 if (GET_CODE (insn1) == SET
26870 && GET_CODE (SET_SRC (insn1)) == MULT
26871 && GET_MODE (SET_SRC (insn1)) == SImode)
26873 sd_iterator_def sd_it1;
26875 /* Check if there is no other dependee for IMUL. */
26877 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26880 pro = DEP_PRO (dep1);
26881 if (!NONDEBUG_INSN_P (pro))
26896 /* Try to find the best candidate on the top of ready list if two insns
26897 have the same priority - candidate is best if its dependees were
26898 scheduled earlier. Applied for Silvermont only.
26899 Return true if top 2 insns must be interchanged. */
/* NOTE(review): early-return lines and the clock1/clock2 declarations
   are missing from this extract. */
26901 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26903 rtx_insn *top = ready[n_ready - 1];
26904 rtx_insn *next = ready[n_ready - 2];
26906 sd_iterator_def sd_it;
/* INSN_TICK: cycle on which the producer was scheduled (haifa state). */
26910 #define INSN_TICK(INSN) (HID (INSN)->tick)
26912 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26915 if (!NONDEBUG_INSN_P (top))
26917 if (!NONJUMP_INSN_P (top))
26919 if (!NONDEBUG_INSN_P (next))
26921 if (!NONJUMP_INSN_P (next))
26923 set = single_set (top);
26926 set = single_set (next);
26930 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26932 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26934 /* Determine winner more precisely. */
/* Take the maximum tick over resolved back dependencies of each insn;
   the one whose producers finished earlier wins. */
26935 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26938 pro = DEP_PRO (dep);
26939 if (!NONDEBUG_INSN_P (pro))
26941 if (INSN_TICK (pro) > clock1)
26942 clock1 = INSN_TICK (pro);
26944 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26947 pro = DEP_PRO (dep);
26948 if (!NONDEBUG_INSN_P (pro))
26950 if (INSN_TICK (pro) > clock2)
26951 clock2 = INSN_TICK (pro);
26954 if (clock1 == clock2)
26956 /* Determine winner - load must win. */
26957 enum attr_memory memory1, memory2;
26958 memory1 = get_attr_memory (top);
26959 memory2 = get_attr_memory (next);
26960 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26963 return (bool) (clock2 < clock1);
26969 /* Perform possible reordering of ready list for Atom/Silvermont only.
26970 Return issue rate. */
/* Implements TARGET_SCHED_REORDER.  NOTE(review): the `out:` label,
   braces and some guard lines are missing from this extract. */
26972 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26973 int *pn_ready, int clock_var)
26975 int issue_rate = -1;
26976 int n_ready = *pn_ready;
26981 /* Set up issue rate. */
26982 issue_rate = ix86_issue_rate ();
26984 /* Do reordering for BONNELL/SILVERMONT only. */
26985 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26988 /* Nothing to do if ready list contains only 1 instruction. */
26992 /* Do reordering for post-reload scheduler only. */
26993 if (!reload_completed)
26996 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26998 if (sched_verbose > 1)
26999 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
27000 INSN_UID (ready[index]));
27002 /* Put IMUL producer (ready[index]) at the top of ready list. */
27003 insn = ready[index];
27004 for (i = index; i < n_ready - 1; i++)
27005 ready[i] = ready[i + 1];
27006 ready[n_ready - 1] = insn;
27010 /* Skip selective scheduling since HID is not populated in it. */
27013 && swap_top_of_ready_list (ready, n_ready))
27015 if (sched_verbose > 1)
27016 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
27017 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
27018 /* Swap 2 top elements of ready list. */
27019 insn = ready[n_ready - 1];
27020 ready[n_ready - 1] = ready[n_ready - 2];
27021 ready[n_ready - 2] = insn;
27027 ix86_class_likely_spilled_p (reg_class_t);
27029 /* Returns true if lhs of insn is HW function argument register and set up
27030 is_spilled to true if it is likely spilled HW register. */
/* NOTE(review): the CALL_P guard body and return statements are missing
   from this extract. */
27032 insn_is_function_arg (rtx insn, bool* is_spilled)
27036 if (!NONDEBUG_INSN_P (insn))
27038 /* Call instructions are not movable, ignore it. */
27041 insn = PATTERN (insn);
27042 if (GET_CODE (insn) == PARALLEL)
27043 insn = XVECEXP (insn, 0, 0);
27044 if (GET_CODE (insn) != SET)
27046 dst = SET_DEST (insn);
27047 if (REG_P (dst) && HARD_REGISTER_P (dst)
27048 && ix86_function_arg_regno_p (REGNO (dst)))
27050 /* Is it likely spilled HW register? */
27051 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
27052 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
27053 *is_spilled = true;
27059 /* Add output dependencies for chain of function adjacent arguments if only
27060 there is a move to likely spilled HW register. Return first argument
27061 if at least one dependence was added or NULL otherwise. */
/* NOTE(review): loop braces, break conditions and the return are missing
   from this extract. */
27063 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
27066 rtx_insn *last = call;
27067 rtx_insn *first_arg = NULL;
27068 bool is_spilled = false;
27070 head = PREV_INSN (head);
27072 /* Find nearest to call argument passing instruction. */
27075 last = PREV_INSN (last);
27078 if (!NONDEBUG_INSN_P (last))
27080 if (insn_is_function_arg (last, &is_spilled))
/* Walk backwards collecting the chain of adjacent argument moves. */
27088 insn = PREV_INSN (last);
27089 if (!INSN_P (insn))
27093 if (!NONDEBUG_INSN_P (insn))
27098 if (insn_is_function_arg (insn, &is_spilled))
27100 /* Add output dependence between two function arguments if chain
27101 of output arguments contains likely spilled HW registers. */
27103 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27104 first_arg = last = insn;
27114 /* Add output or anti dependency from insn to first_arg to restrict its code
/* NOTE(review): the function header/braces and the SET_DEST REG_P test
   are missing from this extract.  The MPX bounds-store case (UNSPEC_BNDSTX)
   gets an anti dependency, a register set matching first_arg's destination
   gets an output dependency, everything else an anti dependency. */
27117 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
27122 /* Add anti dependencies for bounds stores. */
27124 && GET_CODE (PATTERN (insn)) == PARALLEL
27125 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
27126 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
27128 add_dependence (first_arg, insn, REG_DEP_ANTI);
27132 set = single_set (insn);
27135 tmp = SET_DEST (set);
27138 /* Add output dependency to the first function argument. */
27139 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27142 /* Add anti dependency. */
27143 add_dependence (first_arg, insn, REG_DEP_ANTI);
27146 /* Avoid cross block motion of function argument through adding dependency
27147 from the first non-jump instruction in bb. */
/* Walks BB backwards from its end; for each nondebug, nonjump single-set
   insn it adds a restricting dependency via avoid_func_arg_motion.
   NOTE(review): loop braces and exit conditions are missing from this
   extract. */
27149 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
27151 rtx_insn *insn = BB_END (bb);
27155 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
27157 rtx set = single_set (insn);
27160 avoid_func_arg_motion (arg, insn);
27164 if (insn == BB_HEAD (bb))
27166 insn = PREV_INSN (insn);
27170 /* Hook for pre-reload schedule - avoid motion of function arguments
27171 passed in likely spilled HW registers. */
/* NOTE(review): braces, edge-iterator declarations and some guard lines
   are missing from this extract. */
27173 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
27176 rtx_insn *first_arg = NULL;
27177 if (reload_completed)
27179 while (head != tail && DEBUG_INSN_P (head))
27180 head = NEXT_INSN (head);
27181 for (insn = tail; insn != head; insn = PREV_INSN (insn))
27182 if (INSN_P (insn) && CALL_P (insn))
27184 first_arg = add_parameter_dependencies (insn, head);
27187 /* Add dependee for first argument to predecessors if only
27188 region contains more than one block. */
27189 basic_block bb = BLOCK_FOR_INSN (insn);
27190 int rgn = CONTAINING_RGN (bb->index);
27191 int nr_blks = RGN_NR_BLOCKS (rgn);
27192 /* Skip trivial regions and region head blocks that can have
27193 predecessors outside of region. */
27194 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27199 /* Regions are SCCs with the exception of selective
27200 scheduling with pipelining of outer blocks enabled.
27201 So also check that immediate predecessors of a non-head
27202 block are in the same region. */
27203 FOR_EACH_EDGE (e, ei, bb->preds)
27205 /* Avoid creating of loop-carried dependencies through
27206 using topological ordering in the region. */
27207 if (rgn == CONTAINING_RGN (e->src->index)
27208 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27209 add_dependee_for_func_arg (first_arg, e->src);
27217 else if (first_arg)
27218 avoid_func_arg_motion (first_arg, insn);
27221 /* Hook for pre-reload schedule - set priority of moves from likely spilled
27222 HW registers to maximum, to schedule them as soon as possible. These are
27223 moves from function argument registers at the top of the function entry
27224 and moves from function return value registers after call. */
/* NOTE(review): early returns and the REG_P test on the source are
   missing from this extract; falls back to returning PRIORITY unchanged. */
27226 ix86_adjust_priority (rtx_insn *insn, int priority)
27230 if (reload_completed)
27233 if (!NONDEBUG_INSN_P (insn))
27236 set = single_set (insn);
27239 rtx tmp = SET_SRC (set);
27241 && HARD_REGISTER_P (tmp)
27242 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27243 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27244 return current_sched_info->sched_max_insns_priority;
27250 /* Model decoder of Core 2/i7.
27251 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27252 track the instruction fetch block boundaries and make sure that long
27253 (9+ bytes) instructions are assigned to D0. */
27255 /* Maximum length of an insn that can be handled by
27256 a secondary decoder unit. '8' for Core 2/i7. */
27257 static int core2i7_secondary_decoder_max_insn_size;
27259 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27260 '16' for Core 2/i7. */
27261 static int core2i7_ifetch_block_size;
27263 /* Maximum number of instructions decoder can handle per cycle.
27264 '6' for Core 2/i7. */
27265 static int core2i7_ifetch_block_max_insns;
/* Mutable/const pointer typedefs for the per-round multipass state. */
27267 typedef struct ix86_first_cycle_multipass_data_ *
27268 ix86_first_cycle_multipass_data_t;
27269 typedef const struct ix86_first_cycle_multipass_data_ *
27270 const_ix86_first_cycle_multipass_data_t;
27272 /* A variable to store target state across calls to max_issue within
27274 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27275 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27277 /* Initialize DATA. */
/* Zeroes the decoder-model counters and the ready_try change bitmap
   bookkeeping for a fresh scheduling round. */
27279 core2i7_first_cycle_multipass_init (void *_data)
27281 ix86_first_cycle_multipass_data_t data
27282 = (ix86_first_cycle_multipass_data_t) _data;
27284 data->ifetch_block_len = 0;
27285 data->ifetch_block_n_insns = 0;
27286 data->ready_try_change = NULL;
27287 data->ready_try_change_size = 0;
27290 /* Advancing the cycle; reset ifetch block counts. */
27292 core2i7_dfa_post_advance_cycle (void)
27294 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
/* The previous cycle must not have overfilled the decoder. */
27296 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27298 data->ifetch_block_len = 0;
27299 data->ifetch_block_n_insns = 0;
27302 static int min_insn_size (rtx_insn *);
27304 /* Filter out insns from ready_try that the core will not be able to issue
27305 on current cycle due to decoder. */
/* NOTE(review): the loop over N_READY and its braces are missing from
   this extract; the visible body is the per-element check. */
27307 core2i7_first_cycle_multipass_filter_ready_try
27308 (const_ix86_first_cycle_multipass_data_t data,
27309 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27316 if (ready_try[n_ready])
27319 insn = get_ready_element (n_ready);
27320 insn_size = min_insn_size (insn);
27322 if (/* If this is a too long an insn for a secondary decoder ... */
27323 (!first_cycle_insn_p
27324 && insn_size > core2i7_secondary_decoder_max_insn_size)
27325 /* ... or it would not fit into the ifetch block ... */
27326 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27327 /* ... or the decoder is full already ... */
27328 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27329 /* ... mask the insn out. */
27331 ready_try[n_ready] = 1;
/* Remember the mask so backtracking can undo it. */
27333 if (data->ready_try_change)
27334 bitmap_set_bit (data->ready_try_change, n_ready);
27339 /* Prepare for a new round of multipass lookahead scheduling. */
/* Copies the decoder counters carried over from the previous round into
   this round's state, then masks insns the decoder cannot issue. */
27341 core2i7_first_cycle_multipass_begin (void *_data,
27342 signed char *ready_try, int n_ready,
27343 bool first_cycle_insn_p)
27345 ix86_first_cycle_multipass_data_t data
27346 = (ix86_first_cycle_multipass_data_t) _data;
27347 const_ix86_first_cycle_multipass_data_t prev_data
27348 = ix86_first_cycle_multipass_data;
27350 /* Restore the state from the end of the previous round. */
27351 data->ifetch_block_len = prev_data->ifetch_block_len;
27352 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27354 /* Filter instructions that cannot be issued on current cycle due to
27355 decoder restrictions. */
27356 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27357 first_cycle_insn_p);
27360 /* INSN is being issued in current solution. Account for its impact on
27361 the decoder model. */
/* NOTE(review): a few brace/argument lines (e.g. the resize argument and
   the trailing filter call arguments) are missing from this extract. */
27363 core2i7_first_cycle_multipass_issue (void *_data,
27364 signed char *ready_try, int n_ready,
27365 rtx_insn *insn, const void *_prev_data)
27367 ix86_first_cycle_multipass_data_t data
27368 = (ix86_first_cycle_multipass_data_t) _data;
27369 const_ix86_first_cycle_multipass_data_t prev_data
27370 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27372 int insn_size = min_insn_size (insn);
27374 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27375 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27376 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27377 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27379 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27380 if (!data->ready_try_change)
27382 data->ready_try_change = sbitmap_alloc (n_ready);
27383 data->ready_try_change_size = n_ready;
27385 else if (data->ready_try_change_size < n_ready)
27387 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27389 data->ready_try_change_size = n_ready;
27391 bitmap_clear (data->ready_try_change);
27393 /* Filter out insns from ready_try that the core will not be able to issue
27394 on current cycle due to decoder. */
27395 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27399 /* Revert the effect on ready_try. */
/* Clears every ready_try slot that the corresponding issue step masked,
   using the recorded ready_try_change bitmap. */
27401 core2i7_first_cycle_multipass_backtrack (const void *_data,
27402 signed char *ready_try,
27403 int n_ready ATTRIBUTE_UNUSED)
27405 const_ix86_first_cycle_multipass_data_t data
27406 = (const_ix86_first_cycle_multipass_data_t) _data;
27407 unsigned int i = 0;
27408 sbitmap_iterator sbi;
27410 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27411 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27417 /* Save the result of multipass lookahead scheduling for the next round. */
/* Copies the winning solution's decoder counters into the persistent
   ix86_first_cycle_multipass_data state. */
27419 core2i7_first_cycle_multipass_end (const void *_data)
27421 const_ix86_first_cycle_multipass_data_t data
27422 = (const_ix86_first_cycle_multipass_data_t) _data;
27423 ix86_first_cycle_multipass_data_t next_data
27424 = ix86_first_cycle_multipass_data;
27428 next_data->ifetch_block_len = data->ifetch_block_len;
27429 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27433 /* Deallocate target data. */
/* Frees the ready_try change bitmap and resets its bookkeeping. */
27435 core2i7_first_cycle_multipass_fini (void *_data)
27437 ix86_first_cycle_multipass_data_t data
27438 = (ix86_first_cycle_multipass_data_t) _data;
27440 if (data->ready_try_change)
27442 sbitmap_free (data->ready_try_change);
27443 data->ready_try_change = NULL;
27444 data->ready_try_change_size = 0;
27448 /* Prepare for scheduling pass. */
/* Installs or clears the core2i7 decoder-model hooks depending on the
   CPU being tuned for.  NOTE(review): the switch header, braces and
   `break`s are missing from this extract. */
27450 ix86_sched_init_global (FILE *, int, int)
27452 /* Install scheduling hooks for current CPU. Some of these hooks are used
27453 in time-critical parts of the scheduler, so we only set them up when
27454 they are actually used. */
27457 case PROCESSOR_CORE2:
27458 case PROCESSOR_NEHALEM:
27459 case PROCESSOR_SANDYBRIDGE:
27460 case PROCESSOR_HASWELL:
27461 /* Do not perform multipass scheduling for pre-reload schedule
27462 to save compile time. */
27463 if (reload_completed)
27465 targetm.sched.dfa_post_advance_cycle
27466 = core2i7_dfa_post_advance_cycle;
27467 targetm.sched.first_cycle_multipass_init
27468 = core2i7_first_cycle_multipass_init;
27469 targetm.sched.first_cycle_multipass_begin
27470 = core2i7_first_cycle_multipass_begin;
27471 targetm.sched.first_cycle_multipass_issue
27472 = core2i7_first_cycle_multipass_issue;
27473 targetm.sched.first_cycle_multipass_backtrack
27474 = core2i7_first_cycle_multipass_backtrack;
27475 targetm.sched.first_cycle_multipass_end
27476 = core2i7_first_cycle_multipass_end;
27477 targetm.sched.first_cycle_multipass_fini
27478 = core2i7_first_cycle_multipass_fini;
27480 /* Set decoder parameters. */
27481 core2i7_secondary_decoder_max_insn_size = 8;
27482 core2i7_ifetch_block_size = 16;
27483 core2i7_ifetch_block_max_insns = 6;
27486 /* ... Fall through ... */
27488 targetm.sched.dfa_post_advance_cycle = NULL;
27489 targetm.sched.first_cycle_multipass_init = NULL;
27490 targetm.sched.first_cycle_multipass_begin = NULL;
27491 targetm.sched.first_cycle_multipass_issue = NULL;
27492 targetm.sched.first_cycle_multipass_backtrack = NULL;
27493 targetm.sched.first_cycle_multipass_end = NULL;
27494 targetm.sched.first_cycle_multipass_fini = NULL;
27500 /* Compute the alignment given to a constant that is being placed in memory.
27501 EXP is the constant and ALIGN is the alignment that the object would
27503 The value of this function is used instead of that alignment to align
/* NOTE(review): the 64/128 return statements and the final fall-through
   return of ALIGN are missing from this extract. */
27507 ix86_constant_alignment (tree exp, int align)
27509 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27510 || TREE_CODE (exp) == INTEGER_CST)
27512 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27514 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27517 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27518 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27519 return BITS_PER_WORD;
27524 /* Compute the alignment for a static variable.
27525 TYPE is the data type, and ALIGN is the alignment that
27526 the object would ordinarily have. The value of this function is used
27527 instead of that alignment to align the object. */
/* NOTE(review): several return statements, the `if (opt ...)` guards and
   braces are missing from this extract. */
27530 ix86_data_alignment (tree type, int align, bool opt)
27532 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27533 for symbols from other compilation units or symbols that don't need
27534 to bind locally. In order to preserve some ABI compatibility with
27535 those compilers, ensure we don't decrease alignment from what we
27538 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27540 /* A data structure, equal or greater than the size of a cache line
27541 (64 bytes in the Pentium 4 and other recent Intel processors, including
27542 processors based on Intel Core microarchitecture) should be aligned
27543 so that its base address is a multiple of a cache line size. */
27546 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27548 if (max_align < BITS_PER_WORD)
27549 max_align = BITS_PER_WORD;
27551 switch (ix86_align_data_type)
27553 case ix86_align_data_type_abi: opt = false; break;
27554 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27555 case ix86_align_data_type_cacheline: break;
27559 && AGGREGATE_TYPE_P (type)
27560 && TYPE_SIZE (type)
27561 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27563 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27564 && align < max_align_compat)
27565 align = max_align_compat;
27566 if (wi::geu_p (TYPE_SIZE (type), max_align)
27567 && align < max_align)
27571 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27572 to 16byte boundary. */
27575 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27576 && TYPE_SIZE (type)
27577 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27578 && wi::geu_p (TYPE_SIZE (type), 128)
/* Per-tree-code minimum alignments for doubles and 128-bit modes. */
27586 if (TREE_CODE (type) == ARRAY_TYPE)
27588 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27590 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27593 else if (TREE_CODE (type) == COMPLEX_TYPE)
27596 if (TYPE_MODE (type) == DCmode && align < 64)
27598 if ((TYPE_MODE (type) == XCmode
27599 || TYPE_MODE (type) == TCmode) && align < 128)
27602 else if ((TREE_CODE (type) == RECORD_TYPE
27603 || TREE_CODE (type) == UNION_TYPE
27604 || TREE_CODE (type) == QUAL_UNION_TYPE)
27605 && TYPE_FIELDS (type))
27607 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27609 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27612 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27613 || TREE_CODE (type) == INTEGER_TYPE)
27615 if (TYPE_MODE (type) == DFmode && align < 64)
27617 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27624 /* Compute the alignment for a local variable or a stack slot. EXP is
27625 the data type or decl itself, MODE is the widest mode available and
27626 ALIGN is the alignment that the object would ordinarily have. The
27627 value of this macro is used instead of that alignment to align the
/* NOTE(review): the decl/type disentangling, several returns and braces
   are missing from this extract. */
27631 ix86_local_alignment (tree exp, machine_mode mode,
27632 unsigned int align)
27636 if (exp && DECL_P (exp))
27638 type = TREE_TYPE (exp);
27647 /* Don't do dynamic stack realignment for long long objects with
27648 -mpreferred-stack-boundary=2. */
27651 && ix86_preferred_stack_boundary < 64
27652 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27653 && (!type || !TYPE_USER_ALIGN (type))
27654 && (!decl || !DECL_USER_ALIGN (decl)))
27657 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27658 register in MODE. We will return the largest alignment of XF
27662 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27663 align = GET_MODE_ALIGNMENT (DFmode);
27667 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27668 to 16byte boundary. Exact wording is:
27670 An array uses the same alignment as its elements, except that a local or
27671 global array variable of length at least 16 bytes or
27672 a C99 variable-length array variable always has alignment of at least 16 bytes.
27674 This was added to allow use of aligned SSE instructions at arrays. This
27675 rule is meant for static storage (where compiler can not do the analysis
27676 by itself). We follow it for automatic variables only when convenient.
27677 We fully control everything in the function compiled and functions from
27678 other unit can not rely on the alignment.
27680 Exclude va_list type. It is the common case of local array where
27681 we can not benefit from the alignment.
27683 TODO: Probably one should optimize for size only when var is not escaping. */
27684 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27687 if (AGGREGATE_TYPE_P (type)
27688 && (va_list_type_node == NULL_TREE
27689 || (TYPE_MAIN_VARIANT (type)
27690 != TYPE_MAIN_VARIANT (va_list_type_node)))
27691 && TYPE_SIZE (type)
27692 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27693 && wi::geu_p (TYPE_SIZE (type), 16)
/* Per-tree-code minimum alignments, mirroring ix86_data_alignment. */
27697 if (TREE_CODE (type) == ARRAY_TYPE)
27699 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27701 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27704 else if (TREE_CODE (type) == COMPLEX_TYPE)
27706 if (TYPE_MODE (type) == DCmode && align < 64)
27708 if ((TYPE_MODE (type) == XCmode
27709 || TYPE_MODE (type) == TCmode) && align < 128)
27712 else if ((TREE_CODE (type) == RECORD_TYPE
27713 || TREE_CODE (type) == UNION_TYPE
27714 || TREE_CODE (type) == QUAL_UNION_TYPE)
27715 && TYPE_FIELDS (type))
27717 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27719 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27722 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27723 || TREE_CODE (type) == INTEGER_TYPE)
27726 if (TYPE_MODE (type) == DFmode && align < 64)
27728 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27734 /* Compute the minimum required alignment for dynamic stack realignment
27735 purposes for a local variable, parameter or a stack slot. EXP is
27736 the data type or decl itself, MODE is its mode and ALIGN is the
27737 alignment that the object would ordinarily have. */
/* NOTE(review): the decl extraction and both return statements are
   missing from this extract. */
27740 ix86_minimum_alignment (tree exp, machine_mode mode,
27741 unsigned int align)
27745 if (exp && DECL_P (exp))
27747 type = TREE_TYPE (exp);
27756 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27759 /* Don't do dynamic stack realignment for long long objects with
27760 -mpreferred-stack-boundary=2. */
27761 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27762 && (!type || !TYPE_USER_ALIGN (type))
27763 && (!decl || !DECL_USER_ALIGN (decl)))
/* NOTE(review): non-contiguous excerpt -- the embedded original line
   numbers show that the return type, braces, several `regno = ...`
   assignments and an `else` arm are elided from this view.  Comments
   only are added below.  */
27769 /* Find a location for the static chain incoming to a nested function.
27770 This is a register, unless all free registers are used by arguments. */
27773 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27777 /* While this function won't be called by the middle-end when a static
27778 chain isn't needed, it's also used throughout the backend so it's
27779 easiest to keep this check centralized. */
27780 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27785 /* We always use R10 in 64-bit mode. */
27790 const_tree fntype, fndecl;
27793 /* By default in 32-bit mode we use ECX to pass the static chain. */
/* FNDECL_OR_TYPE may be either a FUNCTION_DECL or a bare function
   type; normalize into FNTYPE/FNDECL for the calling-convention query
   below.  */
27796 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27798 fntype = TREE_TYPE (fndecl_or_type);
27799 fndecl = fndecl_or_type;
27803 fntype = fndecl_or_type;
/* The calling convention determines which registers remain free for
   the static chain.  */
27807 ccvt = ix86_get_callcvt (fntype);
27808 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27810 /* Fastcall functions use ecx/edx for arguments, which leaves
27811 us with EAX for the static chain.
27812 Thiscall functions use ecx for arguments, which also
27813 leaves us with EAX for the static chain. */
27816 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27818 /* Thiscall functions use ecx for arguments, which leaves
27819 us with EAX and EDX for the static chain.
27820 We are using for abi-compatibility EAX. */
27823 else if (ix86_function_regparm (fntype, fndecl) == 3)
27825 /* For regparm 3, we have no free call-clobbered registers in
27826 which to store the static chain. In order to implement this,
27827 we have the trampoline push the static chain to the stack.
27828 However, we can't push a value below the return address when
27829 we call the nested function directly, so we have to use an
27830 alternate entry point. For this we use ESI, and have the
27831 alternate entry point push ESI, so that things appear the
27832 same once we're executing the nested function. */
/* Record the stack-based chain so the prologue/trampoline code can
   compensate (see ix86_trampoline_init), and return a stack slot
   just below the argument pointer instead of a register.  */
27835 if (fndecl == current_function_decl)
27836 ix86_static_chain_on_stack = true;
27837 return gen_frame_mem (SImode,
27838 plus_constant (Pmode,
27839 arg_pointer_rtx, -8));
/* Common exit for the register cases: REGNO was chosen above (its
   assignments are among the elided lines).  */
27845 return gen_rtx_REG (Pmode, regno);
/* NOTE(review): non-contiguous excerpt -- braces, the `offset`
   bookkeeping statements, the 64-bit/32-bit `if (TARGET_64BIT)` split,
   and several `else` arms are elided (see the gaps in the embedded
   original line numbers).  Comments only are added below.  */
27848 /* Emit RTL insns to initialize the variable parts of a trampoline.
27849 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27850 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27851 to be passed to the target function. */
27854 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27860 fnaddr = XEXP (DECL_RTL (fndecl), 0);
/* --- 64-bit trampoline: materialize the target address in r11. --- */
27866 /* Load the function address to r11. Try to load address using
27867 the shorter movl instead of movabs. We may want to support
27868 movq for kernel mode, but kernel does not use trampolines at
27869 the moment. FNADDR is a 32bit address and may not be in
27870 DImode when ptr_mode == SImode. Always use movl in this
27872 if (ptr_mode == SImode
27873 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27875 fnaddr = copy_addr_to_reg (fnaddr);
/* 0xbb41 encodes the two opcode bytes of "movl $imm32, %r11d"
   (little-endian: 41 bb), followed by the 32-bit address.  */
27877 mem = adjust_address (m_tramp, HImode, offset);
27878 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27880 mem = adjust_address (m_tramp, SImode, offset + 2);
27881 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* Otherwise emit "movabs $imm64, %r11" (49 bb + 8-byte immediate).  */
27886 mem = adjust_address (m_tramp, HImode, offset);
27887 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27889 mem = adjust_address (m_tramp, DImode, offset + 2);
27890 emit_move_insn (mem, fnaddr);
27894 /* Load static chain using movabs to r10. Use the shorter movl
27895 instead of movabs when ptr_mode == SImode. */
27896 if (ptr_mode == SImode)
/* OPCODE was selected in elided lines; write its two bytes then the
   chain value immediate.  */
27907 mem = adjust_address (m_tramp, HImode, offset);
27908 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27910 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27911 emit_move_insn (mem, chain_value);
27914 /* Jump to r11; the last (unused) byte is a nop, only there to
27915 pad the write out to a single 32-bit store. */
/* 0x90e3ff49 is "jmp *%r11" (49 ff e3) plus a trailing nop (90).  */
27916 mem = adjust_address (m_tramp, SImode, offset);
27917 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
/* --- 32-bit trampoline: mov/push the chain, then a rel32 jmp. --- */
27924 /* Depending on the static chain location, either load a register
27925 with a constant, or push the constant to the stack. All of the
27926 instructions are the same size. */
27927 chain = ix86_static_chain (fndecl, true);
/* Pick the one-byte "mov $imm32, reg" opcode for the chosen chain
   register (0xb8 = eax, 0xb9 = ecx; case labels are elided).  */
27930 switch (REGNO (chain))
27933 opcode = 0xb8; break;
27935 opcode = 0xb9; break;
27937 gcc_unreachable ();
27943 mem = adjust_address (m_tramp, QImode, offset);
27944 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27946 mem = adjust_address (m_tramp, SImode, offset + 1);
27947 emit_move_insn (mem, chain_value);
/* 0xe9 is the "jmp rel32" opcode; the displacement is computed below.  */
27950 mem = adjust_address (m_tramp, QImode, offset);
27951 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27953 mem = adjust_address (m_tramp, SImode, offset + 1);
27955 /* Compute offset from the end of the jmp to the target function.
27956 In the case in which the trampoline stores the static chain on
27957 the stack, we need to skip the first insn which pushes the
27958 (call-saved) register static chain; this push is 1 byte. */
27960 disp = expand_binop (SImode, sub_optab, fnaddr,
27961 plus_constant (Pmode, XEXP (m_tramp, 0),
27962 offset - (MEM_P (chain) ? 1 : 0)),
27963 NULL_RTX, 1, OPTAB_DIRECT);
27964 emit_move_insn (mem, disp);
/* Sanity-check that the bytes emitted above fit in the trampoline.  */
27967 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets that need it, mark the trampoline's stack page
   executable via the libgcc helper.  */
27969 #ifdef HAVE_ENABLE_EXECUTE_STACK
27970 #ifdef CHECK_EXECUTE_STACK_ENABLED
27971 if (CHECK_EXECUTE_STACK_ENABLED)
27973 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27974 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27978 /* The following file contains several enumerations and data structures
27979 built from the definitions in i386-builtin-types.def. */
27981 #include "i386-builtin-types.inc"
27983 /* Table for the ix86 builtin non-function types. */
27984 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
/* NOTE(review): non-contiguous excerpt -- the return type, braces,
   the early-return on a cached entry, the final `return type;`, and
   some local declarations (type, itype, mode, quals) are elided (see
   gaps in the embedded original line numbers).  Comments only.  */
27986 /* Retrieve an element from the above table, building some of
27987 the types lazily. */
27990 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27992 unsigned int index;
27995 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
/* Consult the memoization table first; a non-NULL entry is returned
   directly (that early return is among the elided lines -- TODO
   confirm).  */
27997 type = ix86_builtin_type_tab[(int) tcode];
/* Primitive types are pre-seeded elsewhere, so a cache miss can only
   be a vector or pointer code.  */
28001 gcc_assert (tcode > IX86_BT_LAST_PRIM);
28002 if (tcode <= IX86_BT_LAST_VECT)
/* Vector case: build a vector of the recorded base type in the
   recorded machine mode.  */
28006 index = tcode - IX86_BT_LAST_PRIM - 1;
28007 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
28008 mode = ix86_builtin_type_vect_mode[index];
28010 type = build_vector_type_for_mode (itype, mode);
/* Pointer case: codes up to IX86_BT_LAST_PTR are plain pointers;
   beyond that they are pointers-to-const.  */
28016 index = tcode - IX86_BT_LAST_VECT - 1;
28017 if (tcode <= IX86_BT_LAST_PTR)
28018 quals = TYPE_UNQUALIFIED;
28020 quals = TYPE_QUAL_CONST;
28022 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
28023 if (quals != TYPE_UNQUALIFIED)
28024 itype = build_qualified_type (itype, quals);
28026 type = build_pointer_type (itype);
/* Memoize the lazily built type for subsequent lookups.  */
28029 ix86_builtin_type_tab[(int) tcode] = type;
28033 /* Table for the ix86 builtin function types. */
28034 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
/* NOTE(review): non-contiguous excerpt -- the return type, braces,
   the cached-entry early return, the final `return type;`, and the
   declaration of `i` are elided (see gaps in the embedded original
   line numbers).  Comments only.  */
28036 /* Retrieve an element from the above table, building some of
28037 the types lazily. */
28040 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
28044 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
/* Memoization lookup; a non-NULL cached entry is presumably returned
   immediately (the early return is among the elided lines).  */
28046 type = ix86_builtin_func_type_tab[(int) tcode];
28050 if (tcode <= IX86_BT_LAST_FUNC)
/* Primary function types: the args table holds the return type at
   [start] followed by the argument types up to (but excluding)
   [after].  */
28052 unsigned start = ix86_builtin_func_start[(int) tcode];
28053 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
28054 tree rtype, atype, args = void_list_node;
28057 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Walk the argument codes backwards so the TREE_LIST built by
   tree_cons comes out in declaration order.  */
28058 for (i = after - 1; i > start; --i)
28060 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
28061 args = tree_cons (NULL, atype, args);
28064 type = build_function_type (rtype, args);
/* Alias codes simply resolve to another function-type code.  */
28068 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
28069 enum ix86_builtin_func_type icode;
28071 icode = ix86_builtin_func_alias_base[index];
28072 type = ix86_get_builtin_func_type (icode);
/* Cache the built type for future calls.  */
28075 ix86_builtin_func_type_tab[(int) tcode] = type;
28080 /* Codes for all the SSE/MMX builtins. */
28083 IX86_BUILTIN_ADDPS,
28084 IX86_BUILTIN_ADDSS,
28085 IX86_BUILTIN_DIVPS,
28086 IX86_BUILTIN_DIVSS,
28087 IX86_BUILTIN_MULPS,
28088 IX86_BUILTIN_MULSS,
28089 IX86_BUILTIN_SUBPS,
28090 IX86_BUILTIN_SUBSS,
28092 IX86_BUILTIN_CMPEQPS,
28093 IX86_BUILTIN_CMPLTPS,
28094 IX86_BUILTIN_CMPLEPS,
28095 IX86_BUILTIN_CMPGTPS,
28096 IX86_BUILTIN_CMPGEPS,
28097 IX86_BUILTIN_CMPNEQPS,
28098 IX86_BUILTIN_CMPNLTPS,
28099 IX86_BUILTIN_CMPNLEPS,
28100 IX86_BUILTIN_CMPNGTPS,
28101 IX86_BUILTIN_CMPNGEPS,
28102 IX86_BUILTIN_CMPORDPS,
28103 IX86_BUILTIN_CMPUNORDPS,
28104 IX86_BUILTIN_CMPEQSS,
28105 IX86_BUILTIN_CMPLTSS,
28106 IX86_BUILTIN_CMPLESS,
28107 IX86_BUILTIN_CMPNEQSS,
28108 IX86_BUILTIN_CMPNLTSS,
28109 IX86_BUILTIN_CMPNLESS,
28110 IX86_BUILTIN_CMPORDSS,
28111 IX86_BUILTIN_CMPUNORDSS,
28113 IX86_BUILTIN_COMIEQSS,
28114 IX86_BUILTIN_COMILTSS,
28115 IX86_BUILTIN_COMILESS,
28116 IX86_BUILTIN_COMIGTSS,
28117 IX86_BUILTIN_COMIGESS,
28118 IX86_BUILTIN_COMINEQSS,
28119 IX86_BUILTIN_UCOMIEQSS,
28120 IX86_BUILTIN_UCOMILTSS,
28121 IX86_BUILTIN_UCOMILESS,
28122 IX86_BUILTIN_UCOMIGTSS,
28123 IX86_BUILTIN_UCOMIGESS,
28124 IX86_BUILTIN_UCOMINEQSS,
28126 IX86_BUILTIN_CVTPI2PS,
28127 IX86_BUILTIN_CVTPS2PI,
28128 IX86_BUILTIN_CVTSI2SS,
28129 IX86_BUILTIN_CVTSI642SS,
28130 IX86_BUILTIN_CVTSS2SI,
28131 IX86_BUILTIN_CVTSS2SI64,
28132 IX86_BUILTIN_CVTTPS2PI,
28133 IX86_BUILTIN_CVTTSS2SI,
28134 IX86_BUILTIN_CVTTSS2SI64,
28136 IX86_BUILTIN_MAXPS,
28137 IX86_BUILTIN_MAXSS,
28138 IX86_BUILTIN_MINPS,
28139 IX86_BUILTIN_MINSS,
28141 IX86_BUILTIN_LOADUPS,
28142 IX86_BUILTIN_STOREUPS,
28143 IX86_BUILTIN_MOVSS,
28145 IX86_BUILTIN_MOVHLPS,
28146 IX86_BUILTIN_MOVLHPS,
28147 IX86_BUILTIN_LOADHPS,
28148 IX86_BUILTIN_LOADLPS,
28149 IX86_BUILTIN_STOREHPS,
28150 IX86_BUILTIN_STORELPS,
28152 IX86_BUILTIN_MASKMOVQ,
28153 IX86_BUILTIN_MOVMSKPS,
28154 IX86_BUILTIN_PMOVMSKB,
28156 IX86_BUILTIN_MOVNTPS,
28157 IX86_BUILTIN_MOVNTQ,
28159 IX86_BUILTIN_LOADDQU,
28160 IX86_BUILTIN_STOREDQU,
28162 IX86_BUILTIN_PACKSSWB,
28163 IX86_BUILTIN_PACKSSDW,
28164 IX86_BUILTIN_PACKUSWB,
28166 IX86_BUILTIN_PADDB,
28167 IX86_BUILTIN_PADDW,
28168 IX86_BUILTIN_PADDD,
28169 IX86_BUILTIN_PADDQ,
28170 IX86_BUILTIN_PADDSB,
28171 IX86_BUILTIN_PADDSW,
28172 IX86_BUILTIN_PADDUSB,
28173 IX86_BUILTIN_PADDUSW,
28174 IX86_BUILTIN_PSUBB,
28175 IX86_BUILTIN_PSUBW,
28176 IX86_BUILTIN_PSUBD,
28177 IX86_BUILTIN_PSUBQ,
28178 IX86_BUILTIN_PSUBSB,
28179 IX86_BUILTIN_PSUBSW,
28180 IX86_BUILTIN_PSUBUSB,
28181 IX86_BUILTIN_PSUBUSW,
28184 IX86_BUILTIN_PANDN,
28188 IX86_BUILTIN_PAVGB,
28189 IX86_BUILTIN_PAVGW,
28191 IX86_BUILTIN_PCMPEQB,
28192 IX86_BUILTIN_PCMPEQW,
28193 IX86_BUILTIN_PCMPEQD,
28194 IX86_BUILTIN_PCMPGTB,
28195 IX86_BUILTIN_PCMPGTW,
28196 IX86_BUILTIN_PCMPGTD,
28198 IX86_BUILTIN_PMADDWD,
28200 IX86_BUILTIN_PMAXSW,
28201 IX86_BUILTIN_PMAXUB,
28202 IX86_BUILTIN_PMINSW,
28203 IX86_BUILTIN_PMINUB,
28205 IX86_BUILTIN_PMULHUW,
28206 IX86_BUILTIN_PMULHW,
28207 IX86_BUILTIN_PMULLW,
28209 IX86_BUILTIN_PSADBW,
28210 IX86_BUILTIN_PSHUFW,
28212 IX86_BUILTIN_PSLLW,
28213 IX86_BUILTIN_PSLLD,
28214 IX86_BUILTIN_PSLLQ,
28215 IX86_BUILTIN_PSRAW,
28216 IX86_BUILTIN_PSRAD,
28217 IX86_BUILTIN_PSRLW,
28218 IX86_BUILTIN_PSRLD,
28219 IX86_BUILTIN_PSRLQ,
28220 IX86_BUILTIN_PSLLWI,
28221 IX86_BUILTIN_PSLLDI,
28222 IX86_BUILTIN_PSLLQI,
28223 IX86_BUILTIN_PSRAWI,
28224 IX86_BUILTIN_PSRADI,
28225 IX86_BUILTIN_PSRLWI,
28226 IX86_BUILTIN_PSRLDI,
28227 IX86_BUILTIN_PSRLQI,
28229 IX86_BUILTIN_PUNPCKHBW,
28230 IX86_BUILTIN_PUNPCKHWD,
28231 IX86_BUILTIN_PUNPCKHDQ,
28232 IX86_BUILTIN_PUNPCKLBW,
28233 IX86_BUILTIN_PUNPCKLWD,
28234 IX86_BUILTIN_PUNPCKLDQ,
28236 IX86_BUILTIN_SHUFPS,
28238 IX86_BUILTIN_RCPPS,
28239 IX86_BUILTIN_RCPSS,
28240 IX86_BUILTIN_RSQRTPS,
28241 IX86_BUILTIN_RSQRTPS_NR,
28242 IX86_BUILTIN_RSQRTSS,
28243 IX86_BUILTIN_RSQRTF,
28244 IX86_BUILTIN_SQRTPS,
28245 IX86_BUILTIN_SQRTPS_NR,
28246 IX86_BUILTIN_SQRTSS,
28248 IX86_BUILTIN_UNPCKHPS,
28249 IX86_BUILTIN_UNPCKLPS,
28251 IX86_BUILTIN_ANDPS,
28252 IX86_BUILTIN_ANDNPS,
28254 IX86_BUILTIN_XORPS,
28257 IX86_BUILTIN_LDMXCSR,
28258 IX86_BUILTIN_STMXCSR,
28259 IX86_BUILTIN_SFENCE,
28261 IX86_BUILTIN_FXSAVE,
28262 IX86_BUILTIN_FXRSTOR,
28263 IX86_BUILTIN_FXSAVE64,
28264 IX86_BUILTIN_FXRSTOR64,
28266 IX86_BUILTIN_XSAVE,
28267 IX86_BUILTIN_XRSTOR,
28268 IX86_BUILTIN_XSAVE64,
28269 IX86_BUILTIN_XRSTOR64,
28271 IX86_BUILTIN_XSAVEOPT,
28272 IX86_BUILTIN_XSAVEOPT64,
28274 IX86_BUILTIN_XSAVEC,
28275 IX86_BUILTIN_XSAVEC64,
28277 IX86_BUILTIN_XSAVES,
28278 IX86_BUILTIN_XRSTORS,
28279 IX86_BUILTIN_XSAVES64,
28280 IX86_BUILTIN_XRSTORS64,
28282 /* 3DNow! Original */
28283 IX86_BUILTIN_FEMMS,
28284 IX86_BUILTIN_PAVGUSB,
28285 IX86_BUILTIN_PF2ID,
28286 IX86_BUILTIN_PFACC,
28287 IX86_BUILTIN_PFADD,
28288 IX86_BUILTIN_PFCMPEQ,
28289 IX86_BUILTIN_PFCMPGE,
28290 IX86_BUILTIN_PFCMPGT,
28291 IX86_BUILTIN_PFMAX,
28292 IX86_BUILTIN_PFMIN,
28293 IX86_BUILTIN_PFMUL,
28294 IX86_BUILTIN_PFRCP,
28295 IX86_BUILTIN_PFRCPIT1,
28296 IX86_BUILTIN_PFRCPIT2,
28297 IX86_BUILTIN_PFRSQIT1,
28298 IX86_BUILTIN_PFRSQRT,
28299 IX86_BUILTIN_PFSUB,
28300 IX86_BUILTIN_PFSUBR,
28301 IX86_BUILTIN_PI2FD,
28302 IX86_BUILTIN_PMULHRW,
28304 /* 3DNow! Athlon Extensions */
28305 IX86_BUILTIN_PF2IW,
28306 IX86_BUILTIN_PFNACC,
28307 IX86_BUILTIN_PFPNACC,
28308 IX86_BUILTIN_PI2FW,
28309 IX86_BUILTIN_PSWAPDSI,
28310 IX86_BUILTIN_PSWAPDSF,
28313 IX86_BUILTIN_ADDPD,
28314 IX86_BUILTIN_ADDSD,
28315 IX86_BUILTIN_DIVPD,
28316 IX86_BUILTIN_DIVSD,
28317 IX86_BUILTIN_MULPD,
28318 IX86_BUILTIN_MULSD,
28319 IX86_BUILTIN_SUBPD,
28320 IX86_BUILTIN_SUBSD,
28322 IX86_BUILTIN_CMPEQPD,
28323 IX86_BUILTIN_CMPLTPD,
28324 IX86_BUILTIN_CMPLEPD,
28325 IX86_BUILTIN_CMPGTPD,
28326 IX86_BUILTIN_CMPGEPD,
28327 IX86_BUILTIN_CMPNEQPD,
28328 IX86_BUILTIN_CMPNLTPD,
28329 IX86_BUILTIN_CMPNLEPD,
28330 IX86_BUILTIN_CMPNGTPD,
28331 IX86_BUILTIN_CMPNGEPD,
28332 IX86_BUILTIN_CMPORDPD,
28333 IX86_BUILTIN_CMPUNORDPD,
28334 IX86_BUILTIN_CMPEQSD,
28335 IX86_BUILTIN_CMPLTSD,
28336 IX86_BUILTIN_CMPLESD,
28337 IX86_BUILTIN_CMPNEQSD,
28338 IX86_BUILTIN_CMPNLTSD,
28339 IX86_BUILTIN_CMPNLESD,
28340 IX86_BUILTIN_CMPORDSD,
28341 IX86_BUILTIN_CMPUNORDSD,
28343 IX86_BUILTIN_COMIEQSD,
28344 IX86_BUILTIN_COMILTSD,
28345 IX86_BUILTIN_COMILESD,
28346 IX86_BUILTIN_COMIGTSD,
28347 IX86_BUILTIN_COMIGESD,
28348 IX86_BUILTIN_COMINEQSD,
28349 IX86_BUILTIN_UCOMIEQSD,
28350 IX86_BUILTIN_UCOMILTSD,
28351 IX86_BUILTIN_UCOMILESD,
28352 IX86_BUILTIN_UCOMIGTSD,
28353 IX86_BUILTIN_UCOMIGESD,
28354 IX86_BUILTIN_UCOMINEQSD,
28356 IX86_BUILTIN_MAXPD,
28357 IX86_BUILTIN_MAXSD,
28358 IX86_BUILTIN_MINPD,
28359 IX86_BUILTIN_MINSD,
28361 IX86_BUILTIN_ANDPD,
28362 IX86_BUILTIN_ANDNPD,
28364 IX86_BUILTIN_XORPD,
28366 IX86_BUILTIN_SQRTPD,
28367 IX86_BUILTIN_SQRTSD,
28369 IX86_BUILTIN_UNPCKHPD,
28370 IX86_BUILTIN_UNPCKLPD,
28372 IX86_BUILTIN_SHUFPD,
28374 IX86_BUILTIN_LOADUPD,
28375 IX86_BUILTIN_STOREUPD,
28376 IX86_BUILTIN_MOVSD,
28378 IX86_BUILTIN_LOADHPD,
28379 IX86_BUILTIN_LOADLPD,
28381 IX86_BUILTIN_CVTDQ2PD,
28382 IX86_BUILTIN_CVTDQ2PS,
28384 IX86_BUILTIN_CVTPD2DQ,
28385 IX86_BUILTIN_CVTPD2PI,
28386 IX86_BUILTIN_CVTPD2PS,
28387 IX86_BUILTIN_CVTTPD2DQ,
28388 IX86_BUILTIN_CVTTPD2PI,
28390 IX86_BUILTIN_CVTPI2PD,
28391 IX86_BUILTIN_CVTSI2SD,
28392 IX86_BUILTIN_CVTSI642SD,
28394 IX86_BUILTIN_CVTSD2SI,
28395 IX86_BUILTIN_CVTSD2SI64,
28396 IX86_BUILTIN_CVTSD2SS,
28397 IX86_BUILTIN_CVTSS2SD,
28398 IX86_BUILTIN_CVTTSD2SI,
28399 IX86_BUILTIN_CVTTSD2SI64,
28401 IX86_BUILTIN_CVTPS2DQ,
28402 IX86_BUILTIN_CVTPS2PD,
28403 IX86_BUILTIN_CVTTPS2DQ,
28405 IX86_BUILTIN_MOVNTI,
28406 IX86_BUILTIN_MOVNTI64,
28407 IX86_BUILTIN_MOVNTPD,
28408 IX86_BUILTIN_MOVNTDQ,
28410 IX86_BUILTIN_MOVQ128,
28413 IX86_BUILTIN_MASKMOVDQU,
28414 IX86_BUILTIN_MOVMSKPD,
28415 IX86_BUILTIN_PMOVMSKB128,
28417 IX86_BUILTIN_PACKSSWB128,
28418 IX86_BUILTIN_PACKSSDW128,
28419 IX86_BUILTIN_PACKUSWB128,
28421 IX86_BUILTIN_PADDB128,
28422 IX86_BUILTIN_PADDW128,
28423 IX86_BUILTIN_PADDD128,
28424 IX86_BUILTIN_PADDQ128,
28425 IX86_BUILTIN_PADDSB128,
28426 IX86_BUILTIN_PADDSW128,
28427 IX86_BUILTIN_PADDUSB128,
28428 IX86_BUILTIN_PADDUSW128,
28429 IX86_BUILTIN_PSUBB128,
28430 IX86_BUILTIN_PSUBW128,
28431 IX86_BUILTIN_PSUBD128,
28432 IX86_BUILTIN_PSUBQ128,
28433 IX86_BUILTIN_PSUBSB128,
28434 IX86_BUILTIN_PSUBSW128,
28435 IX86_BUILTIN_PSUBUSB128,
28436 IX86_BUILTIN_PSUBUSW128,
28438 IX86_BUILTIN_PAND128,
28439 IX86_BUILTIN_PANDN128,
28440 IX86_BUILTIN_POR128,
28441 IX86_BUILTIN_PXOR128,
28443 IX86_BUILTIN_PAVGB128,
28444 IX86_BUILTIN_PAVGW128,
28446 IX86_BUILTIN_PCMPEQB128,
28447 IX86_BUILTIN_PCMPEQW128,
28448 IX86_BUILTIN_PCMPEQD128,
28449 IX86_BUILTIN_PCMPGTB128,
28450 IX86_BUILTIN_PCMPGTW128,
28451 IX86_BUILTIN_PCMPGTD128,
28453 IX86_BUILTIN_PMADDWD128,
28455 IX86_BUILTIN_PMAXSW128,
28456 IX86_BUILTIN_PMAXUB128,
28457 IX86_BUILTIN_PMINSW128,
28458 IX86_BUILTIN_PMINUB128,
28460 IX86_BUILTIN_PMULUDQ,
28461 IX86_BUILTIN_PMULUDQ128,
28462 IX86_BUILTIN_PMULHUW128,
28463 IX86_BUILTIN_PMULHW128,
28464 IX86_BUILTIN_PMULLW128,
28466 IX86_BUILTIN_PSADBW128,
28467 IX86_BUILTIN_PSHUFHW,
28468 IX86_BUILTIN_PSHUFLW,
28469 IX86_BUILTIN_PSHUFD,
28471 IX86_BUILTIN_PSLLDQI128,
28472 IX86_BUILTIN_PSLLWI128,
28473 IX86_BUILTIN_PSLLDI128,
28474 IX86_BUILTIN_PSLLQI128,
28475 IX86_BUILTIN_PSRAWI128,
28476 IX86_BUILTIN_PSRADI128,
28477 IX86_BUILTIN_PSRLDQI128,
28478 IX86_BUILTIN_PSRLWI128,
28479 IX86_BUILTIN_PSRLDI128,
28480 IX86_BUILTIN_PSRLQI128,
28482 IX86_BUILTIN_PSLLDQ128,
28483 IX86_BUILTIN_PSLLW128,
28484 IX86_BUILTIN_PSLLD128,
28485 IX86_BUILTIN_PSLLQ128,
28486 IX86_BUILTIN_PSRAW128,
28487 IX86_BUILTIN_PSRAD128,
28488 IX86_BUILTIN_PSRLW128,
28489 IX86_BUILTIN_PSRLD128,
28490 IX86_BUILTIN_PSRLQ128,
28492 IX86_BUILTIN_PUNPCKHBW128,
28493 IX86_BUILTIN_PUNPCKHWD128,
28494 IX86_BUILTIN_PUNPCKHDQ128,
28495 IX86_BUILTIN_PUNPCKHQDQ128,
28496 IX86_BUILTIN_PUNPCKLBW128,
28497 IX86_BUILTIN_PUNPCKLWD128,
28498 IX86_BUILTIN_PUNPCKLDQ128,
28499 IX86_BUILTIN_PUNPCKLQDQ128,
28501 IX86_BUILTIN_CLFLUSH,
28502 IX86_BUILTIN_MFENCE,
28503 IX86_BUILTIN_LFENCE,
28504 IX86_BUILTIN_PAUSE,
28506 IX86_BUILTIN_FNSTENV,
28507 IX86_BUILTIN_FLDENV,
28508 IX86_BUILTIN_FNSTSW,
28509 IX86_BUILTIN_FNCLEX,
28511 IX86_BUILTIN_BSRSI,
28512 IX86_BUILTIN_BSRDI,
28513 IX86_BUILTIN_RDPMC,
28514 IX86_BUILTIN_RDTSC,
28515 IX86_BUILTIN_RDTSCP,
28516 IX86_BUILTIN_ROLQI,
28517 IX86_BUILTIN_ROLHI,
28518 IX86_BUILTIN_RORQI,
28519 IX86_BUILTIN_RORHI,
28522 IX86_BUILTIN_ADDSUBPS,
28523 IX86_BUILTIN_HADDPS,
28524 IX86_BUILTIN_HSUBPS,
28525 IX86_BUILTIN_MOVSHDUP,
28526 IX86_BUILTIN_MOVSLDUP,
28527 IX86_BUILTIN_ADDSUBPD,
28528 IX86_BUILTIN_HADDPD,
28529 IX86_BUILTIN_HSUBPD,
28530 IX86_BUILTIN_LDDQU,
28532 IX86_BUILTIN_MONITOR,
28533 IX86_BUILTIN_MWAIT,
28536 IX86_BUILTIN_PHADDW,
28537 IX86_BUILTIN_PHADDD,
28538 IX86_BUILTIN_PHADDSW,
28539 IX86_BUILTIN_PHSUBW,
28540 IX86_BUILTIN_PHSUBD,
28541 IX86_BUILTIN_PHSUBSW,
28542 IX86_BUILTIN_PMADDUBSW,
28543 IX86_BUILTIN_PMULHRSW,
28544 IX86_BUILTIN_PSHUFB,
28545 IX86_BUILTIN_PSIGNB,
28546 IX86_BUILTIN_PSIGNW,
28547 IX86_BUILTIN_PSIGND,
28548 IX86_BUILTIN_PALIGNR,
28549 IX86_BUILTIN_PABSB,
28550 IX86_BUILTIN_PABSW,
28551 IX86_BUILTIN_PABSD,
28553 IX86_BUILTIN_PHADDW128,
28554 IX86_BUILTIN_PHADDD128,
28555 IX86_BUILTIN_PHADDSW128,
28556 IX86_BUILTIN_PHSUBW128,
28557 IX86_BUILTIN_PHSUBD128,
28558 IX86_BUILTIN_PHSUBSW128,
28559 IX86_BUILTIN_PMADDUBSW128,
28560 IX86_BUILTIN_PMULHRSW128,
28561 IX86_BUILTIN_PSHUFB128,
28562 IX86_BUILTIN_PSIGNB128,
28563 IX86_BUILTIN_PSIGNW128,
28564 IX86_BUILTIN_PSIGND128,
28565 IX86_BUILTIN_PALIGNR128,
28566 IX86_BUILTIN_PABSB128,
28567 IX86_BUILTIN_PABSW128,
28568 IX86_BUILTIN_PABSD128,
28570 /* AMDFAM10 - SSE4A New Instructions. */
28571 IX86_BUILTIN_MOVNTSD,
28572 IX86_BUILTIN_MOVNTSS,
28573 IX86_BUILTIN_EXTRQI,
28574 IX86_BUILTIN_EXTRQ,
28575 IX86_BUILTIN_INSERTQI,
28576 IX86_BUILTIN_INSERTQ,
28579 IX86_BUILTIN_BLENDPD,
28580 IX86_BUILTIN_BLENDPS,
28581 IX86_BUILTIN_BLENDVPD,
28582 IX86_BUILTIN_BLENDVPS,
28583 IX86_BUILTIN_PBLENDVB128,
28584 IX86_BUILTIN_PBLENDW128,
28589 IX86_BUILTIN_INSERTPS128,
28591 IX86_BUILTIN_MOVNTDQA,
28592 IX86_BUILTIN_MPSADBW128,
28593 IX86_BUILTIN_PACKUSDW128,
28594 IX86_BUILTIN_PCMPEQQ,
28595 IX86_BUILTIN_PHMINPOSUW128,
28597 IX86_BUILTIN_PMAXSB128,
28598 IX86_BUILTIN_PMAXSD128,
28599 IX86_BUILTIN_PMAXUD128,
28600 IX86_BUILTIN_PMAXUW128,
28602 IX86_BUILTIN_PMINSB128,
28603 IX86_BUILTIN_PMINSD128,
28604 IX86_BUILTIN_PMINUD128,
28605 IX86_BUILTIN_PMINUW128,
28607 IX86_BUILTIN_PMOVSXBW128,
28608 IX86_BUILTIN_PMOVSXBD128,
28609 IX86_BUILTIN_PMOVSXBQ128,
28610 IX86_BUILTIN_PMOVSXWD128,
28611 IX86_BUILTIN_PMOVSXWQ128,
28612 IX86_BUILTIN_PMOVSXDQ128,
28614 IX86_BUILTIN_PMOVZXBW128,
28615 IX86_BUILTIN_PMOVZXBD128,
28616 IX86_BUILTIN_PMOVZXBQ128,
28617 IX86_BUILTIN_PMOVZXWD128,
28618 IX86_BUILTIN_PMOVZXWQ128,
28619 IX86_BUILTIN_PMOVZXDQ128,
28621 IX86_BUILTIN_PMULDQ128,
28622 IX86_BUILTIN_PMULLD128,
28624 IX86_BUILTIN_ROUNDSD,
28625 IX86_BUILTIN_ROUNDSS,
28627 IX86_BUILTIN_ROUNDPD,
28628 IX86_BUILTIN_ROUNDPS,
28630 IX86_BUILTIN_FLOORPD,
28631 IX86_BUILTIN_CEILPD,
28632 IX86_BUILTIN_TRUNCPD,
28633 IX86_BUILTIN_RINTPD,
28634 IX86_BUILTIN_ROUNDPD_AZ,
28636 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28637 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28638 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28640 IX86_BUILTIN_FLOORPS,
28641 IX86_BUILTIN_CEILPS,
28642 IX86_BUILTIN_TRUNCPS,
28643 IX86_BUILTIN_RINTPS,
28644 IX86_BUILTIN_ROUNDPS_AZ,
28646 IX86_BUILTIN_FLOORPS_SFIX,
28647 IX86_BUILTIN_CEILPS_SFIX,
28648 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28650 IX86_BUILTIN_PTESTZ,
28651 IX86_BUILTIN_PTESTC,
28652 IX86_BUILTIN_PTESTNZC,
28654 IX86_BUILTIN_VEC_INIT_V2SI,
28655 IX86_BUILTIN_VEC_INIT_V4HI,
28656 IX86_BUILTIN_VEC_INIT_V8QI,
28657 IX86_BUILTIN_VEC_EXT_V2DF,
28658 IX86_BUILTIN_VEC_EXT_V2DI,
28659 IX86_BUILTIN_VEC_EXT_V4SF,
28660 IX86_BUILTIN_VEC_EXT_V4SI,
28661 IX86_BUILTIN_VEC_EXT_V8HI,
28662 IX86_BUILTIN_VEC_EXT_V2SI,
28663 IX86_BUILTIN_VEC_EXT_V4HI,
28664 IX86_BUILTIN_VEC_EXT_V16QI,
28665 IX86_BUILTIN_VEC_SET_V2DI,
28666 IX86_BUILTIN_VEC_SET_V4SF,
28667 IX86_BUILTIN_VEC_SET_V4SI,
28668 IX86_BUILTIN_VEC_SET_V8HI,
28669 IX86_BUILTIN_VEC_SET_V4HI,
28670 IX86_BUILTIN_VEC_SET_V16QI,
28672 IX86_BUILTIN_VEC_PACK_SFIX,
28673 IX86_BUILTIN_VEC_PACK_SFIX256,
28676 IX86_BUILTIN_CRC32QI,
28677 IX86_BUILTIN_CRC32HI,
28678 IX86_BUILTIN_CRC32SI,
28679 IX86_BUILTIN_CRC32DI,
28681 IX86_BUILTIN_PCMPESTRI128,
28682 IX86_BUILTIN_PCMPESTRM128,
28683 IX86_BUILTIN_PCMPESTRA128,
28684 IX86_BUILTIN_PCMPESTRC128,
28685 IX86_BUILTIN_PCMPESTRO128,
28686 IX86_BUILTIN_PCMPESTRS128,
28687 IX86_BUILTIN_PCMPESTRZ128,
28688 IX86_BUILTIN_PCMPISTRI128,
28689 IX86_BUILTIN_PCMPISTRM128,
28690 IX86_BUILTIN_PCMPISTRA128,
28691 IX86_BUILTIN_PCMPISTRC128,
28692 IX86_BUILTIN_PCMPISTRO128,
28693 IX86_BUILTIN_PCMPISTRS128,
28694 IX86_BUILTIN_PCMPISTRZ128,
28696 IX86_BUILTIN_PCMPGTQ,
28698 /* AES instructions */
28699 IX86_BUILTIN_AESENC128,
28700 IX86_BUILTIN_AESENCLAST128,
28701 IX86_BUILTIN_AESDEC128,
28702 IX86_BUILTIN_AESDECLAST128,
28703 IX86_BUILTIN_AESIMC128,
28704 IX86_BUILTIN_AESKEYGENASSIST128,
28706 /* PCLMUL instruction */
28707 IX86_BUILTIN_PCLMULQDQ128,
28710 IX86_BUILTIN_ADDPD256,
28711 IX86_BUILTIN_ADDPS256,
28712 IX86_BUILTIN_ADDSUBPD256,
28713 IX86_BUILTIN_ADDSUBPS256,
28714 IX86_BUILTIN_ANDPD256,
28715 IX86_BUILTIN_ANDPS256,
28716 IX86_BUILTIN_ANDNPD256,
28717 IX86_BUILTIN_ANDNPS256,
28718 IX86_BUILTIN_BLENDPD256,
28719 IX86_BUILTIN_BLENDPS256,
28720 IX86_BUILTIN_BLENDVPD256,
28721 IX86_BUILTIN_BLENDVPS256,
28722 IX86_BUILTIN_DIVPD256,
28723 IX86_BUILTIN_DIVPS256,
28724 IX86_BUILTIN_DPPS256,
28725 IX86_BUILTIN_HADDPD256,
28726 IX86_BUILTIN_HADDPS256,
28727 IX86_BUILTIN_HSUBPD256,
28728 IX86_BUILTIN_HSUBPS256,
28729 IX86_BUILTIN_MAXPD256,
28730 IX86_BUILTIN_MAXPS256,
28731 IX86_BUILTIN_MINPD256,
28732 IX86_BUILTIN_MINPS256,
28733 IX86_BUILTIN_MULPD256,
28734 IX86_BUILTIN_MULPS256,
28735 IX86_BUILTIN_ORPD256,
28736 IX86_BUILTIN_ORPS256,
28737 IX86_BUILTIN_SHUFPD256,
28738 IX86_BUILTIN_SHUFPS256,
28739 IX86_BUILTIN_SUBPD256,
28740 IX86_BUILTIN_SUBPS256,
28741 IX86_BUILTIN_XORPD256,
28742 IX86_BUILTIN_XORPS256,
28743 IX86_BUILTIN_CMPSD,
28744 IX86_BUILTIN_CMPSS,
28745 IX86_BUILTIN_CMPPD,
28746 IX86_BUILTIN_CMPPS,
28747 IX86_BUILTIN_CMPPD256,
28748 IX86_BUILTIN_CMPPS256,
28749 IX86_BUILTIN_CVTDQ2PD256,
28750 IX86_BUILTIN_CVTDQ2PS256,
28751 IX86_BUILTIN_CVTPD2PS256,
28752 IX86_BUILTIN_CVTPS2DQ256,
28753 IX86_BUILTIN_CVTPS2PD256,
28754 IX86_BUILTIN_CVTTPD2DQ256,
28755 IX86_BUILTIN_CVTPD2DQ256,
28756 IX86_BUILTIN_CVTTPS2DQ256,
28757 IX86_BUILTIN_EXTRACTF128PD256,
28758 IX86_BUILTIN_EXTRACTF128PS256,
28759 IX86_BUILTIN_EXTRACTF128SI256,
28760 IX86_BUILTIN_VZEROALL,
28761 IX86_BUILTIN_VZEROUPPER,
28762 IX86_BUILTIN_VPERMILVARPD,
28763 IX86_BUILTIN_VPERMILVARPS,
28764 IX86_BUILTIN_VPERMILVARPD256,
28765 IX86_BUILTIN_VPERMILVARPS256,
28766 IX86_BUILTIN_VPERMILPD,
28767 IX86_BUILTIN_VPERMILPS,
28768 IX86_BUILTIN_VPERMILPD256,
28769 IX86_BUILTIN_VPERMILPS256,
28770 IX86_BUILTIN_VPERMIL2PD,
28771 IX86_BUILTIN_VPERMIL2PS,
28772 IX86_BUILTIN_VPERMIL2PD256,
28773 IX86_BUILTIN_VPERMIL2PS256,
28774 IX86_BUILTIN_VPERM2F128PD256,
28775 IX86_BUILTIN_VPERM2F128PS256,
28776 IX86_BUILTIN_VPERM2F128SI256,
28777 IX86_BUILTIN_VBROADCASTSS,
28778 IX86_BUILTIN_VBROADCASTSD256,
28779 IX86_BUILTIN_VBROADCASTSS256,
28780 IX86_BUILTIN_VBROADCASTPD256,
28781 IX86_BUILTIN_VBROADCASTPS256,
28782 IX86_BUILTIN_VINSERTF128PD256,
28783 IX86_BUILTIN_VINSERTF128PS256,
28784 IX86_BUILTIN_VINSERTF128SI256,
28785 IX86_BUILTIN_LOADUPD256,
28786 IX86_BUILTIN_LOADUPS256,
28787 IX86_BUILTIN_STOREUPD256,
28788 IX86_BUILTIN_STOREUPS256,
28789 IX86_BUILTIN_LDDQU256,
28790 IX86_BUILTIN_MOVNTDQ256,
28791 IX86_BUILTIN_MOVNTPD256,
28792 IX86_BUILTIN_MOVNTPS256,
28793 IX86_BUILTIN_LOADDQU256,
28794 IX86_BUILTIN_STOREDQU256,
28795 IX86_BUILTIN_MASKLOADPD,
28796 IX86_BUILTIN_MASKLOADPS,
28797 IX86_BUILTIN_MASKSTOREPD,
28798 IX86_BUILTIN_MASKSTOREPS,
28799 IX86_BUILTIN_MASKLOADPD256,
28800 IX86_BUILTIN_MASKLOADPS256,
28801 IX86_BUILTIN_MASKSTOREPD256,
28802 IX86_BUILTIN_MASKSTOREPS256,
28803 IX86_BUILTIN_MOVSHDUP256,
28804 IX86_BUILTIN_MOVSLDUP256,
28805 IX86_BUILTIN_MOVDDUP256,
28807 IX86_BUILTIN_SQRTPD256,
28808 IX86_BUILTIN_SQRTPS256,
28809 IX86_BUILTIN_SQRTPS_NR256,
28810 IX86_BUILTIN_RSQRTPS256,
28811 IX86_BUILTIN_RSQRTPS_NR256,
28813 IX86_BUILTIN_RCPPS256,
28815 IX86_BUILTIN_ROUNDPD256,
28816 IX86_BUILTIN_ROUNDPS256,
28818 IX86_BUILTIN_FLOORPD256,
28819 IX86_BUILTIN_CEILPD256,
28820 IX86_BUILTIN_TRUNCPD256,
28821 IX86_BUILTIN_RINTPD256,
28822 IX86_BUILTIN_ROUNDPD_AZ256,
28824 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28825 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28826 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28828 IX86_BUILTIN_FLOORPS256,
28829 IX86_BUILTIN_CEILPS256,
28830 IX86_BUILTIN_TRUNCPS256,
28831 IX86_BUILTIN_RINTPS256,
28832 IX86_BUILTIN_ROUNDPS_AZ256,
28834 IX86_BUILTIN_FLOORPS_SFIX256,
28835 IX86_BUILTIN_CEILPS_SFIX256,
28836 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28838 IX86_BUILTIN_UNPCKHPD256,
28839 IX86_BUILTIN_UNPCKLPD256,
28840 IX86_BUILTIN_UNPCKHPS256,
28841 IX86_BUILTIN_UNPCKLPS256,
28843 IX86_BUILTIN_SI256_SI,
28844 IX86_BUILTIN_PS256_PS,
28845 IX86_BUILTIN_PD256_PD,
28846 IX86_BUILTIN_SI_SI256,
28847 IX86_BUILTIN_PS_PS256,
28848 IX86_BUILTIN_PD_PD256,
28850 IX86_BUILTIN_VTESTZPD,
28851 IX86_BUILTIN_VTESTCPD,
28852 IX86_BUILTIN_VTESTNZCPD,
28853 IX86_BUILTIN_VTESTZPS,
28854 IX86_BUILTIN_VTESTCPS,
28855 IX86_BUILTIN_VTESTNZCPS,
28856 IX86_BUILTIN_VTESTZPD256,
28857 IX86_BUILTIN_VTESTCPD256,
28858 IX86_BUILTIN_VTESTNZCPD256,
28859 IX86_BUILTIN_VTESTZPS256,
28860 IX86_BUILTIN_VTESTCPS256,
28861 IX86_BUILTIN_VTESTNZCPS256,
28862 IX86_BUILTIN_PTESTZ256,
28863 IX86_BUILTIN_PTESTC256,
28864 IX86_BUILTIN_PTESTNZC256,
28866 IX86_BUILTIN_MOVMSKPD256,
28867 IX86_BUILTIN_MOVMSKPS256,
28870 IX86_BUILTIN_MPSADBW256,
28871 IX86_BUILTIN_PABSB256,
28872 IX86_BUILTIN_PABSW256,
28873 IX86_BUILTIN_PABSD256,
28874 IX86_BUILTIN_PACKSSDW256,
28875 IX86_BUILTIN_PACKSSWB256,
28876 IX86_BUILTIN_PACKUSDW256,
28877 IX86_BUILTIN_PACKUSWB256,
28878 IX86_BUILTIN_PADDB256,
28879 IX86_BUILTIN_PADDW256,
28880 IX86_BUILTIN_PADDD256,
28881 IX86_BUILTIN_PADDQ256,
28882 IX86_BUILTIN_PADDSB256,
28883 IX86_BUILTIN_PADDSW256,
28884 IX86_BUILTIN_PADDUSB256,
28885 IX86_BUILTIN_PADDUSW256,
28886 IX86_BUILTIN_PALIGNR256,
28887 IX86_BUILTIN_AND256I,
28888 IX86_BUILTIN_ANDNOT256I,
28889 IX86_BUILTIN_PAVGB256,
28890 IX86_BUILTIN_PAVGW256,
28891 IX86_BUILTIN_PBLENDVB256,
28892 IX86_BUILTIN_PBLENDVW256,
28893 IX86_BUILTIN_PCMPEQB256,
28894 IX86_BUILTIN_PCMPEQW256,
28895 IX86_BUILTIN_PCMPEQD256,
28896 IX86_BUILTIN_PCMPEQQ256,
28897 IX86_BUILTIN_PCMPGTB256,
28898 IX86_BUILTIN_PCMPGTW256,
28899 IX86_BUILTIN_PCMPGTD256,
28900 IX86_BUILTIN_PCMPGTQ256,
28901 IX86_BUILTIN_PHADDW256,
28902 IX86_BUILTIN_PHADDD256,
28903 IX86_BUILTIN_PHADDSW256,
28904 IX86_BUILTIN_PHSUBW256,
28905 IX86_BUILTIN_PHSUBD256,
28906 IX86_BUILTIN_PHSUBSW256,
28907 IX86_BUILTIN_PMADDUBSW256,
28908 IX86_BUILTIN_PMADDWD256,
28909 IX86_BUILTIN_PMAXSB256,
28910 IX86_BUILTIN_PMAXSW256,
28911 IX86_BUILTIN_PMAXSD256,
28912 IX86_BUILTIN_PMAXUB256,
28913 IX86_BUILTIN_PMAXUW256,
28914 IX86_BUILTIN_PMAXUD256,
28915 IX86_BUILTIN_PMINSB256,
28916 IX86_BUILTIN_PMINSW256,
28917 IX86_BUILTIN_PMINSD256,
28918 IX86_BUILTIN_PMINUB256,
28919 IX86_BUILTIN_PMINUW256,
28920 IX86_BUILTIN_PMINUD256,
28921 IX86_BUILTIN_PMOVMSKB256,
28922 IX86_BUILTIN_PMOVSXBW256,
28923 IX86_BUILTIN_PMOVSXBD256,
28924 IX86_BUILTIN_PMOVSXBQ256,
28925 IX86_BUILTIN_PMOVSXWD256,
28926 IX86_BUILTIN_PMOVSXWQ256,
28927 IX86_BUILTIN_PMOVSXDQ256,
28928 IX86_BUILTIN_PMOVZXBW256,
28929 IX86_BUILTIN_PMOVZXBD256,
28930 IX86_BUILTIN_PMOVZXBQ256,
28931 IX86_BUILTIN_PMOVZXWD256,
28932 IX86_BUILTIN_PMOVZXWQ256,
28933 IX86_BUILTIN_PMOVZXDQ256,
28934 IX86_BUILTIN_PMULDQ256,
28935 IX86_BUILTIN_PMULHRSW256,
28936 IX86_BUILTIN_PMULHUW256,
28937 IX86_BUILTIN_PMULHW256,
28938 IX86_BUILTIN_PMULLW256,
28939 IX86_BUILTIN_PMULLD256,
28940 IX86_BUILTIN_PMULUDQ256,
28941 IX86_BUILTIN_POR256,
28942 IX86_BUILTIN_PSADBW256,
28943 IX86_BUILTIN_PSHUFB256,
28944 IX86_BUILTIN_PSHUFD256,
28945 IX86_BUILTIN_PSHUFHW256,
28946 IX86_BUILTIN_PSHUFLW256,
28947 IX86_BUILTIN_PSIGNB256,
28948 IX86_BUILTIN_PSIGNW256,
28949 IX86_BUILTIN_PSIGND256,
28950 IX86_BUILTIN_PSLLDQI256,
28951 IX86_BUILTIN_PSLLWI256,
28952 IX86_BUILTIN_PSLLW256,
28953 IX86_BUILTIN_PSLLDI256,
28954 IX86_BUILTIN_PSLLD256,
28955 IX86_BUILTIN_PSLLQI256,
28956 IX86_BUILTIN_PSLLQ256,
28957 IX86_BUILTIN_PSRAWI256,
28958 IX86_BUILTIN_PSRAW256,
28959 IX86_BUILTIN_PSRADI256,
28960 IX86_BUILTIN_PSRAD256,
28961 IX86_BUILTIN_PSRLDQI256,
28962 IX86_BUILTIN_PSRLWI256,
28963 IX86_BUILTIN_PSRLW256,
28964 IX86_BUILTIN_PSRLDI256,
28965 IX86_BUILTIN_PSRLD256,
28966 IX86_BUILTIN_PSRLQI256,
28967 IX86_BUILTIN_PSRLQ256,
28968 IX86_BUILTIN_PSUBB256,
28969 IX86_BUILTIN_PSUBW256,
28970 IX86_BUILTIN_PSUBD256,
28971 IX86_BUILTIN_PSUBQ256,
28972 IX86_BUILTIN_PSUBSB256,
28973 IX86_BUILTIN_PSUBSW256,
28974 IX86_BUILTIN_PSUBUSB256,
28975 IX86_BUILTIN_PSUBUSW256,
28976 IX86_BUILTIN_PUNPCKHBW256,
28977 IX86_BUILTIN_PUNPCKHWD256,
28978 IX86_BUILTIN_PUNPCKHDQ256,
28979 IX86_BUILTIN_PUNPCKHQDQ256,
28980 IX86_BUILTIN_PUNPCKLBW256,
28981 IX86_BUILTIN_PUNPCKLWD256,
28982 IX86_BUILTIN_PUNPCKLDQ256,
28983 IX86_BUILTIN_PUNPCKLQDQ256,
28984 IX86_BUILTIN_PXOR256,
28985 IX86_BUILTIN_MOVNTDQA256,
28986 IX86_BUILTIN_VBROADCASTSS_PS,
28987 IX86_BUILTIN_VBROADCASTSS_PS256,
28988 IX86_BUILTIN_VBROADCASTSD_PD256,
28989 IX86_BUILTIN_VBROADCASTSI256,
28990 IX86_BUILTIN_PBLENDD256,
28991 IX86_BUILTIN_PBLENDD128,
28992 IX86_BUILTIN_PBROADCASTB256,
28993 IX86_BUILTIN_PBROADCASTW256,
28994 IX86_BUILTIN_PBROADCASTD256,
28995 IX86_BUILTIN_PBROADCASTQ256,
28996 IX86_BUILTIN_PBROADCASTB128,
28997 IX86_BUILTIN_PBROADCASTW128,
28998 IX86_BUILTIN_PBROADCASTD128,
28999 IX86_BUILTIN_PBROADCASTQ128,
29000 IX86_BUILTIN_VPERMVARSI256,
29001 IX86_BUILTIN_VPERMDF256,
29002 IX86_BUILTIN_VPERMVARSF256,
29003 IX86_BUILTIN_VPERMDI256,
29004 IX86_BUILTIN_VPERMTI256,
29005 IX86_BUILTIN_VEXTRACT128I256,
29006 IX86_BUILTIN_VINSERT128I256,
29007 IX86_BUILTIN_MASKLOADD,
29008 IX86_BUILTIN_MASKLOADQ,
29009 IX86_BUILTIN_MASKLOADD256,
29010 IX86_BUILTIN_MASKLOADQ256,
29011 IX86_BUILTIN_MASKSTORED,
29012 IX86_BUILTIN_MASKSTOREQ,
29013 IX86_BUILTIN_MASKSTORED256,
29014 IX86_BUILTIN_MASKSTOREQ256,
29015 IX86_BUILTIN_PSLLVV4DI,
29016 IX86_BUILTIN_PSLLVV2DI,
29017 IX86_BUILTIN_PSLLVV8SI,
29018 IX86_BUILTIN_PSLLVV4SI,
29019 IX86_BUILTIN_PSRAVV8SI,
29020 IX86_BUILTIN_PSRAVV4SI,
29021 IX86_BUILTIN_PSRLVV4DI,
29022 IX86_BUILTIN_PSRLVV2DI,
29023 IX86_BUILTIN_PSRLVV8SI,
29024 IX86_BUILTIN_PSRLVV4SI,
29026 IX86_BUILTIN_GATHERSIV2DF,
29027 IX86_BUILTIN_GATHERSIV4DF,
29028 IX86_BUILTIN_GATHERDIV2DF,
29029 IX86_BUILTIN_GATHERDIV4DF,
29030 IX86_BUILTIN_GATHERSIV4SF,
29031 IX86_BUILTIN_GATHERSIV8SF,
29032 IX86_BUILTIN_GATHERDIV4SF,
29033 IX86_BUILTIN_GATHERDIV8SF,
29034 IX86_BUILTIN_GATHERSIV2DI,
29035 IX86_BUILTIN_GATHERSIV4DI,
29036 IX86_BUILTIN_GATHERDIV2DI,
29037 IX86_BUILTIN_GATHERDIV4DI,
29038 IX86_BUILTIN_GATHERSIV4SI,
29039 IX86_BUILTIN_GATHERSIV8SI,
29040 IX86_BUILTIN_GATHERDIV4SI,
29041 IX86_BUILTIN_GATHERDIV8SI,
29044 IX86_BUILTIN_SI512_SI256,
29045 IX86_BUILTIN_PD512_PD256,
29046 IX86_BUILTIN_PS512_PS256,
29047 IX86_BUILTIN_SI512_SI,
29048 IX86_BUILTIN_PD512_PD,
29049 IX86_BUILTIN_PS512_PS,
29050 IX86_BUILTIN_ADDPD512,
29051 IX86_BUILTIN_ADDPS512,
29052 IX86_BUILTIN_ADDSD_ROUND,
29053 IX86_BUILTIN_ADDSS_ROUND,
29054 IX86_BUILTIN_ALIGND512,
29055 IX86_BUILTIN_ALIGNQ512,
29056 IX86_BUILTIN_BLENDMD512,
29057 IX86_BUILTIN_BLENDMPD512,
29058 IX86_BUILTIN_BLENDMPS512,
29059 IX86_BUILTIN_BLENDMQ512,
29060 IX86_BUILTIN_BROADCASTF32X4_512,
29061 IX86_BUILTIN_BROADCASTF64X4_512,
29062 IX86_BUILTIN_BROADCASTI32X4_512,
29063 IX86_BUILTIN_BROADCASTI64X4_512,
29064 IX86_BUILTIN_BROADCASTSD512,
29065 IX86_BUILTIN_BROADCASTSS512,
29066 IX86_BUILTIN_CMPD512,
29067 IX86_BUILTIN_CMPPD512,
29068 IX86_BUILTIN_CMPPS512,
29069 IX86_BUILTIN_CMPQ512,
29070 IX86_BUILTIN_CMPSD_MASK,
29071 IX86_BUILTIN_CMPSS_MASK,
29072 IX86_BUILTIN_COMIDF,
29073 IX86_BUILTIN_COMISF,
29074 IX86_BUILTIN_COMPRESSPD512,
29075 IX86_BUILTIN_COMPRESSPDSTORE512,
29076 IX86_BUILTIN_COMPRESSPS512,
29077 IX86_BUILTIN_COMPRESSPSSTORE512,
29078 IX86_BUILTIN_CVTDQ2PD512,
29079 IX86_BUILTIN_CVTDQ2PS512,
29080 IX86_BUILTIN_CVTPD2DQ512,
29081 IX86_BUILTIN_CVTPD2PS512,
29082 IX86_BUILTIN_CVTPD2UDQ512,
29083 IX86_BUILTIN_CVTPH2PS512,
29084 IX86_BUILTIN_CVTPS2DQ512,
29085 IX86_BUILTIN_CVTPS2PD512,
29086 IX86_BUILTIN_CVTPS2PH512,
29087 IX86_BUILTIN_CVTPS2UDQ512,
29088 IX86_BUILTIN_CVTSD2SS_ROUND,
29089 IX86_BUILTIN_CVTSI2SD64,
29090 IX86_BUILTIN_CVTSI2SS32,
29091 IX86_BUILTIN_CVTSI2SS64,
29092 IX86_BUILTIN_CVTSS2SD_ROUND,
29093 IX86_BUILTIN_CVTTPD2DQ512,
29094 IX86_BUILTIN_CVTTPD2UDQ512,
29095 IX86_BUILTIN_CVTTPS2DQ512,
29096 IX86_BUILTIN_CVTTPS2UDQ512,
29097 IX86_BUILTIN_CVTUDQ2PD512,
29098 IX86_BUILTIN_CVTUDQ2PS512,
29099 IX86_BUILTIN_CVTUSI2SD32,
29100 IX86_BUILTIN_CVTUSI2SD64,
29101 IX86_BUILTIN_CVTUSI2SS32,
29102 IX86_BUILTIN_CVTUSI2SS64,
29103 IX86_BUILTIN_DIVPD512,
29104 IX86_BUILTIN_DIVPS512,
29105 IX86_BUILTIN_DIVSD_ROUND,
29106 IX86_BUILTIN_DIVSS_ROUND,
29107 IX86_BUILTIN_EXPANDPD512,
29108 IX86_BUILTIN_EXPANDPD512Z,
29109 IX86_BUILTIN_EXPANDPDLOAD512,
29110 IX86_BUILTIN_EXPANDPDLOAD512Z,
29111 IX86_BUILTIN_EXPANDPS512,
29112 IX86_BUILTIN_EXPANDPS512Z,
29113 IX86_BUILTIN_EXPANDPSLOAD512,
29114 IX86_BUILTIN_EXPANDPSLOAD512Z,
29115 IX86_BUILTIN_EXTRACTF32X4,
29116 IX86_BUILTIN_EXTRACTF64X4,
29117 IX86_BUILTIN_EXTRACTI32X4,
29118 IX86_BUILTIN_EXTRACTI64X4,
29119 IX86_BUILTIN_FIXUPIMMPD512_MASK,
29120 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
29121 IX86_BUILTIN_FIXUPIMMPS512_MASK,
29122 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
29123 IX86_BUILTIN_FIXUPIMMSD128_MASK,
29124 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
29125 IX86_BUILTIN_FIXUPIMMSS128_MASK,
29126 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
29127 IX86_BUILTIN_GETEXPPD512,
29128 IX86_BUILTIN_GETEXPPS512,
29129 IX86_BUILTIN_GETEXPSD128,
29130 IX86_BUILTIN_GETEXPSS128,
29131 IX86_BUILTIN_GETMANTPD512,
29132 IX86_BUILTIN_GETMANTPS512,
29133 IX86_BUILTIN_GETMANTSD128,
29134 IX86_BUILTIN_GETMANTSS128,
29135 IX86_BUILTIN_INSERTF32X4,
29136 IX86_BUILTIN_INSERTF64X4,
29137 IX86_BUILTIN_INSERTI32X4,
29138 IX86_BUILTIN_INSERTI64X4,
29139 IX86_BUILTIN_LOADAPD512,
29140 IX86_BUILTIN_LOADAPS512,
29141 IX86_BUILTIN_LOADDQUDI512,
29142 IX86_BUILTIN_LOADDQUSI512,
29143 IX86_BUILTIN_LOADUPD512,
29144 IX86_BUILTIN_LOADUPS512,
29145 IX86_BUILTIN_MAXPD512,
29146 IX86_BUILTIN_MAXPS512,
29147 IX86_BUILTIN_MAXSD_ROUND,
29148 IX86_BUILTIN_MAXSS_ROUND,
29149 IX86_BUILTIN_MINPD512,
29150 IX86_BUILTIN_MINPS512,
29151 IX86_BUILTIN_MINSD_ROUND,
29152 IX86_BUILTIN_MINSS_ROUND,
29153 IX86_BUILTIN_MOVAPD512,
29154 IX86_BUILTIN_MOVAPS512,
29155 IX86_BUILTIN_MOVDDUP512,
29156 IX86_BUILTIN_MOVDQA32LOAD512,
29157 IX86_BUILTIN_MOVDQA32STORE512,
29158 IX86_BUILTIN_MOVDQA32_512,
29159 IX86_BUILTIN_MOVDQA64LOAD512,
29160 IX86_BUILTIN_MOVDQA64STORE512,
29161 IX86_BUILTIN_MOVDQA64_512,
29162 IX86_BUILTIN_MOVNTDQ512,
29163 IX86_BUILTIN_MOVNTDQA512,
29164 IX86_BUILTIN_MOVNTPD512,
29165 IX86_BUILTIN_MOVNTPS512,
29166 IX86_BUILTIN_MOVSHDUP512,
29167 IX86_BUILTIN_MOVSLDUP512,
29168 IX86_BUILTIN_MULPD512,
29169 IX86_BUILTIN_MULPS512,
29170 IX86_BUILTIN_MULSD_ROUND,
29171 IX86_BUILTIN_MULSS_ROUND,
29172 IX86_BUILTIN_PABSD512,
29173 IX86_BUILTIN_PABSQ512,
29174 IX86_BUILTIN_PADDD512,
29175 IX86_BUILTIN_PADDQ512,
29176 IX86_BUILTIN_PANDD512,
29177 IX86_BUILTIN_PANDND512,
29178 IX86_BUILTIN_PANDNQ512,
29179 IX86_BUILTIN_PANDQ512,
29180 IX86_BUILTIN_PBROADCASTD512,
29181 IX86_BUILTIN_PBROADCASTD512_GPR,
29182 IX86_BUILTIN_PBROADCASTMB512,
29183 IX86_BUILTIN_PBROADCASTMW512,
29184 IX86_BUILTIN_PBROADCASTQ512,
29185 IX86_BUILTIN_PBROADCASTQ512_GPR,
29186 IX86_BUILTIN_PCMPEQD512_MASK,
29187 IX86_BUILTIN_PCMPEQQ512_MASK,
29188 IX86_BUILTIN_PCMPGTD512_MASK,
29189 IX86_BUILTIN_PCMPGTQ512_MASK,
29190 IX86_BUILTIN_PCOMPRESSD512,
29191 IX86_BUILTIN_PCOMPRESSDSTORE512,
29192 IX86_BUILTIN_PCOMPRESSQ512,
29193 IX86_BUILTIN_PCOMPRESSQSTORE512,
29194 IX86_BUILTIN_PEXPANDD512,
29195 IX86_BUILTIN_PEXPANDD512Z,
29196 IX86_BUILTIN_PEXPANDDLOAD512,
29197 IX86_BUILTIN_PEXPANDDLOAD512Z,
29198 IX86_BUILTIN_PEXPANDQ512,
29199 IX86_BUILTIN_PEXPANDQ512Z,
29200 IX86_BUILTIN_PEXPANDQLOAD512,
29201 IX86_BUILTIN_PEXPANDQLOAD512Z,
29202 IX86_BUILTIN_PMAXSD512,
29203 IX86_BUILTIN_PMAXSQ512,
29204 IX86_BUILTIN_PMAXUD512,
29205 IX86_BUILTIN_PMAXUQ512,
29206 IX86_BUILTIN_PMINSD512,
29207 IX86_BUILTIN_PMINSQ512,
29208 IX86_BUILTIN_PMINUD512,
29209 IX86_BUILTIN_PMINUQ512,
29210 IX86_BUILTIN_PMOVDB512,
29211 IX86_BUILTIN_PMOVDB512_MEM,
29212 IX86_BUILTIN_PMOVDW512,
29213 IX86_BUILTIN_PMOVDW512_MEM,
29214 IX86_BUILTIN_PMOVQB512,
29215 IX86_BUILTIN_PMOVQB512_MEM,
29216 IX86_BUILTIN_PMOVQD512,
29217 IX86_BUILTIN_PMOVQD512_MEM,
29218 IX86_BUILTIN_PMOVQW512,
29219 IX86_BUILTIN_PMOVQW512_MEM,
29220 IX86_BUILTIN_PMOVSDB512,
29221 IX86_BUILTIN_PMOVSDB512_MEM,
29222 IX86_BUILTIN_PMOVSDW512,
29223 IX86_BUILTIN_PMOVSDW512_MEM,
29224 IX86_BUILTIN_PMOVSQB512,
29225 IX86_BUILTIN_PMOVSQB512_MEM,
29226 IX86_BUILTIN_PMOVSQD512,
29227 IX86_BUILTIN_PMOVSQD512_MEM,
29228 IX86_BUILTIN_PMOVSQW512,
29229 IX86_BUILTIN_PMOVSQW512_MEM,
29230 IX86_BUILTIN_PMOVSXBD512,
29231 IX86_BUILTIN_PMOVSXBQ512,
29232 IX86_BUILTIN_PMOVSXDQ512,
29233 IX86_BUILTIN_PMOVSXWD512,
29234 IX86_BUILTIN_PMOVSXWQ512,
29235 IX86_BUILTIN_PMOVUSDB512,
29236 IX86_BUILTIN_PMOVUSDB512_MEM,
29237 IX86_BUILTIN_PMOVUSDW512,
29238 IX86_BUILTIN_PMOVUSDW512_MEM,
29239 IX86_BUILTIN_PMOVUSQB512,
29240 IX86_BUILTIN_PMOVUSQB512_MEM,
29241 IX86_BUILTIN_PMOVUSQD512,
29242 IX86_BUILTIN_PMOVUSQD512_MEM,
29243 IX86_BUILTIN_PMOVUSQW512,
29244 IX86_BUILTIN_PMOVUSQW512_MEM,
29245 IX86_BUILTIN_PMOVZXBD512,
29246 IX86_BUILTIN_PMOVZXBQ512,
29247 IX86_BUILTIN_PMOVZXDQ512,
29248 IX86_BUILTIN_PMOVZXWD512,
29249 IX86_BUILTIN_PMOVZXWQ512,
29250 IX86_BUILTIN_PMULDQ512,
29251 IX86_BUILTIN_PMULLD512,
29252 IX86_BUILTIN_PMULUDQ512,
29253 IX86_BUILTIN_PORD512,
29254 IX86_BUILTIN_PORQ512,
29255 IX86_BUILTIN_PROLD512,
29256 IX86_BUILTIN_PROLQ512,
29257 IX86_BUILTIN_PROLVD512,
29258 IX86_BUILTIN_PROLVQ512,
29259 IX86_BUILTIN_PRORD512,
29260 IX86_BUILTIN_PRORQ512,
29261 IX86_BUILTIN_PRORVD512,
29262 IX86_BUILTIN_PRORVQ512,
29263 IX86_BUILTIN_PSHUFD512,
29264 IX86_BUILTIN_PSLLD512,
29265 IX86_BUILTIN_PSLLDI512,
29266 IX86_BUILTIN_PSLLQ512,
29267 IX86_BUILTIN_PSLLQI512,
29268 IX86_BUILTIN_PSLLVV16SI,
29269 IX86_BUILTIN_PSLLVV8DI,
29270 IX86_BUILTIN_PSRAD512,
29271 IX86_BUILTIN_PSRADI512,
29272 IX86_BUILTIN_PSRAQ512,
29273 IX86_BUILTIN_PSRAQI512,
29274 IX86_BUILTIN_PSRAVV16SI,
29275 IX86_BUILTIN_PSRAVV8DI,
29276 IX86_BUILTIN_PSRLD512,
29277 IX86_BUILTIN_PSRLDI512,
29278 IX86_BUILTIN_PSRLQ512,
29279 IX86_BUILTIN_PSRLQI512,
29280 IX86_BUILTIN_PSRLVV16SI,
29281 IX86_BUILTIN_PSRLVV8DI,
29282 IX86_BUILTIN_PSUBD512,
29283 IX86_BUILTIN_PSUBQ512,
29284 IX86_BUILTIN_PTESTMD512,
29285 IX86_BUILTIN_PTESTMQ512,
29286 IX86_BUILTIN_PTESTNMD512,
29287 IX86_BUILTIN_PTESTNMQ512,
29288 IX86_BUILTIN_PUNPCKHDQ512,
29289 IX86_BUILTIN_PUNPCKHQDQ512,
29290 IX86_BUILTIN_PUNPCKLDQ512,
29291 IX86_BUILTIN_PUNPCKLQDQ512,
29292 IX86_BUILTIN_PXORD512,
29293 IX86_BUILTIN_PXORQ512,
29294 IX86_BUILTIN_RCP14PD512,
29295 IX86_BUILTIN_RCP14PS512,
29296 IX86_BUILTIN_RCP14SD,
29297 IX86_BUILTIN_RCP14SS,
29298 IX86_BUILTIN_RNDSCALEPD,
29299 IX86_BUILTIN_RNDSCALEPS,
29300 IX86_BUILTIN_RNDSCALESD,
29301 IX86_BUILTIN_RNDSCALESS,
29302 IX86_BUILTIN_RSQRT14PD512,
29303 IX86_BUILTIN_RSQRT14PS512,
29304 IX86_BUILTIN_RSQRT14SD,
29305 IX86_BUILTIN_RSQRT14SS,
29306 IX86_BUILTIN_SCALEFPD512,
29307 IX86_BUILTIN_SCALEFPS512,
29308 IX86_BUILTIN_SCALEFSD,
29309 IX86_BUILTIN_SCALEFSS,
29310 IX86_BUILTIN_SHUFPD512,
29311 IX86_BUILTIN_SHUFPS512,
29312 IX86_BUILTIN_SHUF_F32x4,
29313 IX86_BUILTIN_SHUF_F64x2,
29314 IX86_BUILTIN_SHUF_I32x4,
29315 IX86_BUILTIN_SHUF_I64x2,
29316 IX86_BUILTIN_SQRTPD512,
29317 IX86_BUILTIN_SQRTPD512_MASK,
29318 IX86_BUILTIN_SQRTPS512_MASK,
29319 IX86_BUILTIN_SQRTPS_NR512,
29320 IX86_BUILTIN_SQRTSD_ROUND,
29321 IX86_BUILTIN_SQRTSS_ROUND,
29322 IX86_BUILTIN_STOREAPD512,
29323 IX86_BUILTIN_STOREAPS512,
29324 IX86_BUILTIN_STOREDQUDI512,
29325 IX86_BUILTIN_STOREDQUSI512,
29326 IX86_BUILTIN_STOREUPD512,
29327 IX86_BUILTIN_STOREUPS512,
29328 IX86_BUILTIN_SUBPD512,
29329 IX86_BUILTIN_SUBPS512,
29330 IX86_BUILTIN_SUBSD_ROUND,
29331 IX86_BUILTIN_SUBSS_ROUND,
29332 IX86_BUILTIN_UCMPD512,
29333 IX86_BUILTIN_UCMPQ512,
29334 IX86_BUILTIN_UNPCKHPD512,
29335 IX86_BUILTIN_UNPCKHPS512,
29336 IX86_BUILTIN_UNPCKLPD512,
29337 IX86_BUILTIN_UNPCKLPS512,
29338 IX86_BUILTIN_VCVTSD2SI32,
29339 IX86_BUILTIN_VCVTSD2SI64,
29340 IX86_BUILTIN_VCVTSD2USI32,
29341 IX86_BUILTIN_VCVTSD2USI64,
29342 IX86_BUILTIN_VCVTSS2SI32,
29343 IX86_BUILTIN_VCVTSS2SI64,
29344 IX86_BUILTIN_VCVTSS2USI32,
29345 IX86_BUILTIN_VCVTSS2USI64,
29346 IX86_BUILTIN_VCVTTSD2SI32,
29347 IX86_BUILTIN_VCVTTSD2SI64,
29348 IX86_BUILTIN_VCVTTSD2USI32,
29349 IX86_BUILTIN_VCVTTSD2USI64,
29350 IX86_BUILTIN_VCVTTSS2SI32,
29351 IX86_BUILTIN_VCVTTSS2SI64,
29352 IX86_BUILTIN_VCVTTSS2USI32,
29353 IX86_BUILTIN_VCVTTSS2USI64,
29354 IX86_BUILTIN_VFMADDPD512_MASK,
29355 IX86_BUILTIN_VFMADDPD512_MASK3,
29356 IX86_BUILTIN_VFMADDPD512_MASKZ,
29357 IX86_BUILTIN_VFMADDPS512_MASK,
29358 IX86_BUILTIN_VFMADDPS512_MASK3,
29359 IX86_BUILTIN_VFMADDPS512_MASKZ,
29360 IX86_BUILTIN_VFMADDSD3_ROUND,
29361 IX86_BUILTIN_VFMADDSS3_ROUND,
29362 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29363 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29364 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29365 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29366 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29367 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29368 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29369 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29370 IX86_BUILTIN_VFMSUBPD512_MASK3,
29371 IX86_BUILTIN_VFMSUBPS512_MASK3,
29372 IX86_BUILTIN_VFMSUBSD3_MASK3,
29373 IX86_BUILTIN_VFMSUBSS3_MASK3,
29374 IX86_BUILTIN_VFNMADDPD512_MASK,
29375 IX86_BUILTIN_VFNMADDPS512_MASK,
29376 IX86_BUILTIN_VFNMSUBPD512_MASK,
29377 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29378 IX86_BUILTIN_VFNMSUBPS512_MASK,
29379 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29380 IX86_BUILTIN_VPCLZCNTD512,
29381 IX86_BUILTIN_VPCLZCNTQ512,
29382 IX86_BUILTIN_VPCONFLICTD512,
29383 IX86_BUILTIN_VPCONFLICTQ512,
29384 IX86_BUILTIN_VPERMDF512,
29385 IX86_BUILTIN_VPERMDI512,
29386 IX86_BUILTIN_VPERMI2VARD512,
29387 IX86_BUILTIN_VPERMI2VARPD512,
29388 IX86_BUILTIN_VPERMI2VARPS512,
29389 IX86_BUILTIN_VPERMI2VARQ512,
29390 IX86_BUILTIN_VPERMILPD512,
29391 IX86_BUILTIN_VPERMILPS512,
29392 IX86_BUILTIN_VPERMILVARPD512,
29393 IX86_BUILTIN_VPERMILVARPS512,
29394 IX86_BUILTIN_VPERMT2VARD512,
29395 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29396 IX86_BUILTIN_VPERMT2VARPD512,
29397 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29398 IX86_BUILTIN_VPERMT2VARPS512,
29399 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29400 IX86_BUILTIN_VPERMT2VARQ512,
29401 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29402 IX86_BUILTIN_VPERMVARDF512,
29403 IX86_BUILTIN_VPERMVARDI512,
29404 IX86_BUILTIN_VPERMVARSF512,
29405 IX86_BUILTIN_VPERMVARSI512,
29406 IX86_BUILTIN_VTERNLOGD512_MASK,
29407 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29408 IX86_BUILTIN_VTERNLOGQ512_MASK,
29409 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29411 /* Mask arithmetic operations */
29412 IX86_BUILTIN_KAND16,
29413 IX86_BUILTIN_KANDN16,
29414 IX86_BUILTIN_KNOT16,
29415 IX86_BUILTIN_KOR16,
29416 IX86_BUILTIN_KORTESTC16,
29417 IX86_BUILTIN_KORTESTZ16,
29418 IX86_BUILTIN_KUNPCKBW,
29419 IX86_BUILTIN_KXNOR16,
29420 IX86_BUILTIN_KXOR16,
29421 IX86_BUILTIN_KMOV16,
29424 IX86_BUILTIN_PMOVUSQD256_MEM,
29425 IX86_BUILTIN_PMOVUSQD128_MEM,
29426 IX86_BUILTIN_PMOVSQD256_MEM,
29427 IX86_BUILTIN_PMOVSQD128_MEM,
29428 IX86_BUILTIN_PMOVQD256_MEM,
29429 IX86_BUILTIN_PMOVQD128_MEM,
29430 IX86_BUILTIN_PMOVUSQW256_MEM,
29431 IX86_BUILTIN_PMOVUSQW128_MEM,
29432 IX86_BUILTIN_PMOVSQW256_MEM,
29433 IX86_BUILTIN_PMOVSQW128_MEM,
29434 IX86_BUILTIN_PMOVQW256_MEM,
29435 IX86_BUILTIN_PMOVQW128_MEM,
29436 IX86_BUILTIN_PMOVUSQB256_MEM,
29437 IX86_BUILTIN_PMOVUSQB128_MEM,
29438 IX86_BUILTIN_PMOVSQB256_MEM,
29439 IX86_BUILTIN_PMOVSQB128_MEM,
29440 IX86_BUILTIN_PMOVQB256_MEM,
29441 IX86_BUILTIN_PMOVQB128_MEM,
29442 IX86_BUILTIN_PMOVUSDW256_MEM,
29443 IX86_BUILTIN_PMOVUSDW128_MEM,
29444 IX86_BUILTIN_PMOVSDW256_MEM,
29445 IX86_BUILTIN_PMOVSDW128_MEM,
29446 IX86_BUILTIN_PMOVDW256_MEM,
29447 IX86_BUILTIN_PMOVDW128_MEM,
29448 IX86_BUILTIN_PMOVUSDB256_MEM,
29449 IX86_BUILTIN_PMOVUSDB128_MEM,
29450 IX86_BUILTIN_PMOVSDB256_MEM,
29451 IX86_BUILTIN_PMOVSDB128_MEM,
29452 IX86_BUILTIN_PMOVDB256_MEM,
29453 IX86_BUILTIN_PMOVDB128_MEM,
29454 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29455 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29456 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29457 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29458 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29459 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29460 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29461 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29462 IX86_BUILTIN_LOADAPD256_MASK,
29463 IX86_BUILTIN_LOADAPD128_MASK,
29464 IX86_BUILTIN_LOADAPS256_MASK,
29465 IX86_BUILTIN_LOADAPS128_MASK,
29466 IX86_BUILTIN_STOREAPD256_MASK,
29467 IX86_BUILTIN_STOREAPD128_MASK,
29468 IX86_BUILTIN_STOREAPS256_MASK,
29469 IX86_BUILTIN_STOREAPS128_MASK,
29470 IX86_BUILTIN_LOADUPD256_MASK,
29471 IX86_BUILTIN_LOADUPD128_MASK,
29472 IX86_BUILTIN_LOADUPS256_MASK,
29473 IX86_BUILTIN_LOADUPS128_MASK,
29474 IX86_BUILTIN_STOREUPD256_MASK,
29475 IX86_BUILTIN_STOREUPD128_MASK,
29476 IX86_BUILTIN_STOREUPS256_MASK,
29477 IX86_BUILTIN_STOREUPS128_MASK,
29478 IX86_BUILTIN_LOADDQUDI256_MASK,
29479 IX86_BUILTIN_LOADDQUDI128_MASK,
29480 IX86_BUILTIN_LOADDQUSI256_MASK,
29481 IX86_BUILTIN_LOADDQUSI128_MASK,
29482 IX86_BUILTIN_LOADDQUHI256_MASK,
29483 IX86_BUILTIN_LOADDQUHI128_MASK,
29484 IX86_BUILTIN_LOADDQUQI256_MASK,
29485 IX86_BUILTIN_LOADDQUQI128_MASK,
29486 IX86_BUILTIN_STOREDQUDI256_MASK,
29487 IX86_BUILTIN_STOREDQUDI128_MASK,
29488 IX86_BUILTIN_STOREDQUSI256_MASK,
29489 IX86_BUILTIN_STOREDQUSI128_MASK,
29490 IX86_BUILTIN_STOREDQUHI256_MASK,
29491 IX86_BUILTIN_STOREDQUHI128_MASK,
29492 IX86_BUILTIN_STOREDQUQI256_MASK,
29493 IX86_BUILTIN_STOREDQUQI128_MASK,
29494 IX86_BUILTIN_COMPRESSPDSTORE256,
29495 IX86_BUILTIN_COMPRESSPDSTORE128,
29496 IX86_BUILTIN_COMPRESSPSSTORE256,
29497 IX86_BUILTIN_COMPRESSPSSTORE128,
29498 IX86_BUILTIN_PCOMPRESSQSTORE256,
29499 IX86_BUILTIN_PCOMPRESSQSTORE128,
29500 IX86_BUILTIN_PCOMPRESSDSTORE256,
29501 IX86_BUILTIN_PCOMPRESSDSTORE128,
29502 IX86_BUILTIN_EXPANDPDLOAD256,
29503 IX86_BUILTIN_EXPANDPDLOAD128,
29504 IX86_BUILTIN_EXPANDPSLOAD256,
29505 IX86_BUILTIN_EXPANDPSLOAD128,
29506 IX86_BUILTIN_PEXPANDQLOAD256,
29507 IX86_BUILTIN_PEXPANDQLOAD128,
29508 IX86_BUILTIN_PEXPANDDLOAD256,
29509 IX86_BUILTIN_PEXPANDDLOAD128,
29510 IX86_BUILTIN_EXPANDPDLOAD256Z,
29511 IX86_BUILTIN_EXPANDPDLOAD128Z,
29512 IX86_BUILTIN_EXPANDPSLOAD256Z,
29513 IX86_BUILTIN_EXPANDPSLOAD128Z,
29514 IX86_BUILTIN_PEXPANDQLOAD256Z,
29515 IX86_BUILTIN_PEXPANDQLOAD128Z,
29516 IX86_BUILTIN_PEXPANDDLOAD256Z,
29517 IX86_BUILTIN_PEXPANDDLOAD128Z,
29518 IX86_BUILTIN_PALIGNR256_MASK,
29519 IX86_BUILTIN_PALIGNR128_MASK,
29520 IX86_BUILTIN_MOVDQA64_256_MASK,
29521 IX86_BUILTIN_MOVDQA64_128_MASK,
29522 IX86_BUILTIN_MOVDQA32_256_MASK,
29523 IX86_BUILTIN_MOVDQA32_128_MASK,
29524 IX86_BUILTIN_MOVAPD256_MASK,
29525 IX86_BUILTIN_MOVAPD128_MASK,
29526 IX86_BUILTIN_MOVAPS256_MASK,
29527 IX86_BUILTIN_MOVAPS128_MASK,
29528 IX86_BUILTIN_MOVDQUHI256_MASK,
29529 IX86_BUILTIN_MOVDQUHI128_MASK,
29530 IX86_BUILTIN_MOVDQUQI256_MASK,
29531 IX86_BUILTIN_MOVDQUQI128_MASK,
29532 IX86_BUILTIN_MINPS128_MASK,
29533 IX86_BUILTIN_MAXPS128_MASK,
29534 IX86_BUILTIN_MINPD128_MASK,
29535 IX86_BUILTIN_MAXPD128_MASK,
29536 IX86_BUILTIN_MAXPD256_MASK,
29537 IX86_BUILTIN_MAXPS256_MASK,
29538 IX86_BUILTIN_MINPD256_MASK,
29539 IX86_BUILTIN_MINPS256_MASK,
29540 IX86_BUILTIN_MULPS128_MASK,
29541 IX86_BUILTIN_DIVPS128_MASK,
29542 IX86_BUILTIN_MULPD128_MASK,
29543 IX86_BUILTIN_DIVPD128_MASK,
29544 IX86_BUILTIN_DIVPD256_MASK,
29545 IX86_BUILTIN_DIVPS256_MASK,
29546 IX86_BUILTIN_MULPD256_MASK,
29547 IX86_BUILTIN_MULPS256_MASK,
29548 IX86_BUILTIN_ADDPD128_MASK,
29549 IX86_BUILTIN_ADDPD256_MASK,
29550 IX86_BUILTIN_ADDPS128_MASK,
29551 IX86_BUILTIN_ADDPS256_MASK,
29552 IX86_BUILTIN_SUBPD128_MASK,
29553 IX86_BUILTIN_SUBPD256_MASK,
29554 IX86_BUILTIN_SUBPS128_MASK,
29555 IX86_BUILTIN_SUBPS256_MASK,
29556 IX86_BUILTIN_XORPD256_MASK,
29557 IX86_BUILTIN_XORPD128_MASK,
29558 IX86_BUILTIN_XORPS256_MASK,
29559 IX86_BUILTIN_XORPS128_MASK,
29560 IX86_BUILTIN_ORPD256_MASK,
29561 IX86_BUILTIN_ORPD128_MASK,
29562 IX86_BUILTIN_ORPS256_MASK,
29563 IX86_BUILTIN_ORPS128_MASK,
29564 IX86_BUILTIN_BROADCASTF32x2_256,
29565 IX86_BUILTIN_BROADCASTI32x2_256,
29566 IX86_BUILTIN_BROADCASTI32x2_128,
29567 IX86_BUILTIN_BROADCASTF64X2_256,
29568 IX86_BUILTIN_BROADCASTI64X2_256,
29569 IX86_BUILTIN_BROADCASTF32X4_256,
29570 IX86_BUILTIN_BROADCASTI32X4_256,
29571 IX86_BUILTIN_EXTRACTF32X4_256,
29572 IX86_BUILTIN_EXTRACTI32X4_256,
29573 IX86_BUILTIN_DBPSADBW256,
29574 IX86_BUILTIN_DBPSADBW128,
29575 IX86_BUILTIN_CVTTPD2QQ256,
29576 IX86_BUILTIN_CVTTPD2QQ128,
29577 IX86_BUILTIN_CVTTPD2UQQ256,
29578 IX86_BUILTIN_CVTTPD2UQQ128,
29579 IX86_BUILTIN_CVTPD2QQ256,
29580 IX86_BUILTIN_CVTPD2QQ128,
29581 IX86_BUILTIN_CVTPD2UQQ256,
29582 IX86_BUILTIN_CVTPD2UQQ128,
29583 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29584 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29585 IX86_BUILTIN_CVTTPS2QQ256,
29586 IX86_BUILTIN_CVTTPS2QQ128,
29587 IX86_BUILTIN_CVTTPS2UQQ256,
29588 IX86_BUILTIN_CVTTPS2UQQ128,
29589 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29590 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29591 IX86_BUILTIN_CVTTPS2UDQ256,
29592 IX86_BUILTIN_CVTTPS2UDQ128,
29593 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29594 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29595 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29596 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29597 IX86_BUILTIN_CVTPD2DQ256_MASK,
29598 IX86_BUILTIN_CVTPD2DQ128_MASK,
29599 IX86_BUILTIN_CVTDQ2PD256_MASK,
29600 IX86_BUILTIN_CVTDQ2PD128_MASK,
29601 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29602 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29603 IX86_BUILTIN_CVTDQ2PS256_MASK,
29604 IX86_BUILTIN_CVTDQ2PS128_MASK,
29605 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29606 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29607 IX86_BUILTIN_CVTPS2PD256_MASK,
29608 IX86_BUILTIN_CVTPS2PD128_MASK,
29609 IX86_BUILTIN_PBROADCASTB256_MASK,
29610 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29611 IX86_BUILTIN_PBROADCASTB128_MASK,
29612 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29613 IX86_BUILTIN_PBROADCASTW256_MASK,
29614 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29615 IX86_BUILTIN_PBROADCASTW128_MASK,
29616 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29617 IX86_BUILTIN_PBROADCASTD256_MASK,
29618 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29619 IX86_BUILTIN_PBROADCASTD128_MASK,
29620 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29621 IX86_BUILTIN_PBROADCASTQ256_MASK,
29622 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29623 IX86_BUILTIN_PBROADCASTQ128_MASK,
29624 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29625 IX86_BUILTIN_BROADCASTSS256,
29626 IX86_BUILTIN_BROADCASTSS128,
29627 IX86_BUILTIN_BROADCASTSD256,
29628 IX86_BUILTIN_EXTRACTF64X2_256,
29629 IX86_BUILTIN_EXTRACTI64X2_256,
29630 IX86_BUILTIN_INSERTF32X4_256,
29631 IX86_BUILTIN_INSERTI32X4_256,
29632 IX86_BUILTIN_PMOVSXBW256_MASK,
29633 IX86_BUILTIN_PMOVSXBW128_MASK,
29634 IX86_BUILTIN_PMOVSXBD256_MASK,
29635 IX86_BUILTIN_PMOVSXBD128_MASK,
29636 IX86_BUILTIN_PMOVSXBQ256_MASK,
29637 IX86_BUILTIN_PMOVSXBQ128_MASK,
29638 IX86_BUILTIN_PMOVSXWD256_MASK,
29639 IX86_BUILTIN_PMOVSXWD128_MASK,
29640 IX86_BUILTIN_PMOVSXWQ256_MASK,
29641 IX86_BUILTIN_PMOVSXWQ128_MASK,
29642 IX86_BUILTIN_PMOVSXDQ256_MASK,
29643 IX86_BUILTIN_PMOVSXDQ128_MASK,
29644 IX86_BUILTIN_PMOVZXBW256_MASK,
29645 IX86_BUILTIN_PMOVZXBW128_MASK,
29646 IX86_BUILTIN_PMOVZXBD256_MASK,
29647 IX86_BUILTIN_PMOVZXBD128_MASK,
29648 IX86_BUILTIN_PMOVZXBQ256_MASK,
29649 IX86_BUILTIN_PMOVZXBQ128_MASK,
29650 IX86_BUILTIN_PMOVZXWD256_MASK,
29651 IX86_BUILTIN_PMOVZXWD128_MASK,
29652 IX86_BUILTIN_PMOVZXWQ256_MASK,
29653 IX86_BUILTIN_PMOVZXWQ128_MASK,
29654 IX86_BUILTIN_PMOVZXDQ256_MASK,
29655 IX86_BUILTIN_PMOVZXDQ128_MASK,
29656 IX86_BUILTIN_REDUCEPD256_MASK,
29657 IX86_BUILTIN_REDUCEPD128_MASK,
29658 IX86_BUILTIN_REDUCEPS256_MASK,
29659 IX86_BUILTIN_REDUCEPS128_MASK,
29660 IX86_BUILTIN_REDUCESD_MASK,
29661 IX86_BUILTIN_REDUCESS_MASK,
29662 IX86_BUILTIN_VPERMVARHI256_MASK,
29663 IX86_BUILTIN_VPERMVARHI128_MASK,
29664 IX86_BUILTIN_VPERMT2VARHI256,
29665 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29666 IX86_BUILTIN_VPERMT2VARHI128,
29667 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29668 IX86_BUILTIN_VPERMI2VARHI256,
29669 IX86_BUILTIN_VPERMI2VARHI128,
29670 IX86_BUILTIN_RCP14PD256,
29671 IX86_BUILTIN_RCP14PD128,
29672 IX86_BUILTIN_RCP14PS256,
29673 IX86_BUILTIN_RCP14PS128,
29674 IX86_BUILTIN_RSQRT14PD256_MASK,
29675 IX86_BUILTIN_RSQRT14PD128_MASK,
29676 IX86_BUILTIN_RSQRT14PS256_MASK,
29677 IX86_BUILTIN_RSQRT14PS128_MASK,
29678 IX86_BUILTIN_SQRTPD256_MASK,
29679 IX86_BUILTIN_SQRTPD128_MASK,
29680 IX86_BUILTIN_SQRTPS256_MASK,
29681 IX86_BUILTIN_SQRTPS128_MASK,
29682 IX86_BUILTIN_PADDB128_MASK,
29683 IX86_BUILTIN_PADDW128_MASK,
29684 IX86_BUILTIN_PADDD128_MASK,
29685 IX86_BUILTIN_PADDQ128_MASK,
29686 IX86_BUILTIN_PSUBB128_MASK,
29687 IX86_BUILTIN_PSUBW128_MASK,
29688 IX86_BUILTIN_PSUBD128_MASK,
29689 IX86_BUILTIN_PSUBQ128_MASK,
29690 IX86_BUILTIN_PADDSB128_MASK,
29691 IX86_BUILTIN_PADDSW128_MASK,
29692 IX86_BUILTIN_PSUBSB128_MASK,
29693 IX86_BUILTIN_PSUBSW128_MASK,
29694 IX86_BUILTIN_PADDUSB128_MASK,
29695 IX86_BUILTIN_PADDUSW128_MASK,
29696 IX86_BUILTIN_PSUBUSB128_MASK,
29697 IX86_BUILTIN_PSUBUSW128_MASK,
29698 IX86_BUILTIN_PADDB256_MASK,
29699 IX86_BUILTIN_PADDW256_MASK,
29700 IX86_BUILTIN_PADDD256_MASK,
29701 IX86_BUILTIN_PADDQ256_MASK,
29702 IX86_BUILTIN_PADDSB256_MASK,
29703 IX86_BUILTIN_PADDSW256_MASK,
29704 IX86_BUILTIN_PADDUSB256_MASK,
29705 IX86_BUILTIN_PADDUSW256_MASK,
29706 IX86_BUILTIN_PSUBB256_MASK,
29707 IX86_BUILTIN_PSUBW256_MASK,
29708 IX86_BUILTIN_PSUBD256_MASK,
29709 IX86_BUILTIN_PSUBQ256_MASK,
29710 IX86_BUILTIN_PSUBSB256_MASK,
29711 IX86_BUILTIN_PSUBSW256_MASK,
29712 IX86_BUILTIN_PSUBUSB256_MASK,
29713 IX86_BUILTIN_PSUBUSW256_MASK,
29714 IX86_BUILTIN_SHUF_F64x2_256,
29715 IX86_BUILTIN_SHUF_I64x2_256,
29716 IX86_BUILTIN_SHUF_I32x4_256,
29717 IX86_BUILTIN_SHUF_F32x4_256,
29718 IX86_BUILTIN_PMOVWB128,
29719 IX86_BUILTIN_PMOVWB256,
29720 IX86_BUILTIN_PMOVSWB128,
29721 IX86_BUILTIN_PMOVSWB256,
29722 IX86_BUILTIN_PMOVUSWB128,
29723 IX86_BUILTIN_PMOVUSWB256,
29724 IX86_BUILTIN_PMOVDB128,
29725 IX86_BUILTIN_PMOVDB256,
29726 IX86_BUILTIN_PMOVSDB128,
29727 IX86_BUILTIN_PMOVSDB256,
29728 IX86_BUILTIN_PMOVUSDB128,
29729 IX86_BUILTIN_PMOVUSDB256,
29730 IX86_BUILTIN_PMOVDW128,
29731 IX86_BUILTIN_PMOVDW256,
29732 IX86_BUILTIN_PMOVSDW128,
29733 IX86_BUILTIN_PMOVSDW256,
29734 IX86_BUILTIN_PMOVUSDW128,
29735 IX86_BUILTIN_PMOVUSDW256,
29736 IX86_BUILTIN_PMOVQB128,
29737 IX86_BUILTIN_PMOVQB256,
29738 IX86_BUILTIN_PMOVSQB128,
29739 IX86_BUILTIN_PMOVSQB256,
29740 IX86_BUILTIN_PMOVUSQB128,
29741 IX86_BUILTIN_PMOVUSQB256,
29742 IX86_BUILTIN_PMOVQW128,
29743 IX86_BUILTIN_PMOVQW256,
29744 IX86_BUILTIN_PMOVSQW128,
29745 IX86_BUILTIN_PMOVSQW256,
29746 IX86_BUILTIN_PMOVUSQW128,
29747 IX86_BUILTIN_PMOVUSQW256,
29748 IX86_BUILTIN_PMOVQD128,
29749 IX86_BUILTIN_PMOVQD256,
29750 IX86_BUILTIN_PMOVSQD128,
29751 IX86_BUILTIN_PMOVSQD256,
29752 IX86_BUILTIN_PMOVUSQD128,
29753 IX86_BUILTIN_PMOVUSQD256,
29754 IX86_BUILTIN_RANGEPD256,
29755 IX86_BUILTIN_RANGEPD128,
29756 IX86_BUILTIN_RANGEPS256,
29757 IX86_BUILTIN_RANGEPS128,
29758 IX86_BUILTIN_GETEXPPS256,
29759 IX86_BUILTIN_GETEXPPD256,
29760 IX86_BUILTIN_GETEXPPS128,
29761 IX86_BUILTIN_GETEXPPD128,
29762 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29763 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29764 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29765 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29766 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29767 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29768 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29769 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29770 IX86_BUILTIN_PABSQ256,
29771 IX86_BUILTIN_PABSQ128,
29772 IX86_BUILTIN_PABSD256_MASK,
29773 IX86_BUILTIN_PABSD128_MASK,
29774 IX86_BUILTIN_PMULHRSW256_MASK,
29775 IX86_BUILTIN_PMULHRSW128_MASK,
29776 IX86_BUILTIN_PMULHUW128_MASK,
29777 IX86_BUILTIN_PMULHUW256_MASK,
29778 IX86_BUILTIN_PMULHW256_MASK,
29779 IX86_BUILTIN_PMULHW128_MASK,
29780 IX86_BUILTIN_PMULLW256_MASK,
29781 IX86_BUILTIN_PMULLW128_MASK,
29782 IX86_BUILTIN_PMULLQ256,
29783 IX86_BUILTIN_PMULLQ128,
29784 IX86_BUILTIN_ANDPD256_MASK,
29785 IX86_BUILTIN_ANDPD128_MASK,
29786 IX86_BUILTIN_ANDPS256_MASK,
29787 IX86_BUILTIN_ANDPS128_MASK,
29788 IX86_BUILTIN_ANDNPD256_MASK,
29789 IX86_BUILTIN_ANDNPD128_MASK,
29790 IX86_BUILTIN_ANDNPS256_MASK,
29791 IX86_BUILTIN_ANDNPS128_MASK,
29792 IX86_BUILTIN_PSLLWI128_MASK,
29793 IX86_BUILTIN_PSLLDI128_MASK,
29794 IX86_BUILTIN_PSLLQI128_MASK,
29795 IX86_BUILTIN_PSLLW128_MASK,
29796 IX86_BUILTIN_PSLLD128_MASK,
29797 IX86_BUILTIN_PSLLQ128_MASK,
29798 IX86_BUILTIN_PSLLWI256_MASK ,
29799 IX86_BUILTIN_PSLLW256_MASK,
29800 IX86_BUILTIN_PSLLDI256_MASK,
29801 IX86_BUILTIN_PSLLD256_MASK,
29802 IX86_BUILTIN_PSLLQI256_MASK,
29803 IX86_BUILTIN_PSLLQ256_MASK,
29804 IX86_BUILTIN_PSRADI128_MASK,
29805 IX86_BUILTIN_PSRAD128_MASK,
29806 IX86_BUILTIN_PSRADI256_MASK,
29807 IX86_BUILTIN_PSRAD256_MASK,
29808 IX86_BUILTIN_PSRAQI128_MASK,
29809 IX86_BUILTIN_PSRAQ128_MASK,
29810 IX86_BUILTIN_PSRAQI256_MASK,
29811 IX86_BUILTIN_PSRAQ256_MASK,
29812 IX86_BUILTIN_PANDD256,
29813 IX86_BUILTIN_PANDD128,
29814 IX86_BUILTIN_PSRLDI128_MASK,
29815 IX86_BUILTIN_PSRLD128_MASK,
29816 IX86_BUILTIN_PSRLDI256_MASK,
29817 IX86_BUILTIN_PSRLD256_MASK,
29818 IX86_BUILTIN_PSRLQI128_MASK,
29819 IX86_BUILTIN_PSRLQ128_MASK,
29820 IX86_BUILTIN_PSRLQI256_MASK,
29821 IX86_BUILTIN_PSRLQ256_MASK,
29822 IX86_BUILTIN_PANDQ256,
29823 IX86_BUILTIN_PANDQ128,
29824 IX86_BUILTIN_PANDND256,
29825 IX86_BUILTIN_PANDND128,
29826 IX86_BUILTIN_PANDNQ256,
29827 IX86_BUILTIN_PANDNQ128,
29828 IX86_BUILTIN_PORD256,
29829 IX86_BUILTIN_PORD128,
29830 IX86_BUILTIN_PORQ256,
29831 IX86_BUILTIN_PORQ128,
29832 IX86_BUILTIN_PXORD256,
29833 IX86_BUILTIN_PXORD128,
29834 IX86_BUILTIN_PXORQ256,
29835 IX86_BUILTIN_PXORQ128,
29836 IX86_BUILTIN_PACKSSWB256_MASK,
29837 IX86_BUILTIN_PACKSSWB128_MASK,
29838 IX86_BUILTIN_PACKUSWB256_MASK,
29839 IX86_BUILTIN_PACKUSWB128_MASK,
29840 IX86_BUILTIN_RNDSCALEPS256,
29841 IX86_BUILTIN_RNDSCALEPD256,
29842 IX86_BUILTIN_RNDSCALEPS128,
29843 IX86_BUILTIN_RNDSCALEPD128,
29844 IX86_BUILTIN_VTERNLOGQ256_MASK,
29845 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29846 IX86_BUILTIN_VTERNLOGD256_MASK,
29847 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29848 IX86_BUILTIN_VTERNLOGQ128_MASK,
29849 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29850 IX86_BUILTIN_VTERNLOGD128_MASK,
29851 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29852 IX86_BUILTIN_SCALEFPD256,
29853 IX86_BUILTIN_SCALEFPS256,
29854 IX86_BUILTIN_SCALEFPD128,
29855 IX86_BUILTIN_SCALEFPS128,
29856 IX86_BUILTIN_VFMADDPD256_MASK,
29857 IX86_BUILTIN_VFMADDPD256_MASK3,
29858 IX86_BUILTIN_VFMADDPD256_MASKZ,
29859 IX86_BUILTIN_VFMADDPD128_MASK,
29860 IX86_BUILTIN_VFMADDPD128_MASK3,
29861 IX86_BUILTIN_VFMADDPD128_MASKZ,
29862 IX86_BUILTIN_VFMADDPS256_MASK,
29863 IX86_BUILTIN_VFMADDPS256_MASK3,
29864 IX86_BUILTIN_VFMADDPS256_MASKZ,
29865 IX86_BUILTIN_VFMADDPS128_MASK,
29866 IX86_BUILTIN_VFMADDPS128_MASK3,
29867 IX86_BUILTIN_VFMADDPS128_MASKZ,
29868 IX86_BUILTIN_VFMSUBPD256_MASK3,
29869 IX86_BUILTIN_VFMSUBPD128_MASK3,
29870 IX86_BUILTIN_VFMSUBPS256_MASK3,
29871 IX86_BUILTIN_VFMSUBPS128_MASK3,
29872 IX86_BUILTIN_VFNMADDPD256_MASK,
29873 IX86_BUILTIN_VFNMADDPD128_MASK,
29874 IX86_BUILTIN_VFNMADDPS256_MASK,
29875 IX86_BUILTIN_VFNMADDPS128_MASK,
29876 IX86_BUILTIN_VFNMSUBPD256_MASK,
29877 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29878 IX86_BUILTIN_VFNMSUBPD128_MASK,
29879 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29880 IX86_BUILTIN_VFNMSUBPS256_MASK,
29881 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29882 IX86_BUILTIN_VFNMSUBPS128_MASK,
29883 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29884 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29885 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29886 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29887 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29888 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29889 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29890 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29891 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29892 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29893 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29894 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29895 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29896 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29897 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29898 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29899 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29900 IX86_BUILTIN_INSERTF64X2_256,
29901 IX86_BUILTIN_INSERTI64X2_256,
29902 IX86_BUILTIN_PSRAVV16HI,
29903 IX86_BUILTIN_PSRAVV8HI,
29904 IX86_BUILTIN_PMADDUBSW256_MASK,
29905 IX86_BUILTIN_PMADDUBSW128_MASK,
29906 IX86_BUILTIN_PMADDWD256_MASK,
29907 IX86_BUILTIN_PMADDWD128_MASK,
29908 IX86_BUILTIN_PSRLVV16HI,
29909 IX86_BUILTIN_PSRLVV8HI,
29910 IX86_BUILTIN_CVTPS2DQ256_MASK,
29911 IX86_BUILTIN_CVTPS2DQ128_MASK,
29912 IX86_BUILTIN_CVTPS2UDQ256,
29913 IX86_BUILTIN_CVTPS2UDQ128,
29914 IX86_BUILTIN_CVTPS2QQ256,
29915 IX86_BUILTIN_CVTPS2QQ128,
29916 IX86_BUILTIN_CVTPS2UQQ256,
29917 IX86_BUILTIN_CVTPS2UQQ128,
29918 IX86_BUILTIN_GETMANTPS256,
29919 IX86_BUILTIN_GETMANTPS128,
29920 IX86_BUILTIN_GETMANTPD256,
29921 IX86_BUILTIN_GETMANTPD128,
29922 IX86_BUILTIN_MOVDDUP256_MASK,
29923 IX86_BUILTIN_MOVDDUP128_MASK,
29924 IX86_BUILTIN_MOVSHDUP256_MASK,
29925 IX86_BUILTIN_MOVSHDUP128_MASK,
29926 IX86_BUILTIN_MOVSLDUP256_MASK,
29927 IX86_BUILTIN_MOVSLDUP128_MASK,
29928 IX86_BUILTIN_CVTQQ2PS256,
29929 IX86_BUILTIN_CVTQQ2PS128,
29930 IX86_BUILTIN_CVTUQQ2PS256,
29931 IX86_BUILTIN_CVTUQQ2PS128,
29932 IX86_BUILTIN_CVTQQ2PD256,
29933 IX86_BUILTIN_CVTQQ2PD128,
29934 IX86_BUILTIN_CVTUQQ2PD256,
29935 IX86_BUILTIN_CVTUQQ2PD128,
29936 IX86_BUILTIN_VPERMT2VARQ256,
29937 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29938 IX86_BUILTIN_VPERMT2VARD256,
29939 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29940 IX86_BUILTIN_VPERMI2VARQ256,
29941 IX86_BUILTIN_VPERMI2VARD256,
29942 IX86_BUILTIN_VPERMT2VARPD256,
29943 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29944 IX86_BUILTIN_VPERMT2VARPS256,
29945 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29946 IX86_BUILTIN_VPERMI2VARPD256,
29947 IX86_BUILTIN_VPERMI2VARPS256,
29948 IX86_BUILTIN_VPERMT2VARQ128,
29949 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29950 IX86_BUILTIN_VPERMT2VARD128,
29951 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29952 IX86_BUILTIN_VPERMI2VARQ128,
29953 IX86_BUILTIN_VPERMI2VARD128,
29954 IX86_BUILTIN_VPERMT2VARPD128,
29955 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29956 IX86_BUILTIN_VPERMT2VARPS128,
29957 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29958 IX86_BUILTIN_VPERMI2VARPD128,
29959 IX86_BUILTIN_VPERMI2VARPS128,
29960 IX86_BUILTIN_PSHUFB256_MASK,
29961 IX86_BUILTIN_PSHUFB128_MASK,
29962 IX86_BUILTIN_PSHUFHW256_MASK,
29963 IX86_BUILTIN_PSHUFHW128_MASK,
29964 IX86_BUILTIN_PSHUFLW256_MASK,
29965 IX86_BUILTIN_PSHUFLW128_MASK,
29966 IX86_BUILTIN_PSHUFD256_MASK,
29967 IX86_BUILTIN_PSHUFD128_MASK,
29968 IX86_BUILTIN_SHUFPD256_MASK,
29969 IX86_BUILTIN_SHUFPD128_MASK,
29970 IX86_BUILTIN_SHUFPS256_MASK,
29971 IX86_BUILTIN_SHUFPS128_MASK,
29972 IX86_BUILTIN_PROLVQ256,
29973 IX86_BUILTIN_PROLVQ128,
29974 IX86_BUILTIN_PROLQ256,
29975 IX86_BUILTIN_PROLQ128,
29976 IX86_BUILTIN_PRORVQ256,
29977 IX86_BUILTIN_PRORVQ128,
29978 IX86_BUILTIN_PRORQ256,
29979 IX86_BUILTIN_PRORQ128,
29980 IX86_BUILTIN_PSRAVQ128,
29981 IX86_BUILTIN_PSRAVQ256,
29982 IX86_BUILTIN_PSLLVV4DI_MASK,
29983 IX86_BUILTIN_PSLLVV2DI_MASK,
29984 IX86_BUILTIN_PSLLVV8SI_MASK,
29985 IX86_BUILTIN_PSLLVV4SI_MASK,
29986 IX86_BUILTIN_PSRAVV8SI_MASK,
29987 IX86_BUILTIN_PSRAVV4SI_MASK,
29988 IX86_BUILTIN_PSRLVV4DI_MASK,
29989 IX86_BUILTIN_PSRLVV2DI_MASK,
29990 IX86_BUILTIN_PSRLVV8SI_MASK,
29991 IX86_BUILTIN_PSRLVV4SI_MASK,
29992 IX86_BUILTIN_PSRAWI256_MASK,
29993 IX86_BUILTIN_PSRAW256_MASK,
29994 IX86_BUILTIN_PSRAWI128_MASK,
29995 IX86_BUILTIN_PSRAW128_MASK,
29996 IX86_BUILTIN_PSRLWI256_MASK,
29997 IX86_BUILTIN_PSRLW256_MASK,
29998 IX86_BUILTIN_PSRLWI128_MASK,
29999 IX86_BUILTIN_PSRLW128_MASK,
30000 IX86_BUILTIN_PRORVD256,
30001 IX86_BUILTIN_PROLVD256,
30002 IX86_BUILTIN_PRORD256,
30003 IX86_BUILTIN_PROLD256,
30004 IX86_BUILTIN_PRORVD128,
30005 IX86_BUILTIN_PROLVD128,
30006 IX86_BUILTIN_PRORD128,
30007 IX86_BUILTIN_PROLD128,
30008 IX86_BUILTIN_FPCLASSPD256,
30009 IX86_BUILTIN_FPCLASSPD128,
30010 IX86_BUILTIN_FPCLASSSD,
30011 IX86_BUILTIN_FPCLASSPS256,
30012 IX86_BUILTIN_FPCLASSPS128,
30013 IX86_BUILTIN_FPCLASSSS,
30014 IX86_BUILTIN_CVTB2MASK128,
30015 IX86_BUILTIN_CVTB2MASK256,
30016 IX86_BUILTIN_CVTW2MASK128,
30017 IX86_BUILTIN_CVTW2MASK256,
30018 IX86_BUILTIN_CVTD2MASK128,
30019 IX86_BUILTIN_CVTD2MASK256,
30020 IX86_BUILTIN_CVTQ2MASK128,
30021 IX86_BUILTIN_CVTQ2MASK256,
30022 IX86_BUILTIN_CVTMASK2B128,
30023 IX86_BUILTIN_CVTMASK2B256,
30024 IX86_BUILTIN_CVTMASK2W128,
30025 IX86_BUILTIN_CVTMASK2W256,
30026 IX86_BUILTIN_CVTMASK2D128,
30027 IX86_BUILTIN_CVTMASK2D256,
30028 IX86_BUILTIN_CVTMASK2Q128,
30029 IX86_BUILTIN_CVTMASK2Q256,
30030 IX86_BUILTIN_PCMPEQB128_MASK,
30031 IX86_BUILTIN_PCMPEQB256_MASK,
30032 IX86_BUILTIN_PCMPEQW128_MASK,
30033 IX86_BUILTIN_PCMPEQW256_MASK,
30034 IX86_BUILTIN_PCMPEQD128_MASK,
30035 IX86_BUILTIN_PCMPEQD256_MASK,
30036 IX86_BUILTIN_PCMPEQQ128_MASK,
30037 IX86_BUILTIN_PCMPEQQ256_MASK,
30038 IX86_BUILTIN_PCMPGTB128_MASK,
30039 IX86_BUILTIN_PCMPGTB256_MASK,
30040 IX86_BUILTIN_PCMPGTW128_MASK,
30041 IX86_BUILTIN_PCMPGTW256_MASK,
30042 IX86_BUILTIN_PCMPGTD128_MASK,
30043 IX86_BUILTIN_PCMPGTD256_MASK,
30044 IX86_BUILTIN_PCMPGTQ128_MASK,
30045 IX86_BUILTIN_PCMPGTQ256_MASK,
30046 IX86_BUILTIN_PTESTMB128,
30047 IX86_BUILTIN_PTESTMB256,
30048 IX86_BUILTIN_PTESTMW128,
30049 IX86_BUILTIN_PTESTMW256,
30050 IX86_BUILTIN_PTESTMD128,
30051 IX86_BUILTIN_PTESTMD256,
30052 IX86_BUILTIN_PTESTMQ128,
30053 IX86_BUILTIN_PTESTMQ256,
30054 IX86_BUILTIN_PTESTNMB128,
30055 IX86_BUILTIN_PTESTNMB256,
30056 IX86_BUILTIN_PTESTNMW128,
30057 IX86_BUILTIN_PTESTNMW256,
30058 IX86_BUILTIN_PTESTNMD128,
30059 IX86_BUILTIN_PTESTNMD256,
30060 IX86_BUILTIN_PTESTNMQ128,
30061 IX86_BUILTIN_PTESTNMQ256,
30062 IX86_BUILTIN_PBROADCASTMB128,
30063 IX86_BUILTIN_PBROADCASTMB256,
30064 IX86_BUILTIN_PBROADCASTMW128,
30065 IX86_BUILTIN_PBROADCASTMW256,
30066 IX86_BUILTIN_COMPRESSPD256,
30067 IX86_BUILTIN_COMPRESSPD128,
30068 IX86_BUILTIN_COMPRESSPS256,
30069 IX86_BUILTIN_COMPRESSPS128,
30070 IX86_BUILTIN_PCOMPRESSQ256,
30071 IX86_BUILTIN_PCOMPRESSQ128,
30072 IX86_BUILTIN_PCOMPRESSD256,
30073 IX86_BUILTIN_PCOMPRESSD128,
30074 IX86_BUILTIN_EXPANDPD256,
30075 IX86_BUILTIN_EXPANDPD128,
30076 IX86_BUILTIN_EXPANDPS256,
30077 IX86_BUILTIN_EXPANDPS128,
30078 IX86_BUILTIN_PEXPANDQ256,
30079 IX86_BUILTIN_PEXPANDQ128,
30080 IX86_BUILTIN_PEXPANDD256,
30081 IX86_BUILTIN_PEXPANDD128,
30082 IX86_BUILTIN_EXPANDPD256Z,
30083 IX86_BUILTIN_EXPANDPD128Z,
30084 IX86_BUILTIN_EXPANDPS256Z,
30085 IX86_BUILTIN_EXPANDPS128Z,
30086 IX86_BUILTIN_PEXPANDQ256Z,
30087 IX86_BUILTIN_PEXPANDQ128Z,
30088 IX86_BUILTIN_PEXPANDD256Z,
30089 IX86_BUILTIN_PEXPANDD128Z,
30090 IX86_BUILTIN_PMAXSD256_MASK,
30091 IX86_BUILTIN_PMINSD256_MASK,
30092 IX86_BUILTIN_PMAXUD256_MASK,
30093 IX86_BUILTIN_PMINUD256_MASK,
30094 IX86_BUILTIN_PMAXSD128_MASK,
30095 IX86_BUILTIN_PMINSD128_MASK,
30096 IX86_BUILTIN_PMAXUD128_MASK,
30097 IX86_BUILTIN_PMINUD128_MASK,
30098 IX86_BUILTIN_PMAXSQ256_MASK,
30099 IX86_BUILTIN_PMINSQ256_MASK,
30100 IX86_BUILTIN_PMAXUQ256_MASK,
30101 IX86_BUILTIN_PMINUQ256_MASK,
30102 IX86_BUILTIN_PMAXSQ128_MASK,
30103 IX86_BUILTIN_PMINSQ128_MASK,
30104 IX86_BUILTIN_PMAXUQ128_MASK,
30105 IX86_BUILTIN_PMINUQ128_MASK,
30106 IX86_BUILTIN_PMINSB256_MASK,
30107 IX86_BUILTIN_PMINUB256_MASK,
30108 IX86_BUILTIN_PMAXSB256_MASK,
30109 IX86_BUILTIN_PMAXUB256_MASK,
30110 IX86_BUILTIN_PMINSB128_MASK,
30111 IX86_BUILTIN_PMINUB128_MASK,
30112 IX86_BUILTIN_PMAXSB128_MASK,
30113 IX86_BUILTIN_PMAXUB128_MASK,
30114 IX86_BUILTIN_PMINSW256_MASK,
30115 IX86_BUILTIN_PMINUW256_MASK,
30116 IX86_BUILTIN_PMAXSW256_MASK,
30117 IX86_BUILTIN_PMAXUW256_MASK,
30118 IX86_BUILTIN_PMINSW128_MASK,
30119 IX86_BUILTIN_PMINUW128_MASK,
30120 IX86_BUILTIN_PMAXSW128_MASK,
30121 IX86_BUILTIN_PMAXUW128_MASK,
30122 IX86_BUILTIN_VPCONFLICTQ256,
30123 IX86_BUILTIN_VPCONFLICTD256,
30124 IX86_BUILTIN_VPCLZCNTQ256,
30125 IX86_BUILTIN_VPCLZCNTD256,
30126 IX86_BUILTIN_UNPCKHPD256_MASK,
30127 IX86_BUILTIN_UNPCKHPD128_MASK,
30128 IX86_BUILTIN_UNPCKHPS256_MASK,
30129 IX86_BUILTIN_UNPCKHPS128_MASK,
30130 IX86_BUILTIN_UNPCKLPD256_MASK,
30131 IX86_BUILTIN_UNPCKLPD128_MASK,
30132 IX86_BUILTIN_UNPCKLPS256_MASK,
30133 IX86_BUILTIN_VPCONFLICTQ128,
30134 IX86_BUILTIN_VPCONFLICTD128,
30135 IX86_BUILTIN_VPCLZCNTQ128,
30136 IX86_BUILTIN_VPCLZCNTD128,
30137 IX86_BUILTIN_UNPCKLPS128_MASK,
30138 IX86_BUILTIN_ALIGND256,
30139 IX86_BUILTIN_ALIGNQ256,
30140 IX86_BUILTIN_ALIGND128,
30141 IX86_BUILTIN_ALIGNQ128,
30142 IX86_BUILTIN_CVTPS2PH256_MASK,
30143 IX86_BUILTIN_CVTPS2PH_MASK,
30144 IX86_BUILTIN_CVTPH2PS_MASK,
30145 IX86_BUILTIN_CVTPH2PS256_MASK,
30146 IX86_BUILTIN_PUNPCKHDQ128_MASK,
30147 IX86_BUILTIN_PUNPCKHDQ256_MASK,
30148 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
30149 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
30150 IX86_BUILTIN_PUNPCKLDQ128_MASK,
30151 IX86_BUILTIN_PUNPCKLDQ256_MASK,
30152 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
30153 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
30154 IX86_BUILTIN_PUNPCKHBW128_MASK,
30155 IX86_BUILTIN_PUNPCKHBW256_MASK,
30156 IX86_BUILTIN_PUNPCKHWD128_MASK,
30157 IX86_BUILTIN_PUNPCKHWD256_MASK,
30158 IX86_BUILTIN_PUNPCKLBW128_MASK,
30159 IX86_BUILTIN_PUNPCKLBW256_MASK,
30160 IX86_BUILTIN_PUNPCKLWD128_MASK,
30161 IX86_BUILTIN_PUNPCKLWD256_MASK,
30162 IX86_BUILTIN_PSLLVV16HI,
30163 IX86_BUILTIN_PSLLVV8HI,
30164 IX86_BUILTIN_PACKSSDW256_MASK,
30165 IX86_BUILTIN_PACKSSDW128_MASK,
30166 IX86_BUILTIN_PACKUSDW256_MASK,
30167 IX86_BUILTIN_PACKUSDW128_MASK,
30168 IX86_BUILTIN_PAVGB256_MASK,
30169 IX86_BUILTIN_PAVGW256_MASK,
30170 IX86_BUILTIN_PAVGB128_MASK,
30171 IX86_BUILTIN_PAVGW128_MASK,
30172 IX86_BUILTIN_VPERMVARSF256_MASK,
30173 IX86_BUILTIN_VPERMVARDF256_MASK,
30174 IX86_BUILTIN_VPERMDF256_MASK,
30175 IX86_BUILTIN_PABSB256_MASK,
30176 IX86_BUILTIN_PABSB128_MASK,
30177 IX86_BUILTIN_PABSW256_MASK,
30178 IX86_BUILTIN_PABSW128_MASK,
30179 IX86_BUILTIN_VPERMILVARPD_MASK,
30180 IX86_BUILTIN_VPERMILVARPS_MASK,
30181 IX86_BUILTIN_VPERMILVARPD256_MASK,
30182 IX86_BUILTIN_VPERMILVARPS256_MASK,
30183 IX86_BUILTIN_VPERMILPD_MASK,
30184 IX86_BUILTIN_VPERMILPS_MASK,
30185 IX86_BUILTIN_VPERMILPD256_MASK,
30186 IX86_BUILTIN_VPERMILPS256_MASK,
30187 IX86_BUILTIN_BLENDMQ256,
30188 IX86_BUILTIN_BLENDMD256,
30189 IX86_BUILTIN_BLENDMPD256,
30190 IX86_BUILTIN_BLENDMPS256,
30191 IX86_BUILTIN_BLENDMQ128,
30192 IX86_BUILTIN_BLENDMD128,
30193 IX86_BUILTIN_BLENDMPD128,
30194 IX86_BUILTIN_BLENDMPS128,
30195 IX86_BUILTIN_BLENDMW256,
30196 IX86_BUILTIN_BLENDMB256,
30197 IX86_BUILTIN_BLENDMW128,
30198 IX86_BUILTIN_BLENDMB128,
30199 IX86_BUILTIN_PMULLD256_MASK,
30200 IX86_BUILTIN_PMULLD128_MASK,
30201 IX86_BUILTIN_PMULUDQ256_MASK,
30202 IX86_BUILTIN_PMULDQ256_MASK,
30203 IX86_BUILTIN_PMULDQ128_MASK,
30204 IX86_BUILTIN_PMULUDQ128_MASK,
30205 IX86_BUILTIN_CVTPD2PS256_MASK,
30206 IX86_BUILTIN_CVTPD2PS_MASK,
30207 IX86_BUILTIN_VPERMVARSI256_MASK,
30208 IX86_BUILTIN_VPERMVARDI256_MASK,
30209 IX86_BUILTIN_VPERMDI256_MASK,
30210 IX86_BUILTIN_CMPQ256,
30211 IX86_BUILTIN_CMPD256,
30212 IX86_BUILTIN_UCMPQ256,
30213 IX86_BUILTIN_UCMPD256,
30214 IX86_BUILTIN_CMPB256,
30215 IX86_BUILTIN_CMPW256,
30216 IX86_BUILTIN_UCMPB256,
30217 IX86_BUILTIN_UCMPW256,
30218 IX86_BUILTIN_CMPPD256_MASK,
30219 IX86_BUILTIN_CMPPS256_MASK,
30220 IX86_BUILTIN_CMPQ128,
30221 IX86_BUILTIN_CMPD128,
30222 IX86_BUILTIN_UCMPQ128,
30223 IX86_BUILTIN_UCMPD128,
30224 IX86_BUILTIN_CMPB128,
30225 IX86_BUILTIN_CMPW128,
30226 IX86_BUILTIN_UCMPB128,
30227 IX86_BUILTIN_UCMPW128,
30228 IX86_BUILTIN_CMPPD128_MASK,
30229 IX86_BUILTIN_CMPPS128_MASK,
30231 IX86_BUILTIN_GATHER3SIV8SF,
30232 IX86_BUILTIN_GATHER3SIV4SF,
30233 IX86_BUILTIN_GATHER3SIV4DF,
30234 IX86_BUILTIN_GATHER3SIV2DF,
30235 IX86_BUILTIN_GATHER3DIV8SF,
30236 IX86_BUILTIN_GATHER3DIV4SF,
30237 IX86_BUILTIN_GATHER3DIV4DF,
30238 IX86_BUILTIN_GATHER3DIV2DF,
30239 IX86_BUILTIN_GATHER3SIV8SI,
30240 IX86_BUILTIN_GATHER3SIV4SI,
30241 IX86_BUILTIN_GATHER3SIV4DI,
30242 IX86_BUILTIN_GATHER3SIV2DI,
30243 IX86_BUILTIN_GATHER3DIV8SI,
30244 IX86_BUILTIN_GATHER3DIV4SI,
30245 IX86_BUILTIN_GATHER3DIV4DI,
30246 IX86_BUILTIN_GATHER3DIV2DI,
30247 IX86_BUILTIN_SCATTERSIV8SF,
30248 IX86_BUILTIN_SCATTERSIV4SF,
30249 IX86_BUILTIN_SCATTERSIV4DF,
30250 IX86_BUILTIN_SCATTERSIV2DF,
30251 IX86_BUILTIN_SCATTERDIV8SF,
30252 IX86_BUILTIN_SCATTERDIV4SF,
30253 IX86_BUILTIN_SCATTERDIV4DF,
30254 IX86_BUILTIN_SCATTERDIV2DF,
30255 IX86_BUILTIN_SCATTERSIV8SI,
30256 IX86_BUILTIN_SCATTERSIV4SI,
30257 IX86_BUILTIN_SCATTERSIV4DI,
30258 IX86_BUILTIN_SCATTERSIV2DI,
30259 IX86_BUILTIN_SCATTERDIV8SI,
30260 IX86_BUILTIN_SCATTERDIV4SI,
30261 IX86_BUILTIN_SCATTERDIV4DI,
30262 IX86_BUILTIN_SCATTERDIV2DI,
30265 IX86_BUILTIN_RANGESD128,
30266 IX86_BUILTIN_RANGESS128,
30267 IX86_BUILTIN_KUNPCKWD,
30268 IX86_BUILTIN_KUNPCKDQ,
30269 IX86_BUILTIN_BROADCASTF32x2_512,
30270 IX86_BUILTIN_BROADCASTI32x2_512,
30271 IX86_BUILTIN_BROADCASTF64X2_512,
30272 IX86_BUILTIN_BROADCASTI64X2_512,
30273 IX86_BUILTIN_BROADCASTF32X8_512,
30274 IX86_BUILTIN_BROADCASTI32X8_512,
30275 IX86_BUILTIN_EXTRACTF64X2_512,
30276 IX86_BUILTIN_EXTRACTF32X8,
30277 IX86_BUILTIN_EXTRACTI64X2_512,
30278 IX86_BUILTIN_EXTRACTI32X8,
30279 IX86_BUILTIN_REDUCEPD512_MASK,
30280 IX86_BUILTIN_REDUCEPS512_MASK,
30281 IX86_BUILTIN_PMULLQ512,
30282 IX86_BUILTIN_XORPD512,
30283 IX86_BUILTIN_XORPS512,
30284 IX86_BUILTIN_ORPD512,
30285 IX86_BUILTIN_ORPS512,
30286 IX86_BUILTIN_ANDPD512,
30287 IX86_BUILTIN_ANDPS512,
30288 IX86_BUILTIN_ANDNPD512,
30289 IX86_BUILTIN_ANDNPS512,
30290 IX86_BUILTIN_INSERTF32X8,
30291 IX86_BUILTIN_INSERTI32X8,
30292 IX86_BUILTIN_INSERTF64X2_512,
30293 IX86_BUILTIN_INSERTI64X2_512,
30294 IX86_BUILTIN_FPCLASSPD512,
30295 IX86_BUILTIN_FPCLASSPS512,
30296 IX86_BUILTIN_CVTD2MASK512,
30297 IX86_BUILTIN_CVTQ2MASK512,
30298 IX86_BUILTIN_CVTMASK2D512,
30299 IX86_BUILTIN_CVTMASK2Q512,
30300 IX86_BUILTIN_CVTPD2QQ512,
30301 IX86_BUILTIN_CVTPS2QQ512,
30302 IX86_BUILTIN_CVTPD2UQQ512,
30303 IX86_BUILTIN_CVTPS2UQQ512,
30304 IX86_BUILTIN_CVTQQ2PS512,
30305 IX86_BUILTIN_CVTUQQ2PS512,
30306 IX86_BUILTIN_CVTQQ2PD512,
30307 IX86_BUILTIN_CVTUQQ2PD512,
30308 IX86_BUILTIN_CVTTPS2QQ512,
30309 IX86_BUILTIN_CVTTPS2UQQ512,
30310 IX86_BUILTIN_CVTTPD2QQ512,
30311 IX86_BUILTIN_CVTTPD2UQQ512,
30312 IX86_BUILTIN_RANGEPS512,
30313 IX86_BUILTIN_RANGEPD512,
30316 IX86_BUILTIN_PACKUSDW512,
30317 IX86_BUILTIN_PACKSSDW512,
30318 IX86_BUILTIN_LOADDQUHI512_MASK,
30319 IX86_BUILTIN_LOADDQUQI512_MASK,
30320 IX86_BUILTIN_PSLLDQ512,
30321 IX86_BUILTIN_PSRLDQ512,
30322 IX86_BUILTIN_STOREDQUHI512_MASK,
30323 IX86_BUILTIN_STOREDQUQI512_MASK,
30324 IX86_BUILTIN_PALIGNR512,
30325 IX86_BUILTIN_PALIGNR512_MASK,
30326 IX86_BUILTIN_MOVDQUHI512_MASK,
30327 IX86_BUILTIN_MOVDQUQI512_MASK,
30328 IX86_BUILTIN_PSADBW512,
30329 IX86_BUILTIN_DBPSADBW512,
30330 IX86_BUILTIN_PBROADCASTB512,
30331 IX86_BUILTIN_PBROADCASTB512_GPR,
30332 IX86_BUILTIN_PBROADCASTW512,
30333 IX86_BUILTIN_PBROADCASTW512_GPR,
30334 IX86_BUILTIN_PMOVSXBW512_MASK,
30335 IX86_BUILTIN_PMOVZXBW512_MASK,
30336 IX86_BUILTIN_VPERMVARHI512_MASK,
30337 IX86_BUILTIN_VPERMT2VARHI512,
30338 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30339 IX86_BUILTIN_VPERMI2VARHI512,
30340 IX86_BUILTIN_PAVGB512,
30341 IX86_BUILTIN_PAVGW512,
30342 IX86_BUILTIN_PADDB512,
30343 IX86_BUILTIN_PSUBB512,
30344 IX86_BUILTIN_PSUBSB512,
30345 IX86_BUILTIN_PADDSB512,
30346 IX86_BUILTIN_PSUBUSB512,
30347 IX86_BUILTIN_PADDUSB512,
30348 IX86_BUILTIN_PSUBW512,
30349 IX86_BUILTIN_PADDW512,
30350 IX86_BUILTIN_PSUBSW512,
30351 IX86_BUILTIN_PADDSW512,
30352 IX86_BUILTIN_PSUBUSW512,
30353 IX86_BUILTIN_PADDUSW512,
30354 IX86_BUILTIN_PMAXUW512,
30355 IX86_BUILTIN_PMAXSW512,
30356 IX86_BUILTIN_PMINUW512,
30357 IX86_BUILTIN_PMINSW512,
30358 IX86_BUILTIN_PMAXUB512,
30359 IX86_BUILTIN_PMAXSB512,
30360 IX86_BUILTIN_PMINUB512,
30361 IX86_BUILTIN_PMINSB512,
30362 IX86_BUILTIN_PMOVWB512,
30363 IX86_BUILTIN_PMOVSWB512,
30364 IX86_BUILTIN_PMOVUSWB512,
30365 IX86_BUILTIN_PMULHRSW512_MASK,
30366 IX86_BUILTIN_PMULHUW512_MASK,
30367 IX86_BUILTIN_PMULHW512_MASK,
30368 IX86_BUILTIN_PMULLW512_MASK,
30369 IX86_BUILTIN_PSLLWI512_MASK,
30370 IX86_BUILTIN_PSLLW512_MASK,
30371 IX86_BUILTIN_PACKSSWB512,
30372 IX86_BUILTIN_PACKUSWB512,
30373 IX86_BUILTIN_PSRAVV32HI,
30374 IX86_BUILTIN_PMADDUBSW512_MASK,
30375 IX86_BUILTIN_PMADDWD512_MASK,
30376 IX86_BUILTIN_PSRLVV32HI,
30377 IX86_BUILTIN_PUNPCKHBW512,
30378 IX86_BUILTIN_PUNPCKHWD512,
30379 IX86_BUILTIN_PUNPCKLBW512,
30380 IX86_BUILTIN_PUNPCKLWD512,
30381 IX86_BUILTIN_PSHUFB512,
30382 IX86_BUILTIN_PSHUFHW512,
30383 IX86_BUILTIN_PSHUFLW512,
30384 IX86_BUILTIN_PSRAWI512,
30385 IX86_BUILTIN_PSRAW512,
30386 IX86_BUILTIN_PSRLWI512,
30387 IX86_BUILTIN_PSRLW512,
30388 IX86_BUILTIN_CVTB2MASK512,
30389 IX86_BUILTIN_CVTW2MASK512,
30390 IX86_BUILTIN_CVTMASK2B512,
30391 IX86_BUILTIN_CVTMASK2W512,
30392 IX86_BUILTIN_PCMPEQB512_MASK,
30393 IX86_BUILTIN_PCMPEQW512_MASK,
30394 IX86_BUILTIN_PCMPGTB512_MASK,
30395 IX86_BUILTIN_PCMPGTW512_MASK,
30396 IX86_BUILTIN_PTESTMB512,
30397 IX86_BUILTIN_PTESTMW512,
30398 IX86_BUILTIN_PTESTNMB512,
30399 IX86_BUILTIN_PTESTNMW512,
30400 IX86_BUILTIN_PSLLVV32HI,
30401 IX86_BUILTIN_PABSB512,
30402 IX86_BUILTIN_PABSW512,
30403 IX86_BUILTIN_BLENDMW512,
30404 IX86_BUILTIN_BLENDMB512,
30405 IX86_BUILTIN_CMPB512,
30406 IX86_BUILTIN_CMPW512,
30407 IX86_BUILTIN_UCMPB512,
30408 IX86_BUILTIN_UCMPW512,
30410 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30411 where all operands are 32-byte or 64-byte wide respectively. */
30412 IX86_BUILTIN_GATHERALTSIV4DF,
30413 IX86_BUILTIN_GATHERALTDIV8SF,
30414 IX86_BUILTIN_GATHERALTSIV4DI,
30415 IX86_BUILTIN_GATHERALTDIV8SI,
30416 IX86_BUILTIN_GATHER3ALTDIV16SF,
30417 IX86_BUILTIN_GATHER3ALTDIV16SI,
30418 IX86_BUILTIN_GATHER3ALTSIV4DF,
30419 IX86_BUILTIN_GATHER3ALTDIV8SF,
30420 IX86_BUILTIN_GATHER3ALTSIV4DI,
30421 IX86_BUILTIN_GATHER3ALTDIV8SI,
30422 IX86_BUILTIN_GATHER3ALTSIV8DF,
30423 IX86_BUILTIN_GATHER3ALTSIV8DI,
30424 IX86_BUILTIN_GATHER3DIV16SF,
30425 IX86_BUILTIN_GATHER3DIV16SI,
30426 IX86_BUILTIN_GATHER3DIV8DF,
30427 IX86_BUILTIN_GATHER3DIV8DI,
30428 IX86_BUILTIN_GATHER3SIV16SF,
30429 IX86_BUILTIN_GATHER3SIV16SI,
30430 IX86_BUILTIN_GATHER3SIV8DF,
30431 IX86_BUILTIN_GATHER3SIV8DI,
30432 IX86_BUILTIN_SCATTERDIV16SF,
30433 IX86_BUILTIN_SCATTERDIV16SI,
30434 IX86_BUILTIN_SCATTERDIV8DF,
30435 IX86_BUILTIN_SCATTERDIV8DI,
30436 IX86_BUILTIN_SCATTERSIV16SF,
30437 IX86_BUILTIN_SCATTERSIV16SI,
30438 IX86_BUILTIN_SCATTERSIV8DF,
30439 IX86_BUILTIN_SCATTERSIV8DI,
30442 IX86_BUILTIN_GATHERPFQPD,
30443 IX86_BUILTIN_GATHERPFDPS,
30444 IX86_BUILTIN_GATHERPFDPD,
30445 IX86_BUILTIN_GATHERPFQPS,
30446 IX86_BUILTIN_SCATTERPFDPD,
30447 IX86_BUILTIN_SCATTERPFDPS,
30448 IX86_BUILTIN_SCATTERPFQPD,
30449 IX86_BUILTIN_SCATTERPFQPS,
30452 IX86_BUILTIN_EXP2PD_MASK,
30453 IX86_BUILTIN_EXP2PS_MASK,
30454 IX86_BUILTIN_EXP2PS,
30455 IX86_BUILTIN_RCP28PD,
30456 IX86_BUILTIN_RCP28PS,
30457 IX86_BUILTIN_RCP28SD,
30458 IX86_BUILTIN_RCP28SS,
30459 IX86_BUILTIN_RSQRT28PD,
30460 IX86_BUILTIN_RSQRT28PS,
30461 IX86_BUILTIN_RSQRT28SD,
30462 IX86_BUILTIN_RSQRT28SS,
30465 IX86_BUILTIN_VPMADD52LUQ512,
30466 IX86_BUILTIN_VPMADD52HUQ512,
30467 IX86_BUILTIN_VPMADD52LUQ256,
30468 IX86_BUILTIN_VPMADD52HUQ256,
30469 IX86_BUILTIN_VPMADD52LUQ128,
30470 IX86_BUILTIN_VPMADD52HUQ128,
30471 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30472 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30473 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30474 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30475 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30476 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30479 IX86_BUILTIN_VPMULTISHIFTQB512,
30480 IX86_BUILTIN_VPMULTISHIFTQB256,
30481 IX86_BUILTIN_VPMULTISHIFTQB128,
30482 IX86_BUILTIN_VPERMVARQI512_MASK,
30483 IX86_BUILTIN_VPERMT2VARQI512,
30484 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30485 IX86_BUILTIN_VPERMI2VARQI512,
30486 IX86_BUILTIN_VPERMVARQI256_MASK,
30487 IX86_BUILTIN_VPERMVARQI128_MASK,
30488 IX86_BUILTIN_VPERMT2VARQI256,
30489 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30490 IX86_BUILTIN_VPERMT2VARQI128,
30491 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30492 IX86_BUILTIN_VPERMI2VARQI256,
30493 IX86_BUILTIN_VPERMI2VARQI128,
30495 /* SHA builtins. */
30496 IX86_BUILTIN_SHA1MSG1,
30497 IX86_BUILTIN_SHA1MSG2,
30498 IX86_BUILTIN_SHA1NEXTE,
30499 IX86_BUILTIN_SHA1RNDS4,
30500 IX86_BUILTIN_SHA256MSG1,
30501 IX86_BUILTIN_SHA256MSG2,
30502 IX86_BUILTIN_SHA256RNDS2,
30504 /* CLWB instructions. */
30507 /* PCOMMIT instructions. */
30508 IX86_BUILTIN_PCOMMIT,
30510 /* CLFLUSHOPT instructions. */
30511 IX86_BUILTIN_CLFLUSHOPT,
30513 /* TFmode support builtins. */
30515 IX86_BUILTIN_HUGE_VALQ,
30516 IX86_BUILTIN_FABSQ,
30517 IX86_BUILTIN_COPYSIGNQ,
30519 /* Vectorizer support builtins. */
30520 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30521 IX86_BUILTIN_CPYSGNPS,
30522 IX86_BUILTIN_CPYSGNPD,
30523 IX86_BUILTIN_CPYSGNPS256,
30524 IX86_BUILTIN_CPYSGNPS512,
30525 IX86_BUILTIN_CPYSGNPD256,
30526 IX86_BUILTIN_CPYSGNPD512,
30527 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30528 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30531 /* FMA4 instructions. */
30532 IX86_BUILTIN_VFMADDSS,
30533 IX86_BUILTIN_VFMADDSD,
30534 IX86_BUILTIN_VFMADDPS,
30535 IX86_BUILTIN_VFMADDPD,
30536 IX86_BUILTIN_VFMADDPS256,
30537 IX86_BUILTIN_VFMADDPD256,
30538 IX86_BUILTIN_VFMADDSUBPS,
30539 IX86_BUILTIN_VFMADDSUBPD,
30540 IX86_BUILTIN_VFMADDSUBPS256,
30541 IX86_BUILTIN_VFMADDSUBPD256,
30543 /* FMA3 instructions. */
30544 IX86_BUILTIN_VFMADDSS3,
30545 IX86_BUILTIN_VFMADDSD3,
30547 /* XOP instructions. */
30548 IX86_BUILTIN_VPCMOV,
30549 IX86_BUILTIN_VPCMOV_V2DI,
30550 IX86_BUILTIN_VPCMOV_V4SI,
30551 IX86_BUILTIN_VPCMOV_V8HI,
30552 IX86_BUILTIN_VPCMOV_V16QI,
30553 IX86_BUILTIN_VPCMOV_V4SF,
30554 IX86_BUILTIN_VPCMOV_V2DF,
30555 IX86_BUILTIN_VPCMOV256,
30556 IX86_BUILTIN_VPCMOV_V4DI256,
30557 IX86_BUILTIN_VPCMOV_V8SI256,
30558 IX86_BUILTIN_VPCMOV_V16HI256,
30559 IX86_BUILTIN_VPCMOV_V32QI256,
30560 IX86_BUILTIN_VPCMOV_V8SF256,
30561 IX86_BUILTIN_VPCMOV_V4DF256,
30563 IX86_BUILTIN_VPPERM,
30565 IX86_BUILTIN_VPMACSSWW,
30566 IX86_BUILTIN_VPMACSWW,
30567 IX86_BUILTIN_VPMACSSWD,
30568 IX86_BUILTIN_VPMACSWD,
30569 IX86_BUILTIN_VPMACSSDD,
30570 IX86_BUILTIN_VPMACSDD,
30571 IX86_BUILTIN_VPMACSSDQL,
30572 IX86_BUILTIN_VPMACSSDQH,
30573 IX86_BUILTIN_VPMACSDQL,
30574 IX86_BUILTIN_VPMACSDQH,
30575 IX86_BUILTIN_VPMADCSSWD,
30576 IX86_BUILTIN_VPMADCSWD,
30578 IX86_BUILTIN_VPHADDBW,
30579 IX86_BUILTIN_VPHADDBD,
30580 IX86_BUILTIN_VPHADDBQ,
30581 IX86_BUILTIN_VPHADDWD,
30582 IX86_BUILTIN_VPHADDWQ,
30583 IX86_BUILTIN_VPHADDDQ,
30584 IX86_BUILTIN_VPHADDUBW,
30585 IX86_BUILTIN_VPHADDUBD,
30586 IX86_BUILTIN_VPHADDUBQ,
30587 IX86_BUILTIN_VPHADDUWD,
30588 IX86_BUILTIN_VPHADDUWQ,
30589 IX86_BUILTIN_VPHADDUDQ,
30590 IX86_BUILTIN_VPHSUBBW,
30591 IX86_BUILTIN_VPHSUBWD,
30592 IX86_BUILTIN_VPHSUBDQ,
30594 IX86_BUILTIN_VPROTB,
30595 IX86_BUILTIN_VPROTW,
30596 IX86_BUILTIN_VPROTD,
30597 IX86_BUILTIN_VPROTQ,
30598 IX86_BUILTIN_VPROTB_IMM,
30599 IX86_BUILTIN_VPROTW_IMM,
30600 IX86_BUILTIN_VPROTD_IMM,
30601 IX86_BUILTIN_VPROTQ_IMM,
30603 IX86_BUILTIN_VPSHLB,
30604 IX86_BUILTIN_VPSHLW,
30605 IX86_BUILTIN_VPSHLD,
30606 IX86_BUILTIN_VPSHLQ,
30607 IX86_BUILTIN_VPSHAB,
30608 IX86_BUILTIN_VPSHAW,
30609 IX86_BUILTIN_VPSHAD,
30610 IX86_BUILTIN_VPSHAQ,
30612 IX86_BUILTIN_VFRCZSS,
30613 IX86_BUILTIN_VFRCZSD,
30614 IX86_BUILTIN_VFRCZPS,
30615 IX86_BUILTIN_VFRCZPD,
30616 IX86_BUILTIN_VFRCZPS256,
30617 IX86_BUILTIN_VFRCZPD256,
30619 IX86_BUILTIN_VPCOMEQUB,
30620 IX86_BUILTIN_VPCOMNEUB,
30621 IX86_BUILTIN_VPCOMLTUB,
30622 IX86_BUILTIN_VPCOMLEUB,
30623 IX86_BUILTIN_VPCOMGTUB,
30624 IX86_BUILTIN_VPCOMGEUB,
30625 IX86_BUILTIN_VPCOMFALSEUB,
30626 IX86_BUILTIN_VPCOMTRUEUB,
30628 IX86_BUILTIN_VPCOMEQUW,
30629 IX86_BUILTIN_VPCOMNEUW,
30630 IX86_BUILTIN_VPCOMLTUW,
30631 IX86_BUILTIN_VPCOMLEUW,
30632 IX86_BUILTIN_VPCOMGTUW,
30633 IX86_BUILTIN_VPCOMGEUW,
30634 IX86_BUILTIN_VPCOMFALSEUW,
30635 IX86_BUILTIN_VPCOMTRUEUW,
30637 IX86_BUILTIN_VPCOMEQUD,
30638 IX86_BUILTIN_VPCOMNEUD,
30639 IX86_BUILTIN_VPCOMLTUD,
30640 IX86_BUILTIN_VPCOMLEUD,
30641 IX86_BUILTIN_VPCOMGTUD,
30642 IX86_BUILTIN_VPCOMGEUD,
30643 IX86_BUILTIN_VPCOMFALSEUD,
30644 IX86_BUILTIN_VPCOMTRUEUD,
30646 IX86_BUILTIN_VPCOMEQUQ,
30647 IX86_BUILTIN_VPCOMNEUQ,
30648 IX86_BUILTIN_VPCOMLTUQ,
30649 IX86_BUILTIN_VPCOMLEUQ,
30650 IX86_BUILTIN_VPCOMGTUQ,
30651 IX86_BUILTIN_VPCOMGEUQ,
30652 IX86_BUILTIN_VPCOMFALSEUQ,
30653 IX86_BUILTIN_VPCOMTRUEUQ,
30655 IX86_BUILTIN_VPCOMEQB,
30656 IX86_BUILTIN_VPCOMNEB,
30657 IX86_BUILTIN_VPCOMLTB,
30658 IX86_BUILTIN_VPCOMLEB,
30659 IX86_BUILTIN_VPCOMGTB,
30660 IX86_BUILTIN_VPCOMGEB,
30661 IX86_BUILTIN_VPCOMFALSEB,
30662 IX86_BUILTIN_VPCOMTRUEB,
30664 IX86_BUILTIN_VPCOMEQW,
30665 IX86_BUILTIN_VPCOMNEW,
30666 IX86_BUILTIN_VPCOMLTW,
30667 IX86_BUILTIN_VPCOMLEW,
30668 IX86_BUILTIN_VPCOMGTW,
30669 IX86_BUILTIN_VPCOMGEW,
30670 IX86_BUILTIN_VPCOMFALSEW,
30671 IX86_BUILTIN_VPCOMTRUEW,
30673 IX86_BUILTIN_VPCOMEQD,
30674 IX86_BUILTIN_VPCOMNED,
30675 IX86_BUILTIN_VPCOMLTD,
30676 IX86_BUILTIN_VPCOMLED,
30677 IX86_BUILTIN_VPCOMGTD,
30678 IX86_BUILTIN_VPCOMGED,
30679 IX86_BUILTIN_VPCOMFALSED,
30680 IX86_BUILTIN_VPCOMTRUED,
30682 IX86_BUILTIN_VPCOMEQQ,
30683 IX86_BUILTIN_VPCOMNEQ,
30684 IX86_BUILTIN_VPCOMLTQ,
30685 IX86_BUILTIN_VPCOMLEQ,
30686 IX86_BUILTIN_VPCOMGTQ,
30687 IX86_BUILTIN_VPCOMGEQ,
30688 IX86_BUILTIN_VPCOMFALSEQ,
30689 IX86_BUILTIN_VPCOMTRUEQ,
30691 /* LWP instructions. */
30692 IX86_BUILTIN_LLWPCB,
30693 IX86_BUILTIN_SLWPCB,
30694 IX86_BUILTIN_LWPVAL32,
30695 IX86_BUILTIN_LWPVAL64,
30696 IX86_BUILTIN_LWPINS32,
30697 IX86_BUILTIN_LWPINS64,
30702 IX86_BUILTIN_XBEGIN,
30704 IX86_BUILTIN_XABORT,
30705 IX86_BUILTIN_XTEST,
30708 IX86_BUILTIN_BNDMK,
30709 IX86_BUILTIN_BNDSTX,
30710 IX86_BUILTIN_BNDLDX,
30711 IX86_BUILTIN_BNDCL,
30712 IX86_BUILTIN_BNDCU,
30713 IX86_BUILTIN_BNDRET,
30714 IX86_BUILTIN_BNDNARROW,
30715 IX86_BUILTIN_BNDINT,
30716 IX86_BUILTIN_SIZEOF,
30717 IX86_BUILTIN_BNDLOWER,
30718 IX86_BUILTIN_BNDUPPER,
30720 /* BMI instructions. */
30721 IX86_BUILTIN_BEXTR32,
30722 IX86_BUILTIN_BEXTR64,
30725 /* TBM instructions. */
30726 IX86_BUILTIN_BEXTRI32,
30727 IX86_BUILTIN_BEXTRI64,
30729 /* BMI2 instructions. */
30730 IX86_BUILTIN_BZHI32,
30731 IX86_BUILTIN_BZHI64,
30732 IX86_BUILTIN_PDEP32,
30733 IX86_BUILTIN_PDEP64,
30734 IX86_BUILTIN_PEXT32,
30735 IX86_BUILTIN_PEXT64,
30737 /* ADX instructions. */
30738 IX86_BUILTIN_ADDCARRYX32,
30739 IX86_BUILTIN_ADDCARRYX64,
30741 /* SBB instructions. */
30742 IX86_BUILTIN_SBB32,
30743 IX86_BUILTIN_SBB64,
30745 /* FSGSBASE instructions. */
30746 IX86_BUILTIN_RDFSBASE32,
30747 IX86_BUILTIN_RDFSBASE64,
30748 IX86_BUILTIN_RDGSBASE32,
30749 IX86_BUILTIN_RDGSBASE64,
30750 IX86_BUILTIN_WRFSBASE32,
30751 IX86_BUILTIN_WRFSBASE64,
30752 IX86_BUILTIN_WRGSBASE32,
30753 IX86_BUILTIN_WRGSBASE64,
30755 /* RDRND instructions. */
30756 IX86_BUILTIN_RDRAND16_STEP,
30757 IX86_BUILTIN_RDRAND32_STEP,
30758 IX86_BUILTIN_RDRAND64_STEP,
30760 /* RDSEED instructions. */
30761 IX86_BUILTIN_RDSEED16_STEP,
30762 IX86_BUILTIN_RDSEED32_STEP,
30763 IX86_BUILTIN_RDSEED64_STEP,
30765 /* F16C instructions. */
30766 IX86_BUILTIN_CVTPH2PS,
30767 IX86_BUILTIN_CVTPH2PS256,
30768 IX86_BUILTIN_CVTPS2PH,
30769 IX86_BUILTIN_CVTPS2PH256,
30771 /* MONITORX and MWAITX instrucions. */
30772 IX86_BUILTIN_MONITORX,
30773 IX86_BUILTIN_MWAITX,
30775 /* CFString built-in for darwin */
30776 IX86_BUILTIN_CFSTRING,
30778 /* Builtins to get CPU type and supported features. */
30779 IX86_BUILTIN_CPU_INIT,
30780 IX86_BUILTIN_CPU_IS,
30781 IX86_BUILTIN_CPU_SUPPORTS,
30783 /* Read/write FLAGS register built-ins. */
30784 IX86_BUILTIN_READ_FLAGS,
30785 IX86_BUILTIN_WRITE_FLAGS,
30790 /* Table for the ix86 builtin decls.  Indexed by enum ix86_builtins;
   holds the FUNCTION_DECL created for each builtin (or NULL_TREE if
   deferred / not added -- see def_builtin below).  */
30791 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30793 /* Table of all of the builtin functions that are possible with different ISA's
30794 but are waiting to be built until a function is declared to use that ISA.  */
30796 struct builtin_isa {
30797 const char *name; /* function name */
30798 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30799 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30800 bool const_p; /* true if the declaration is constant */
30801 bool leaf_p; /* true if the declaration has leaf attribute */
30802 bool nothrow_p; /* true if the declaration has nothrow attribute */
30803 bool set_and_not_built_p; /* true once def_builtin has recorded this
   entry but the decl has not yet been built (cleared when it is) */
/* NOTE(review): the closing "};" of this struct (original lines around
   30804-30805) appears to be missing from this excerpt -- the embedded
   line numbers jump from 30803 to 30806.  Verify against the full file.  */
30806 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30808 /* Bits that can still enable any inclusion of a builtin.  Union of the
   ISA masks of all builtins recorded as set_and_not_built_p; lets
   ix86_add_new_builtins bail out early when no deferred builtin can match.  */
30809 static HOST_WIDE_INT deferred_isa_values = 0;
30811 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30812 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30813 function decl in the ix86_builtins array. Returns the function decl or
30814 NULL_TREE, if the builtin was not added.
30816 If the front end has a special hook for builtin functions, delay adding
30817 builtin functions that aren't in the current ISA until the ISA is changed
30818 with function specific optimization. Doing so, can save about 300K for the
30819 default compiler. When the builtin is expanded, check at that time whether
   the required ISA has been enabled (see ix86_add_new_builtins).
30822 If the front end doesn't have a special hook, record all builtins, even if
30823 it isn't an instruction set in the current ISA in case the user uses
30824 function specific options for a different ISA, so that we don't get scope
30825 errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): the embedded line numbers below are discontinuous
   (30830 -> 30832, 30844 -> 30846, 30846 -> 30848, ...), so this excerpt
   is missing lines: the function's return-type line ("static inline tree"
   presumably, at 30827), several braces, the first clause of the condition
   continued at 30848 (likely a "mask == 0" test at 30847), the trailing
   arguments of the add_builtin_function call (30855), and the function's
   final return.  Annotations below describe only what is visible; verify
   against the complete file.  */
30828 def_builtin (HOST_WIDE_INT mask, const char *name,
30829 enum ix86_builtin_func_type tcode,
30830 enum ix86_builtins code)
30832 tree decl = NULL_TREE;
/* 64-bit-only builtins are skipped entirely on 32-bit targets.  */
30834 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
/* Remember the full mask so a deferred builtin can be checked later.  */
30836 ix86_builtins_isa[(int) code].isa = mask;
30838 /* OPTION_MASK_ISA_AVX512VL has special meaning. Despite of generic case,
30839 where any bit set means that built-in is enable, this bit must be *and-ed*
30840 with another one. E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
30841 means that *both* cpuid bits must be set for the built-in to be available.
30842 Handle this here. */
30843 if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
30844 mask &= ~OPTION_MASK_ISA_AVX512VL;
30846 mask &= ~OPTION_MASK_ISA_64BIT;
/* Build the decl now when the ISA is already enabled, or when the front
   end has no extended-scope hook (so deferral would cause scope errors);
   the leading clause of this condition is missing from the excerpt.  */
30848 || (mask & ix86_isa_flags) != 0
30849 || (lang_hooks.builtin_function
30850 == lang_hooks.builtin_function_ext_scope))
30853 tree type = ix86_get_builtin_func_type (tcode);
30854 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30856 ix86_builtins[(int) code] = decl;
30857 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30861 /* Just a MASK where set_and_not_built_p == true can potentially
30862 include a builtin. */
/* Deferred path: record everything needed to build the decl later.  */
30863 deferred_isa_values |= mask;
30864 ix86_builtins[(int) code] = NULL_TREE;
30865 ix86_builtins_isa[(int) code].tcode = tcode;
30866 ix86_builtins_isa[(int) code].name = name;
30867 ix86_builtins_isa[(int) code].leaf_p = false;
30868 ix86_builtins_isa[(int) code].nothrow_p = false;
30869 ix86_builtins_isa[(int) code].const_p = false;
30870 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30877 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): line numbers jump 30883 -> 30885 -> 30887 here, so the
   excerpt is missing the guard structure -- presumably "if (decl)" before
   30885, "else" before 30887, and a final "return decl;".  If so: a decl
   built immediately is marked TREE_READONLY, while a deferred builtin has
   const_p recorded so ix86_add_new_builtins can apply it later.  Verify
   against the complete file.  */
30880 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30881 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30883 tree decl = def_builtin (mask, name, tcode, code);
30885 TREE_READONLY (decl) = 1;
30887 ix86_builtins_isa[(int) code].const_p = true;
30892 /* Add any new builtin functions for a given ISA that may not have been
30893 declared. This saves a bit of space compared to adding all of the
30894 declarations to the tree, even if we didn't use them. */
/* NOTE(review): discontinuous line numbers (30899 -> 30902, 30907 -> 30909,
   30921 -> 30924, ...) show this excerpt is missing the return-type line,
   braces, the early "return" after the deferred_isa_values test, the
   declarations of "i"/"decl"/"type", and trailing arguments of
   add_builtin_function_ext_scope / build_tree_list.  Verify against the
   complete file.  */
30897 ix86_add_new_builtins (HOST_WIDE_INT isa)
/* Fast path: nothing deferred can be enabled by these ISA bits.  */
30899 if ((isa & deferred_isa_values) == 0)
30902 /* Bits in ISA value can be removed from potential isa values. */
30903 deferred_isa_values &= ~isa;
/* Suppress any active target pragma while declaring: the new decls must
   be created at global (extended) scope, not under the pragma's options.  */
30906 tree saved_current_target_pragma = current_target_pragma;
30907 current_target_pragma = NULL_TREE;
30909 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30911 if ((ix86_builtins_isa[i].isa & isa) != 0
30912 && ix86_builtins_isa[i].set_and_not_built_p)
30916 /* Don't define the builtin again. */
30917 ix86_builtins_isa[i].set_and_not_built_p = false;
30919 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30920 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30921 type, i, BUILT_IN_MD, NULL,
/* Re-apply the attributes recorded by def_builtin_const and friends.  */
30924 ix86_builtins[i] = decl;
30925 if (ix86_builtins_isa[i].const_p)
30926 TREE_READONLY (decl) = 1;
30927 if (ix86_builtins_isa[i].leaf_p)
30928 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30930 if (ix86_builtins_isa[i].nothrow_p)
30931 TREE_NOTHROW (decl) = 1;
30935 current_target_pragma = saved_current_target_pragma;
30938 /* Bits for builtin_description.flag. */
30940 /* Set when we don't support the comparison natively, and should
30941 swap_comparison in order to support it. */
30942 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the bdesc_* builtin tables below.
   NOTE(review): line numbers jump 30944 -> 30946 and 30950 -> 30954, so the
   opening "{", the final member (presumably "const int flag;", which would
   hold BUILTIN_DESC_SWAP_OPERANDS bits -- the table entries below have six
   initializers against the five members visible here), and the closing "};"
   are missing from this excerpt.  Verify against the complete file.  */
30944 struct builtin_description
30946 const HOST_WIDE_INT mask;
30947 const enum insn_code icode;
30948 const char *const name;
30949 const enum ix86_builtins code;
30950 const enum rtx_code comparison;
/* Descriptions of the SSE/SSE2 comis/ucomis scalar-compare builtins.
   Each entry: { ISA option mask, insn code, builtin name, builtin enum,
   comparison rtx code, flags }.
   NOTE(review): the opening "{" after "=" and the closing "};" of this
   initializer (original lines 30955 and 30980) are missing from this
   excerpt -- the embedded numbering jumps 30954 -> 30956.  */
30954 static const struct builtin_description bdesc_comi[] =
30956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30961 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30962 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30963 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30964 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30965 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30966 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30967 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* Table of SSE4.2 explicit-length packed string-compare builtins
   (pcmpestri/pcmpestrm and the flag-reading variants).  The index and
   mask forms put 0 in the last element; the a/c/o/s/z forms put the
   CC mode (cast to int) from which the corresponding flag bit is
   extracted.
   NOTE(review): the aggregate's opening "{" and closing "};" appear to
   have been lost from this extraction -- confirm against the original
   source.  */
30982 static const struct builtin_description bdesc_pcmpestr[] =
30985 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30986 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30987 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30988 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30989 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30990 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30991 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* Table of SSE4.2 implicit-length (NUL-terminated) packed
   string-compare builtins (pcmpistri/pcmpistrm and the flag-reading
   variants).  Layout mirrors bdesc_pcmpestr: 0 in the last element for
   the index/mask forms, the CC mode (cast to int) for the a/c/o/s/z
   flag-extraction forms.
   NOTE(review): the aggregate's opening "{" and closing "};" appear to
   have been lost from this extraction -- confirm against the original
   source.  */
30994 static const struct builtin_description bdesc_pcmpistr[] =
30997 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30998 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30999 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
31000 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
31001 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
31002 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
31003 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
31006 /* Special builtins with variable number of arguments. */
31007 static const struct builtin_description bdesc_special_args[] =
31009 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
31010 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
31011 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
31013 /* 80387 (for use internally for atomic compound assignment). */
31014 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
31015 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
31016 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
31017 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
31020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
31023 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
31025 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
31026 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
31027 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
31028 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31029 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31030 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31031 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31032 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31033 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31035 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31036 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31037 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31038 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31039 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31040 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31041 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31042 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31054 /* SSE or 3DNow!A */
31055 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31056 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
31059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
31063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
31065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
31066 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
31067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
31068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31074 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31077 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
31080 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31081 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
31085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
31087 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31088 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31089 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
31091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
31093 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31094 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31096 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
31099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
31102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
31106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
31107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
31108 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
31109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
31110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
31111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
31112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
31115 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
31116 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
31117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
31118 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
31119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
31120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
31121 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
31122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
31123 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
31126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
31147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
31148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
31149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
31150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31174 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
31175 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
31176 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
31177 { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
31178 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
31179 { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
31182 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31183 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31184 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31185 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31186 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31187 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31188 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31189 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31192 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31193 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31194 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31197 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31198 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31199 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31200 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31203 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31204 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31205 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31206 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31239 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31240 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31241 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31242 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31299 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31302 /* Builtins with variable number of arguments. */
31303 static const struct builtin_description bdesc_args[] =
31305 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31306 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31307 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31308 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31309 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31310 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31311 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
31350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
31352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31379 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31383 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31384 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31385 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31386 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31387 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31388 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31389 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31390 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31391 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31392 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31393 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31394 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31395 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31396 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31397 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31400 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31401 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31402 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31403 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31404 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31405 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31410 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31412 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31416 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31419 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31423 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31424 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31425 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
31443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31453 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31454 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31458 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31460 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31461 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31463 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31468 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31469 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31473 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
31475 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31481 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31482 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31484 /* SSE MMX or 3Dnow!A */
31485 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31486 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31487 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31489 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31490 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31491 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31492 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31494 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31495 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31497 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31506 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31518 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31519 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31523 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31525 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31526 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31527 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31528 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
31539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31556 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31560 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31562 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31563 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31565 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31568 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31569 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31571 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31573 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31574 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31575 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31576 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31577 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31578 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31579 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31580 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31591 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31592 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
31594 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31596 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31597 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31609 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31610 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31611 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31614 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31615 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31616 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31617 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31618 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31619 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31620 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31621 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31627 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31631 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31636 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31641 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31642 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31643 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31644 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31645 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31646 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31649 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31650 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31651 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31652 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31653 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31654 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31656 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31657 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31658 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31659 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31667 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31670 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31671 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31674 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
31675 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31677 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31678 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31679 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31680 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31681 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31682 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31685 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31686 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31687 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31688 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31689 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31690 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31692 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31693 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31694 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31695 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31696 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31697 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31698 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31699 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31702 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31703 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31704 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31707 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31714 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31723 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31725 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31729 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31730 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31758 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31761 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31762 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31763 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31764 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31766 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31767 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31768 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31769 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31771 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31772 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31774 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31775 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31777 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31778 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31779 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31780 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31782 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31783 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31785 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31786 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31788 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31789 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31790 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31794 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31795 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31796 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31797 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31800 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31801 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31802 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31803 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31806 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31807 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31809 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31810 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31811 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31812 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31815 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31818 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31819 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31822 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31823 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31826 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31832 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31833 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31834 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31835 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31836 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31837 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31838 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31839 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31840 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31841 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31842 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31843 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31866 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31867 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31871 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31873 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31889 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31891 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31893 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31905 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31906 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31919 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31920 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31930 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31931 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31932 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31941 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31942 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31943 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31944 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31946 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31947 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31948 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31950 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31951 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31953 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31954 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31956 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31960 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31961 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31962 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31967 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31968 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31969 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31970 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31976 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31991 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31992 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31993 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31994 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31995 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31996 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31997 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31998 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31999 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32000 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32001 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32002 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32003 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32004 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32005 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32006 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32007 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32008 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32009 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32010 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
32011 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
32012 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
32013 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
32014 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
32015 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
32016 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
32017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
32018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
32019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
32020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
32021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
32022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
32023 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
32024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32025 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32026 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32027 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32028 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32029 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
32030 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
32032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
32034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
32035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
32036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32037 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32038 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32039 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32040 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32041 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32042 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32043 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32044 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32045 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32046 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32047 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32048 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32049 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32051 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32052 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32053 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32054 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32055 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32056 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32057 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32058 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32059 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32060 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32061 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32062 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32063 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32064 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32065 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32067 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32068 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32069 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32073 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32074 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32075 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
32076 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
32077 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32078 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32079 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
32080 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
32081 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
32082 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
32083 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32084 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
32085 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
32086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
32087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
32088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
32090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
32091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
32092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
32093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
32094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
32095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32100 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32101 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32102 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32103 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32104 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32106 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
32109 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32110 { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32111 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
32114 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32115 { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32118 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
32119 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
32120 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
32121 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
32124 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32125 { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32126 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32127 { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32128 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32129 { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
32134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
32135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
32138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
32156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
32158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
32163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
32164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
32165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
32166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
32167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
32168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
32169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
32170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
32187 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
32188 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
32189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
32191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
/* AVX512CD builtins: leading-zero count and conflict detection on
   512-bit integer vectors.  */
32298 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32299 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32300 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32301 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32333 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32338 /* Mask arithmetic operations */
32339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
/* SHA extension builtins (sha1*/sha256* message-schedule and round
   instructions).  NOTE(review): these are gated on OPTION_MASK_ISA_SSE2
   only — SHA-ISA availability is presumably enforced elsewhere; confirm
   against the builtin-expansion path.  */
32351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
/* AVX512VL (128/256-bit masked) builtins, some additionally requiring
   AVX512BW or AVX512DQ as indicated by the ISA mask.  */
32360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32370 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32371 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32372 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32373 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32398 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32399 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32400 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32401 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32402 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32403 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32404 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32405 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32406 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32407 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32408 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32409 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32410 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32415 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32416 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32417 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32418 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32419 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32420 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32421 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32422 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32423 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32424 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32427 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32428 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32429 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32430 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32451 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32452 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32453 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32454 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32455 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32456 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32457 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32458 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32470 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32471 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32498 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32499 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32500 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32501 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32502 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32503 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32524 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32535 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32536 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32537 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32538 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32544 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32554 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32555 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32560 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32561 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32596 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32597 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32598 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32599 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32616 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32617 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32618 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32623 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32624 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32625 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32626 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32627 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32628 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32629 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32630 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32631 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32632 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32633 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32637 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32742 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32743 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32744 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32745 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32746 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32747 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32748 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32749 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32756 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32757 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32758 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32759 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32770 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32771 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32772 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32773 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32774 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32775 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32776 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32777 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32802 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32804 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32834 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32835 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32836 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32837 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32838 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32839 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32840 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32841 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32850 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32851 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32852 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32853 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32854 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32855 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32856 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32857 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32858 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32859 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32860 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32861 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32862 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32863 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32864 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32865 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32866 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32867 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32868 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32869 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32870 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32871 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32872 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32873 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32874 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32875 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32880 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32881 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32882 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32883 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32888 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32889 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32890 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32891 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32896 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32897 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32898 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32899 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32904 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32905 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32906 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32907 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32948 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32949 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32950 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32951 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32952 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32953 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32954 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32955 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32956 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32957 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32958 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32960 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32961 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32964 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32965 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32966 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32967 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32975 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32976 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32977 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32978 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32996 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32997 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32998 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32999 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
33000 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33001 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33002 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
33003 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
33004 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
33005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
33006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
33007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
33008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
33009 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
33010 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
33012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
33014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
33015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
33016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
33017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
33018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
33019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
33020 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
33021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
33022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
33023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
33024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
33025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
33026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
33027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
33028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
33029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
33030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
33031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
33032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
33033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
33034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
33035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
33036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
33037 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
33038 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
33039 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
33040 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
33041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
33043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
33044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
33045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
33046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
33047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
33048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
33049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
33052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33056 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33057 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33058 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33059 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
33061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
33062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33067 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33069 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
33071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
33074 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
33075 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
33076 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
33077 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
33078 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
33079 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
33080 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
33081 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
33082 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
33083 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
33084 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
33085 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
33086 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33087 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33088 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33089 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33090 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33091 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33092 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33093 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
33094 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33095 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
33096 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
33097 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
33098 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
33099 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
33100 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
33101 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
33102 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
33103 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
33104 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
33107 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
33108 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
33109 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33110 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33111 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33112 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33113 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
33114 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
33115 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33116 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33117 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
33118 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
33119 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
33120 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
33121 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
33122 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
33123 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33124 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33125 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33126 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33127 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33128 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33129 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33130 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33131 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33132 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33133 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33134 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33135 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33136 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33137 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33138 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33139 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33140 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33141 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33142 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33143 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33144 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33145 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33146 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33147 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33148 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33149 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33150 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33151 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33152 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33153 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33154 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33155 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33156 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33157 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33158 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33159 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33160 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33161 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33162 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33163 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
33164 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
33165 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33166 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33167 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33168 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33169 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33170 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33171 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33172 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33173 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33174 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33175 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33176 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33177 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
33178 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
33179 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
33180 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
33181 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33182 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33183 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33184 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33185 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33186 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33187 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33188 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33189 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33190 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33191 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33192 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33193 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33194 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33195 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33196 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33197 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33200 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33201 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33202 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33203 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33204 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33205 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33206 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33207 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33208 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33209 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33210 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33211 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33214 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33215 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33216 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33217 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33218 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33219 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33220 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33221 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33222 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33223 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33224 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33225 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33226 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33227 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33228 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33231 /* Builtins with rounding support. */
33232 static const struct builtin_description bdesc_round_args[] =
33235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33254 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33256 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33263 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33265 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33315 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33317 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33319 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33321 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33323 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33325 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33327 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33329 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33356 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33357 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33358 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33359 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33360 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33361 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33362 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33363 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33364 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33365 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33368 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33369 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33370 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33371 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33372 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33373 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33374 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33375 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33376 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33377 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33378 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33379 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33380 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33381 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33382 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33383 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
/* Builtins for MPX.  */
33387 static const struct builtin_description bdesc_mpx[] =
33389 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33390 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33391 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33394 /* Const builtins for MPX. */
33395 static const struct builtin_description bdesc_mpx_const[] =
33397 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33398 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33399 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33400 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33401 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33402 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33403 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33404 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
/* FMA4 and XOP. */
/* Shorthand aliases for the function-type enumerators used by the
   FMA4/XOP multi-argument builtin table below.  Naming scheme, as read
   from the right-hand sides:
     MULTI_ARG_<n>_<elem>  — <n> operands of element type <elem>
       (SF = V4SF, DF = V2DF, DI = V2DI, SI = V4SI, HI = V8HI,
        QI = V16QI); a trailing "2" (e.g. SF2) or "1" selects the
        double-width 256-bit vector variant (V8SF, V4DF, ...).
     A second element tag (e.g. _SI_DI) means the result and argument
     element types differ (here V2DI result from V4SI arguments).  */

/* Four-operand forms: two vectors, a selector vector, and an int.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT

/* Three-operand forms (e.g. fused multiply-add, pcmov, pmacs).  */
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI

/* Two-operand forms; _IMM takes an SI immediate as the second
   operand, _CMP/_TF carry a comparison/trailing-flag marker in the
   underlying type enumerator.  */
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF

/* One-operand forms; the two-tag variants widen the element type
   (e.g. _QI_HI: V8HI result from a V16QI argument — horizontal
   add/sub style widenings).  */
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33461 static const struct builtin_description bdesc_multi_arg[] =
33463 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33464 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33465 UNKNOWN, (int)MULTI_ARG_3_SF },
33466 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33467 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33468 UNKNOWN, (int)MULTI_ARG_3_DF },
33470 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33471 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33472 UNKNOWN, (int)MULTI_ARG_3_SF },
33473 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33474 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33475 UNKNOWN, (int)MULTI_ARG_3_DF },
33477 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33478 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33479 UNKNOWN, (int)MULTI_ARG_3_SF },
33480 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33481 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33482 UNKNOWN, (int)MULTI_ARG_3_DF },
33483 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33484 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33485 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33486 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33487 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33488 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33490 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33491 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33492 UNKNOWN, (int)MULTI_ARG_3_SF },
33493 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33494 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33495 UNKNOWN, (int)MULTI_ARG_3_DF },
33496 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33497 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33498 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33499 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33500 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33501 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33663 /* TM vector builtins. */
33665 /* Reuse the existing x86-specific `struct builtin_description' cause
33666 we're lazy. Add casts to make them fit. */
/* NOTE(review): per the comment above, the "code" field of each entry is a
   generic BUILT_IN_TM_* value cast to ix86_builtins, and the insn code is
   CODE_FOR_nothing -- these builtins are not expanded via a named insn
   pattern here.  Entries are grouped by vector width below: 64-bit (MMX),
   128-bit (SSE), 256-bit (AVX), followed by the TM log variants.  */
33667 static const struct builtin_description bdesc_tm[] =
/* 64-bit (MMX) transactional load/store variants: plain, write-after-read
   (WaR/RaR), write-after-write (WaW/RaW), and read-for-write (RfW).  */
33669 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33670 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33671 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33672 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33673 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33674 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33675 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
/* 128-bit (SSE) transactional load/store variants.  */
33677 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33678 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33679 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33680 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33681 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33682 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33683 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
/* 256-bit (AVX) transactional load/store variants.  */
33685 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33686 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33687 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33688 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33689 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33690 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33691 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
/* TM log (undo-log) entries, one per vector width.  */
33693 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33694 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33695 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33698 /* TM callbacks. */
33700 /* Return the builtin decl needed to load a vector of TYPE. */
/* Dispatches on the vector type's size in bits (TYPE_SIZE): 64 selects the
   MMX-width TM load, 128 the SSE width, 256 the AVX width.
   NOTE(review): the non-vector / unhandled-size return path (presumably
   NULL_TREE) is not visible in this extract -- confirm in the full source.
   This is the TARGET_VECTORIZE_BUILTIN_TM_LOAD hook implementation in
   upstream GCC -- verify against the surrounding file.  */
33703 ix86_builtin_tm_load (tree type)
33705 if (TREE_CODE (type) == VECTOR_TYPE)
33707 switch (tree_to_uhwi (TYPE_SIZE (type)))
/* 64-bit vector -> __builtin__ITM_RM64 family.  */
33710 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
/* 128-bit vector -> __builtin__ITM_RM128 family.  */
33712 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
/* 256-bit vector -> __builtin__ITM_RM256 family.  */
33714 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33720 /* Return the builtin decl needed to store a vector of TYPE. */
/* Mirror of ix86_builtin_tm_load above for transactional stores: selects
   the BUILT_IN_TM_STORE_M{64,128,256} decl by the vector's bit size.
   NOTE(review): the fall-through return for non-vector types or other
   sizes (presumably NULL_TREE) is not visible in this extract -- confirm
   in the full source.  */
33723 ix86_builtin_tm_store (tree type)
33725 if (TREE_CODE (type) == VECTOR_TYPE)
33727 switch (tree_to_uhwi (TYPE_SIZE (type)))
/* 64-bit vector store.  */
33730 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
/* 128-bit vector store.  */
33732 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
/* 256-bit vector store.  */
33734 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33740 /* Initialize the transactional memory vector load/store builtins. */
/* Walks bdesc_tm and registers each entry as a front-end builtin, copying
   the decl and type attributes from the generic scalar TM builtins so the
   vector variants get the same TM semantics.  */
33743 ix86_init_tm_builtins (void)
33745 enum ix86_builtin_func_type ftype;
33746 const struct builtin_description *d;
33749 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33750 tree attrs_log, attrs_type_log;
33755 /* If there are no builtins defined, we must be compiling in a
33756 language without trans-mem support. */
33757 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
/* NOTE(review): the early exit for the no-trans-mem case (presumably a
   bare "return;") was dropped from this extract -- confirm in the full
   source.  */
33760 /* Use whatever attributes a normal TM load has. */
33761 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33762 attrs_load = DECL_ATTRIBUTES (decl);
33763 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33764 /* Use whatever attributes a normal TM store has. */
33765 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33766 attrs_store = DECL_ATTRIBUTES (decl);
33767 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33768 /* Use whatever attributes a normal TM log has. */
33769 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33770 attrs_log = DECL_ATTRIBUTES (decl);
33771 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
/* Register every table entry whose ISA bit is currently enabled, or
   unconditionally when the front end registers builtins at extern scope
   (builtin_function == builtin_function_ext_scope) -- presumably so that
   link-time / multi-target compilation still sees all of them; confirm
   against the analogous loops elsewhere in this file.  */
33773 for (i = 0, d = bdesc_tm;
33774 i < ARRAY_SIZE (bdesc_tm);
33777 if ((d->mask & ix86_isa_flags) != 0
33778 || (lang_hooks.builtin_function
33779 == lang_hooks.builtin_function_ext_scope))
33781 tree type, attrs, attrs_type;
/* d->code holds a generic BUILT_IN_TM_* value (see bdesc_tm), so cast it
   back from ix86_builtins to built_in_function.  */
33782 enum built_in_function code = (enum built_in_function) d->code;
33784 ftype = (enum ix86_builtin_func_type) d->flag;
33785 type = ix86_get_builtin_func_type (ftype);
/* Pick the attribute set matching the builtin's class: load, store,
   or (fallback) log.  */
33787 if (BUILTIN_TM_LOAD_P (code))
33789 attrs = attrs_load;
33790 attrs_type = attrs_type_load;
33792 else if (BUILTIN_TM_STORE_P (code))
33794 attrs = attrs_store;
33795 attrs_type = attrs_type_store;
/* NOTE(review): the else-branch lines selecting the log attributes
   (presumably "attrs = attrs_log;") were dropped from this extract --
   confirm in the full source.  */
33800 attrs_type = attrs_type_log;
33802 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33803 /* The builtin without the prefix for
33804 calling it directly. */
33805 d->name + strlen ("__builtin_"),
33807 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33808 set the TYPE_ATTRIBUTES. */
33809 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
/* Record the decl for this built_in_function; the final FALSE argument
   presumably marks it as not implicitly usable -- TODO confirm against
   set_builtin_decl's declaration.  */
33811 set_builtin_decl (code, decl, false);
33816 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33817 in the current target ISA to allow the user to compile particular modules
33818 with different target specific options that differ from the command line
33821 ix86_init_mmx_sse_builtins (void)
33823 const struct builtin_description * d;
33824 enum ix86_builtin_func_type ftype;
33827 /* Add all special builtins with variable number of operands. */
33828 for (i = 0, d = bdesc_special_args;
33829 i < ARRAY_SIZE (bdesc_special_args);
33835 ftype = (enum ix86_builtin_func_type) d->flag;
33836 def_builtin (d->mask, d->name, ftype, d->code);
33839 /* Add all builtins with variable number of operands. */
33840 for (i = 0, d = bdesc_args;
33841 i < ARRAY_SIZE (bdesc_args);
33847 ftype = (enum ix86_builtin_func_type) d->flag;
33848 def_builtin_const (d->mask, d->name, ftype, d->code);
33851 /* Add all builtins with rounding. */
33852 for (i = 0, d = bdesc_round_args;
33853 i < ARRAY_SIZE (bdesc_round_args);
33859 ftype = (enum ix86_builtin_func_type) d->flag;
33860 def_builtin_const (d->mask, d->name, ftype, d->code);
33863 /* pcmpestr[im] insns. */
33864 for (i = 0, d = bdesc_pcmpestr;
33865 i < ARRAY_SIZE (bdesc_pcmpestr);
33868 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33869 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33871 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33872 def_builtin_const (d->mask, d->name, ftype, d->code);
33875 /* pcmpistr[im] insns. */
33876 for (i = 0, d = bdesc_pcmpistr;
33877 i < ARRAY_SIZE (bdesc_pcmpistr);
33880 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33881 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33883 ftype = INT_FTYPE_V16QI_V16QI_INT;
33884 def_builtin_const (d->mask, d->name, ftype, d->code);
33887 /* comi/ucomi insns. */
33888 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33890 if (d->mask == OPTION_MASK_ISA_SSE2)
33891 ftype = INT_FTYPE_V2DF_V2DF;
33893 ftype = INT_FTYPE_V4SF_V4SF;
33894 def_builtin_const (d->mask, d->name, ftype, d->code);
33898 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33899 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33900 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33901 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33903 /* SSE or 3DNow!A */
33904 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33905 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33906 IX86_BUILTIN_MASKMOVQ);
33909 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33910 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33912 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33913 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33914 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33915 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33918 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33919 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33920 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33921 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33924 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33925 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33926 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33927 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33928 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33929 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33930 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33931 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33932 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33933 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33934 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33935 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33938 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33939 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33942 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33943 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33944 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33945 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33946 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33947 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33948 IX86_BUILTIN_RDRAND64_STEP);
33951 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33952 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33953 IX86_BUILTIN_GATHERSIV2DF);
33955 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33956 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33957 IX86_BUILTIN_GATHERSIV4DF);
33959 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33960 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33961 IX86_BUILTIN_GATHERDIV2DF);
33963 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33964 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33965 IX86_BUILTIN_GATHERDIV4DF);
33967 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33968 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33969 IX86_BUILTIN_GATHERSIV4SF);
33971 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33972 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33973 IX86_BUILTIN_GATHERSIV8SF);
33975 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33976 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33977 IX86_BUILTIN_GATHERDIV4SF);
33979 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33980 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33981 IX86_BUILTIN_GATHERDIV8SF);
33983 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33984 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33985 IX86_BUILTIN_GATHERSIV2DI);
33987 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33988 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33989 IX86_BUILTIN_GATHERSIV4DI);
33991 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33992 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33993 IX86_BUILTIN_GATHERDIV2DI);
33995 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33996 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33997 IX86_BUILTIN_GATHERDIV4DI);
33999 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
34000 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
34001 IX86_BUILTIN_GATHERSIV4SI);
34003 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
34004 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
34005 IX86_BUILTIN_GATHERSIV8SI);
34007 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
34008 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
34009 IX86_BUILTIN_GATHERDIV4SI);
34011 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
34012 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
34013 IX86_BUILTIN_GATHERDIV8SI);
34015 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
34016 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
34017 IX86_BUILTIN_GATHERALTSIV4DF);
34019 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
34020 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
34021 IX86_BUILTIN_GATHERALTDIV8SF);
34023 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
34024 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
34025 IX86_BUILTIN_GATHERALTSIV4DI);
34027 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
34028 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
34029 IX86_BUILTIN_GATHERALTDIV8SI);
34032 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
34033 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
34034 IX86_BUILTIN_GATHER3SIV16SF);
34036 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
34037 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
34038 IX86_BUILTIN_GATHER3SIV8DF);
34040 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
34041 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
34042 IX86_BUILTIN_GATHER3DIV16SF);
34044 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
34045 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
34046 IX86_BUILTIN_GATHER3DIV8DF);
34048 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
34049 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
34050 IX86_BUILTIN_GATHER3SIV16SI);
34052 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
34053 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
34054 IX86_BUILTIN_GATHER3SIV8DI);
34056 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
34057 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
34058 IX86_BUILTIN_GATHER3DIV16SI);
34060 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
34061 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
34062 IX86_BUILTIN_GATHER3DIV8DI);
34064 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
34065 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
34066 IX86_BUILTIN_GATHER3ALTSIV8DF);
34068 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
34069 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
34070 IX86_BUILTIN_GATHER3ALTDIV16SF);
34072 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
34073 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
34074 IX86_BUILTIN_GATHER3ALTSIV8DI);
34076 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
34077 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
34078 IX86_BUILTIN_GATHER3ALTDIV16SI);
34080 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
34081 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
34082 IX86_BUILTIN_SCATTERSIV16SF);
34084 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
34085 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
34086 IX86_BUILTIN_SCATTERSIV8DF);
34088 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
34089 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
34090 IX86_BUILTIN_SCATTERDIV16SF);
34092 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
34093 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
34094 IX86_BUILTIN_SCATTERDIV8DF);
34096 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
34097 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
34098 IX86_BUILTIN_SCATTERSIV16SI);
34100 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
34101 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
34102 IX86_BUILTIN_SCATTERSIV8DI);
34104 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
34105 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
34106 IX86_BUILTIN_SCATTERDIV16SI);
/* Tail of the AVX512F scatter registrations, followed by the AVX512VL
   128/256-bit masked gather builtins.  Each def_builtin call registers
   one __builtin_ia32_* name with its function prototype (the _QI_
   component is the QImode mask operand) and IX86_BUILTIN_* code.  */
34108 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
34109 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
34110 IX86_BUILTIN_SCATTERDIV8DI);
/* AVX512VL gathers: "siv" forms use SImode indices, "div" forms use
   DImode indices; the vector suffix is the element type/width.  */
34113 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
34114 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
34115 IX86_BUILTIN_GATHER3SIV2DF);
34117 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
34118 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
34119 IX86_BUILTIN_GATHER3SIV4DF);
34121 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
34122 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
34123 IX86_BUILTIN_GATHER3DIV2DF);
34125 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
34126 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
34127 IX86_BUILTIN_GATHER3DIV4DF);
34129 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
34130 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
34131 IX86_BUILTIN_GATHER3SIV4SF);
34133 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
34134 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
34135 IX86_BUILTIN_GATHER3SIV8SF);
34137 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
34138 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
34139 IX86_BUILTIN_GATHER3DIV4SF);
34141 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
34142 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
34143 IX86_BUILTIN_GATHER3DIV8SF);
34145 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
34146 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
34147 IX86_BUILTIN_GATHER3SIV2DI);
34149 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
34150 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
34151 IX86_BUILTIN_GATHER3SIV4DI);
34153 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
34154 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
34155 IX86_BUILTIN_GATHER3DIV2DI);
34157 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
34158 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
34159 IX86_BUILTIN_GATHER3DIV4DI);
34161 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
34162 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
34163 IX86_BUILTIN_GATHER3SIV4SI);
34165 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
34166 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
34167 IX86_BUILTIN_GATHER3SIV8SI);
34169 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
34170 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
34171 IX86_BUILTIN_GATHER3DIV4SI);
34173 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
34174 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
34175 IX86_BUILTIN_GATHER3DIV8SI);
/* AVX512VL "alternate" gather forms, used when the index vector is
   wider/narrower than the data vector.  The name strings previously
   contained a stray trailing space (e.g. "__builtin_ia32_gather3altsiv4df ")
   which made these builtins impossible to call by their documented
   names; the trailing spaces are removed here.  */
34177 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df",
34178 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
34179 IX86_BUILTIN_GATHER3ALTSIV4DF);
34181 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf",
34182 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
34183 IX86_BUILTIN_GATHER3ALTDIV8SF);
34185 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di",
34186 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
34187 IX86_BUILTIN_GATHER3ALTSIV4DI);
34189 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si",
34190 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
34191 IX86_BUILTIN_GATHER3ALTDIV8SI);
/* AVX512VL 128/256-bit masked scatter builtins.  Prototypes are
   VOID_FTYPE_<dest ptr>_<QImode mask>_<index vector>_<source vector>_INT
   (the final INT is the scale immediate).  */
34193 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34194 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34195 IX86_BUILTIN_SCATTERSIV8SF);
34197 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34198 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34199 IX86_BUILTIN_SCATTERSIV4SF);
34201 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34202 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34203 IX86_BUILTIN_SCATTERSIV4DF);
34205 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34206 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34207 IX86_BUILTIN_SCATTERSIV2DF);
34209 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34210 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34211 IX86_BUILTIN_SCATTERDIV8SF);
34213 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34214 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34215 IX86_BUILTIN_SCATTERDIV4SF);
34217 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34218 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34219 IX86_BUILTIN_SCATTERDIV4DF);
34221 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34222 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34223 IX86_BUILTIN_SCATTERDIV2DF);
34225 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34226 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34227 IX86_BUILTIN_SCATTERSIV8SI);
34229 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34230 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34231 IX86_BUILTIN_SCATTERSIV4SI);
34233 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34234 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34235 IX86_BUILTIN_SCATTERSIV4DI);
34237 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34238 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34239 IX86_BUILTIN_SCATTERSIV2DI);
34241 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34242 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34243 IX86_BUILTIN_SCATTERDIV8SI);
34245 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34246 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34247 IX86_BUILTIN_SCATTERDIV4SI);
34249 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34250 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34251 IX86_BUILTIN_SCATTERDIV4DI);
34253 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34254 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34255 IX86_BUILTIN_SCATTERDIV2DI);
/* AVX512PF gather/scatter prefetch builtins (VGATHERPF*/VSCATTERPF*).
   These take the mask first, then indices, base pointer, scale and
   prefetch hint immediates, and return void.  */
34258 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34259 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34260 IX86_BUILTIN_GATHERPFDPD);
34261 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34262 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34263 IX86_BUILTIN_GATHERPFDPS);
34264 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34265 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34266 IX86_BUILTIN_GATHERPFQPD);
34267 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34268 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34269 IX86_BUILTIN_GATHERPFQPS);
34270 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34271 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34272 IX86_BUILTIN_SCATTERPFDPD);
34273 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34274 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34275 IX86_BUILTIN_SCATTERPFDPS);
34276 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34277 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34278 IX86_BUILTIN_SCATTERPFQPD);
34279 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34280 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34281 IX86_BUILTIN_SCATTERPFQPS);
/* SHA-NI builtins; def_builtin_const registers them as pure/const
   (no side effects), which is correct for these value-computing
   message-schedule and round instructions.  */
34284 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34285 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34286 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34287 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34288 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34289 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34290 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34291 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34292 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34293 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34294 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34295 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34296 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34297 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
/* RTM transaction abort; not const — it has control-flow effects.  */
34300 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34301 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34303 /* MMX access to the vec_init patterns. */
34304 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34305 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34307 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34308 V4HI_FTYPE_HI_HI_HI_HI,
34309 IX86_BUILTIN_VEC_INIT_V4HI);
34311 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34312 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34313 IX86_BUILTIN_VEC_INIT_V8QI);
34315 /* Access to the vec_extract patterns. */
34316 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34317 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34318 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34319 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34320 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34321 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34322 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34323 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34324 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34325 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34327 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34328 "__builtin_ia32_vec_ext_v4hi",
34329 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34331 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34332 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34334 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34335 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34337 /* Access to the vec_set patterns. */
34338 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34339 "__builtin_ia32_vec_set_v2di",
34340 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34342 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34343 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34345 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34346 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34348 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34349 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34351 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34352 "__builtin_ia32_vec_set_v4hi",
34353 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34355 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34356 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
/* RDSEED step builtins: return success flag, store the random value
   through the pointer argument.  */
34359 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34360 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34361 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34362 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34363 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34364 "__builtin_ia32_rdseed_di_step",
34365 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
/* Add-with-carry builtins; mask 0 means always available.  */
34368 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34369 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34370 def_builtin (OPTION_MASK_ISA_64BIT,
34371 "__builtin_ia32_addcarryx_u64",
34372 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34373 IX86_BUILTIN_ADDCARRYX64);
/* Subtract-with-borrow builtins.  */
34376 def_builtin (0, "__builtin_ia32_sbb_u32",
34377 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34378 def_builtin (OPTION_MASK_ISA_64BIT,
34379 "__builtin_ia32_sbb_u64",
34380 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34381 IX86_BUILTIN_SBB64);
34383 /* Read/write FLAGS. */
/* NOTE(review): the ~OPTION_MASK_ISA_64BIT mask appears to make the
   u32 variants 32-bit-only (negated 64-bit mask) — confirm against
   def_builtin's mask handling.  */
34384 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34385 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34386 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34387 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34388 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34389 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34390 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34391 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
/* Cache-line flush/write-back builtins.  */
34394 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34395 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34398 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34399 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34401 /* MONITORX and MWAITX. */
34402 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34403 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34404 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34405 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34407 /* Add FMA4 multi-arg argument instructions */
34408 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34413 ftype = (enum ix86_builtin_func_type) d->flag;
34414 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register the MPX (Memory Protection Extensions) builtins from the
   bdesc_mpx and bdesc_mpx_const tables, marking each resulting decl
   as "leaf" and nothrow (see rationale below).  */
34419 ix86_init_mpx_builtins ()
34421 const struct builtin_description * d;
34422 enum ix86_builtin_func_type ftype;
/* First table: plain (side-effecting) MPX builtins.  */
34426 for (i = 0, d = bdesc_mpx;
34427 i < ARRAY_SIZE (bdesc_mpx);
34433 ftype = (enum ix86_builtin_func_type) d->flag;
34434 decl = def_builtin (d->mask, d->name, ftype, d->code);
34436 /* Without leaf and nothrow flags on MPX builtins,
34437 abnormal edges may follow their calls when setjmp
34438 is present in the function. Since we may have a lot
34439 of MPX builtin calls this causes lots of useless
34440 edges and enormous PHI nodes. To avoid this we mark
34441 MPX builtins as leaf and nothrow. */
34444 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34446 TREE_NOTHROW (decl) = 1;
/* NOTE(review): when no decl was created (ISA unavailable yet) the
   flags are recorded in ix86_builtins_isa so they can be applied when
   the builtin is materialized later — confirm against def_builtin.  */
34450 ix86_builtins_isa[(int)d->code].leaf_p = true;
34451 ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* Second table: const MPX builtins, registered via def_builtin_const
   and likewise marked leaf/nothrow.  */
34455 for (i = 0, d = bdesc_mpx_const;
34456 i < ARRAY_SIZE (bdesc_mpx_const);
34462 ftype = (enum ix86_builtin_func_type) d->flag;
34463 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34467 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34469 TREE_NOTHROW (decl) = 1;
34473 ix86_builtins_isa[(int)d->code].leaf_p = true;
34474 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34479 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34480 to return a pointer to VERSION_DECL if the outcome of the expression
34481 formed by PREDICATE_CHAIN is true. This function will be called during
34482 version dispatch to decide which function version to execute. It returns
34483 the basic block at the end, to which more conditions can be added. */
34486 add_condition_to_bb (tree function_decl, tree version_decl,
34487 tree predicate_chain, basic_block new_bb)
34489 gimple return_stmt;
34490 tree convert_expr, result_var;
34491 gimple convert_stmt;
34492 gimple call_cond_stmt;
34493 gimple if_else_stmt;
34495 basic_block bb1, bb2, bb3;
34498 tree cond_var, and_expr_var = NULL_TREE;
34501 tree predicate_decl, predicate_arg;
34503 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34505 gcc_assert (new_bb != NULL);
34506 gseq = bb_seq (new_bb);
/* Build "result = (void *) &version_decl; return result;".  */
34509 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34510 build_fold_addr_expr (version_decl));
34511 result_var = create_tmp_var (ptr_type_node);
34512 convert_stmt = gimple_build_assign (result_var, convert_expr);
34513 return_stmt = gimple_build_return (result_var);
/* No predicate: this is the unconditional (default) return.  */
34515 if (predicate_chain == NULL_TREE)
34517 gimple_seq_add_stmt (&gseq, convert_stmt);
34518 gimple_seq_add_stmt (&gseq, return_stmt);
34519 set_bb_seq (new_bb, gseq);
34520 gimple_set_bb (convert_stmt, new_bb);
34521 gimple_set_bb (return_stmt, new_bb);
/* Emit one predicate call per chain entry and AND the results
   together (via MIN_EXPR, below).  */
34526 while (predicate_chain != NULL)
34528 cond_var = create_tmp_var (integer_type_node);
34529 predicate_decl = TREE_PURPOSE (predicate_chain);
34530 predicate_arg = TREE_VALUE (predicate_chain);
34531 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34532 gimple_call_set_lhs (call_cond_stmt, cond_var);
34534 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34535 gimple_set_bb (call_cond_stmt, new_bb);
34536 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34538 predicate_chain = TREE_CHAIN (predicate_chain);
34540 if (and_expr_var == NULL)
34541 and_expr_var = cond_var;
34544 gimple assign_stmt;
34545 /* Use MIN_EXPR to check if any integer is zero.
34546 and_expr_var = min_expr <cond_var, and_expr_var> */
34547 assign_stmt = gimple_build_assign (and_expr_var,
34548 build2 (MIN_EXPR, integer_type_node,
34549 cond_var, and_expr_var));
34551 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34552 gimple_set_bb (assign_stmt, new_bb);
34553 gimple_seq_add_stmt (&gseq, assign_stmt);
/* if (and_expr_var > 0) then return &version; else fall through.  */
34557 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34559 NULL_TREE, NULL_TREE);
34560 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34561 gimple_set_bb (if_else_stmt, new_bb);
34562 gimple_seq_add_stmt (&gseq, if_else_stmt);
34564 gimple_seq_add_stmt (&gseq, convert_stmt);
34565 gimple_seq_add_stmt (&gseq, return_stmt);
34566 set_bb_seq (new_bb, gseq);
/* Split into bb1 (condition), bb2 (return) and bb3 (next condition),
   then fix up the edge flags so bb1->bb2 is the true edge and
   bb1->bb3 the false edge.  */
34569 e12 = split_block (bb1, if_else_stmt);
34571 e12->flags &= ~EDGE_FALLTHRU;
34572 e12->flags |= EDGE_TRUE_VALUE;
34574 e23 = split_block (bb2, return_stmt);
34576 gimple_set_bb (convert_stmt, bb2);
34577 gimple_set_bb (return_stmt, bb2);
34580 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34583 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34590 /* This parses the attribute arguments to target in DECL and determines
34591 the right builtin to use to match the platform specification.
34592 It returns the priority value for this version decl. If PREDICATE_LIST
34593 is not NULL, it stores the list of cpu features that need to be checked
34594 before dispatching this function. */
34596 static unsigned int
34597 get_builtin_code_for_version (tree decl, tree *predicate_list)
34600 struct cl_target_option cur_target;
34602 struct cl_target_option *new_target;
34603 const char *arg_str = NULL;
34604 const char *attrs_str = NULL;
34605 char *tok_str = NULL;
34608 /* Priority of i386 features, greater value is higher priority. This is
34609 used to decide the order in which function dispatch must happen. For
34610 instance, a version specialized for SSE4.2 should be checked for dispatch
34611 before a version for SSE3, as SSE4.2 implies SSE3. */
34612 enum feature_priority
34643 enum feature_priority priority = P_ZERO;
34645 /* These are the target attribute strings for which a dispatcher is
34646 available, from fold_builtin_cpu. */
34648 static struct _feature_list
34650 const char *const name;
34651 const enum feature_priority priority;
34653 const feature_list[] =
34659 {"sse4a", P_SSE4_A},
34660 {"ssse3", P_SSSE3},
34661 {"sse4.1", P_SSE4_1},
34662 {"sse4.2", P_SSE4_2},
34663 {"popcnt", P_POPCNT},
34671 {"avx512f", P_AVX512F}
34675 static unsigned int NUM_FEATURES
34676 = sizeof (feature_list) / sizeof (struct _feature_list);
34680 tree predicate_chain = NULL_TREE;
34681 tree predicate_decl, predicate_arg;
34683 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34684 gcc_assert (attrs != NULL);
34686 attrs = TREE_VALUE (TREE_VALUE (attrs));
34688 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34689 attrs_str = TREE_STRING_POINTER (attrs);
34691 /* Return priority zero for default function. */
34692 if (strcmp (attrs_str, "default") == 0)
34695 /* Handle arch= if specified. For priority, set it to be 1 more than
34696 the best instruction set the processor can handle. For instance, if
34697 there is a version for atom and a version for ssse3 (the highest ISA
34698 priority for atom), the atom version must be checked for dispatch
34699 before the ssse3 version. */
34700 if (strstr (attrs_str, "arch=") != NULL)
34702 cl_target_option_save (&cur_target, &global_options);
34703 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34704 &global_options_set);
34706 gcc_assert (target_node);
34707 new_target = TREE_TARGET_OPTION (target_node);
34708 gcc_assert (new_target);
/* Map the processor enum to the arch string __builtin_cpu_is
   understands, and to a dispatch priority.  */
34710 if (new_target->arch_specified && new_target->arch > 0)
34712 switch (new_target->arch)
34714 case PROCESSOR_CORE2:
34716 priority = P_PROC_SSSE3;
34718 case PROCESSOR_NEHALEM:
34719 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34720 arg_str = "westmere";
34722 /* We translate "arch=corei7" and "arch=nehalem" to
34723 "corei7" so that it will be mapped to M_INTEL_COREI7
34724 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34725 arg_str = "corei7";
34726 priority = P_PROC_SSE4_2;
34728 case PROCESSOR_SANDYBRIDGE:
34729 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34730 arg_str = "ivybridge";
34732 arg_str = "sandybridge";
34733 priority = P_PROC_AVX;
34735 case PROCESSOR_HASWELL:
34736 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34737 arg_str = "broadwell";
34739 arg_str = "haswell";
34740 priority = P_PROC_AVX2;
34742 case PROCESSOR_BONNELL:
34743 arg_str = "bonnell";
34744 priority = P_PROC_SSSE3;
34746 case PROCESSOR_KNL:
34748 priority = P_PROC_AVX512F;
34750 case PROCESSOR_SILVERMONT:
34751 arg_str = "silvermont";
34752 priority = P_PROC_SSE4_2;
34754 case PROCESSOR_AMDFAM10:
34755 arg_str = "amdfam10h";
34756 priority = P_PROC_SSE4_A;
34758 case PROCESSOR_BTVER1:
34759 arg_str = "btver1";
34760 priority = P_PROC_SSE4_A;
34762 case PROCESSOR_BTVER2:
34763 arg_str = "btver2";
34764 priority = P_PROC_BMI;
34766 case PROCESSOR_BDVER1:
34767 arg_str = "bdver1";
34768 priority = P_PROC_XOP;
34770 case PROCESSOR_BDVER2:
34771 arg_str = "bdver2";
34772 priority = P_PROC_FMA;
34774 case PROCESSOR_BDVER3:
34775 arg_str = "bdver3";
34776 priority = P_PROC_FMA;
34778 case PROCESSOR_BDVER4:
34779 arg_str = "bdver4";
34780 priority = P_PROC_AVX2;
/* Undo the temporary option change made for attribute parsing.  */
34785 cl_target_option_restore (&global_options, &cur_target);
34787 if (predicate_list && arg_str == NULL)
34789 error_at (DECL_SOURCE_LOCATION (decl),
34790 "No dispatcher found for the versioning attributes");
/* Queue a __builtin_cpu_is ("<arch>") check for this version.  */
34794 if (predicate_list)
34796 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34797 /* For a C string literal the length includes the trailing NULL. */
34798 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34799 predicate_chain = tree_cons (predicate_decl, predicate_arg,
/* Process the comma-separated feature names via a writable copy;
   strtok modifies its argument (and is not reentrant).  */
34804 /* Process feature name. */
34805 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34806 strcpy (tok_str, attrs_str);
34807 token = strtok (tok_str, ",");
34808 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34810 while (token != NULL)
34812 /* Do not process "arch=" */
34813 if (strncmp (token, "arch=", 5) == 0)
34815 token = strtok (NULL, ",");
34818 for (i = 0; i < NUM_FEATURES; ++i)
34820 if (strcmp (token, feature_list[i].name) == 0)
34822 if (predicate_list)
34824 predicate_arg = build_string_literal (
34825 strlen (feature_list[i].name) + 1,
34826 feature_list[i].name);
34827 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34830 /* Find the maximum priority feature. */
34831 if (feature_list[i].priority > priority)
34832 priority = feature_list[i].priority;
/* Loop fell off the end of feature_list: unknown feature name.  */
34837 if (predicate_list && i == NUM_FEATURES)
34839 error_at (DECL_SOURCE_LOCATION (decl),
34840 "No dispatcher found for %s", token);
34843 token = strtok (NULL, ",");
34847 if (predicate_list && predicate_chain == NULL_TREE)
34849 error_at (DECL_SOURCE_LOCATION (decl),
34850 "No dispatcher found for the versioning attributes : %s",
34854 else if (predicate_list)
/* Predicates were consed in reverse; restore source order.  */
34856 predicate_chain = nreverse (predicate_chain);
34857 *predicate_list = predicate_chain;
34863 /* This compares the priority of target features in function DECL1
34864 and DECL2. It returns positive value if DECL1 is higher priority,
34865 negative value if DECL2 is higher priority and 0 if they are the
34869 ix86_compare_version_priority (tree decl1, tree decl2)
/* Priorities come from get_builtin_code_for_version (NULL predicate
   list: just compute priority).  The int casts keep the subtraction
   signed so the sign of the result is meaningful.  */
34871 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34872 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34874 return (int)priority1 - (int)priority2;
34877 /* V1 and V2 point to function versions with different priorities
34878 based on the target ISA. This function compares their priorities. */
34881 feature_compare (const void *v1, const void *v2)
/* Local mirror of the _function_version_info layout used by
   dispatch_function_versions; keep the two in sync.  */
34883 typedef struct _function_version_info
34886 tree predicate_chain;
34887 unsigned int dispatch_priority;
34888 } function_version_info;
34890 const function_version_info c1 = *(const function_version_info *)v1;
34891 const function_version_info c2 = *(const function_version_info *)v2;
/* c2 - c1: sorts in DESCENDING priority order for qsort.  */
34892 return (c2.dispatch_priority - c1.dispatch_priority);
34895 /* This function generates the dispatch function for
34896 multi-versioned functions. DISPATCH_DECL is the function which will
34897 contain the dispatch logic. FNDECLS are the function choices for
34898 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34899 in DISPATCH_DECL in which the dispatch code is generated. */
34902 dispatch_function_versions (tree dispatch_decl,
34904 basic_block *empty_bb)
34907 gimple ifunc_cpu_init_stmt;
34911 vec<tree> *fndecls;
34912 unsigned int num_versions = 0;
34913 unsigned int actual_versions = 0;
/* Must match the layout assumed by feature_compare above.  */
34916 struct _function_version_info
34919 tree predicate_chain;
34920 unsigned int dispatch_priority;
34921 }*function_version_info;
34923 gcc_assert (dispatch_decl != NULL
34924 && fndecls_p != NULL
34925 && empty_bb != NULL);
34927 /*fndecls_p is actually a vector. */
34928 fndecls = static_cast<vec<tree> *> (fndecls_p);
34930 /* At least one more version other than the default. */
34931 num_versions = fndecls->length ();
34932 gcc_assert (num_versions >= 2);
34934 function_version_info = (struct _function_version_info *)
34935 XNEWVEC (struct _function_version_info, (num_versions - 1));
34937 /* The first version in the vector is the default decl. */
34938 default_decl = (*fndecls)[0];
34940 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34942 gseq = bb_seq (*empty_bb);
34943 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34944 constructors, so explicitly call __builtin_cpu_init here. */
34945 ifunc_cpu_init_stmt = gimple_build_call_vec (
34946 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34947 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34948 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34949 set_bb_seq (*empty_bb, gseq);
/* Collect priority and predicate chain for every non-default
   version (index 0 is the default, handled last).  */
34954 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34956 tree version_decl = ele;
34957 tree predicate_chain = NULL_TREE;
34958 unsigned int priority;
34959 /* Get attribute string, parse it and find the right predicate decl.
34960 The predicate function could be a lengthy combination of many
34961 features, like arch-type and various isa-variants. */
34962 priority = get_builtin_code_for_version (version_decl,
34965 if (predicate_chain == NULL_TREE)
34968 function_version_info [actual_versions].version_decl = version_decl;
34969 function_version_info [actual_versions].predicate_chain
34971 function_version_info [actual_versions].dispatch_priority = priority;
34975 /* Sort the versions according to descending order of dispatch priority. The
34976 priority is based on the ISA. This is not a perfect solution. There
34977 could still be ambiguity. If more than one function version is suitable
34978 to execute, which one should be dispatched? In future, allow the user
34979 to specify a dispatch priority next to the version. */
34980 qsort (function_version_info, actual_versions,
34981 sizeof (struct _function_version_info), feature_compare)
34983 for (i = 0; i < actual_versions; ++i)
34984 *empty_bb = add_condition_to_bb (dispatch_decl,
34985 function_version_info[i].version_decl,
34986 function_version_info[i].predicate_chain,
34989 /* dispatch default version at the end. */
34990 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34993 free (function_version_info);
34997 /* Comparator function to be used in qsort routine to sort attribute
34998 specification strings to "target". */
35001 attr_strcmp (const void *v1, const void *v2)
/* Each element is a char *, so dereference once before comparing.  */
35003 const char *c1 = *(char *const*)v1;
35004 const char *c2 = *(char *const*)v2;
35005 return strcmp (c1, c2);
35008 /* ARGLIST is the argument to target attribute. This function tokenizes
35009 the comma separated arguments, sorts them and returns a string which
35010 is a unique identifier for the comma separated arguments. It also
35011 replaces non-identifier characters "=,-" with "_".
35012 The caller owns (and must XDELETEVEC) the returned string.  */
35014 sorted_attr_string (tree arglist)
35017 size_t str_len_sum = 0;
35018 char **args = NULL;
35019 char *attr_str, *ret_str;
35021 unsigned int argnum = 1;
/* First pass: total length and number of comma-separated arguments.  */
35024 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35026 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35027 size_t len = strlen (str);
35028 str_len_sum += len + 1;
35029 if (arg != arglist)
35031 for (i = 0; i < strlen (str); i++)
35036 attr_str = XNEWVEC (char, str_len_sum);
/* Second pass: join all attribute strings with ','.  */
35038 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35040 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35041 size_t len = strlen (str);
35042 memcpy (attr_str + str_len_sum, str, len);
35043 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
35044 str_len_sum += len + 1;
35047 /* Replace "=,-" with "_". */
35048 for (i = 0; i < strlen (attr_str); i++)
35049 if (attr_str[i] == '=' || attr_str[i]== '-')
35055 args = XNEWVEC (char *, argnum)
/* Tokenize in place (strtok mutates attr_str), then sort the
   pointers so the identifier is order-independent.  */
35058 attr = strtok (attr_str, ",");
35059 while (attr != NULL)
35063 attr = strtok (NULL, ",");
35066 qsort (args, argnum, sizeof (char *), attr_strcmp);
35068 ret_str = XNEWVEC (char, str_len_sum);
/* Re-join the sorted tokens with '_' separators.  */
35070 for (i = 0; i < argnum; i++)
35072 size_t len = strlen (args[i]);
35073 memcpy (ret_str + str_len_sum, args[i], len);
35074 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
35075 str_len_sum += len + 1;
35079 XDELETEVEC (attr_str);
35083 /* This function changes the assembler name for functions that are
35084 versions. If DECL is a function version and has a "target"
35085 attribute, it appends the attribute string to its assembler name. */
35088 ix86_mangle_function_version_assembler_name (tree decl, tree id)
35091 const char *orig_name, *version_string;
35092 char *attr_str, *assembler_name;
/* gnu_inline versions would never get a body emitted, so reject.  */
35094 if (DECL_DECLARED_INLINE_P (decl)
35095 && lookup_attribute ("gnu_inline",
35096 DECL_ATTRIBUTES (decl))
35097 error_at (DECL_SOURCE_LOCATION (decl),
35098 "Function versions cannot be marked as gnu_inline,"
35099 " bodies have to be generated");
35101 if (DECL_VIRTUAL_P (decl)
35102 || DECL_VINDEX (decl))
35103 sorry ("Virtual function multiversioning not supported");
35105 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35107 /* target attribute string cannot be NULL. */
35108 gcc_assert (version_attr != NULL_TREE);
35110 orig_name = IDENTIFIER_POINTER (id);
35112 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The default version keeps its original assembler name.  */
35114 if (strcmp (version_string, "default") == 0)
/* Non-default version: append ".<sorted-attrs>" ('.' is
   demangler-friendly); +2 covers the '.' and the NUL.  */
35117 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
35118 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
35120 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
35122 /* Allow assembler name to be modified if already set. */
35123 if (DECL_ASSEMBLER_NAME_SET_P (decl))
35124 SET_DECL_RTL (decl, NULL);
35126 tree ret = get_identifier (assembler_name);
35127 XDELETEVEC (attr_str);
35128 XDELETEVEC (assembler_name);
35132 /* This function returns true if FN1 and FN2 are versions of the same function,
35133 that is, the target strings of the function decls are different. This assumes
35134 that FN1 and FN2 have the same signature. */
35137 ix86_function_versions (tree fn1, tree fn2)
35140 char *target1, *target2;
35143 if (TREE_CODE (fn1) != FUNCTION_DECL
35144 || TREE_CODE (fn2) != FUNCTION_DECL)
35147 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
35148 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
35150 /* At least one function decl should have the target attribute specified. */
35151 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
35154 /* Diagnose missing target attribute if one of the decls is already
35155 multi-versioned. */
35156 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
35158 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
35160 if (attr2 != NULL_TREE)
35167 error_at (DECL_SOURCE_LOCATION (fn2),
35168 "missing %<target%> attribute for multi-versioned %D",
35170 inform (DECL_SOURCE_LOCATION (fn1),
35171 "previous declaration of %D", fn1);
35172 /* Prevent diagnosing of the same error multiple times. */
35173 DECL_ATTRIBUTES (fn2)
35174 = tree_cons (get_identifier ("target"),
35175 copy_node (TREE_VALUE (attr1)),
35176 DECL_ATTRIBUTES (fn2));
/* Compare order-normalized attribute strings; identical strings
   mean the two decls are NOT distinct versions.  */
35181 target1 = sorted_attr_string (TREE_VALUE (attr1));
35182 target2 = sorted_attr_string (TREE_VALUE (attr2));
35184 /* The sorted target strings must be different for fn1 and fn2
35186 if (strcmp (target1, target2) == 0)
35191 XDELETEVEC (target1);
35192 XDELETEVEC (target2);
/* TARGET_MANGLE_DECL_ASSEMBLER_NAME hook: adjust ID for DECL,
   appending the target-version suffix and any subtarget mangling.  */
35198 ix86_mangle_decl_assembler_name (tree decl, tree id)
35200 /* For function version, add the target suffix to the assembler name. */
35201 if (TREE_CODE (decl) == FUNCTION_DECL
35202 && DECL_FUNCTION_VERSIONED (decl))
35203 id = ix86_mangle_function_version_assembler_name (decl, id);
35204 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35205 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35211 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35212 is true, append the full path name of the source file.
35213 The caller owns (and must free) the returned buffer.  */
35215 make_name (tree decl, const char *suffix, bool make_unique)
35217 char *global_var_name;
35220 const char *unique_name = NULL;
35222 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35224 /* Get a unique name that can be used globally without any chances
35225 of collision at link time. */
35227 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* +2: separator '.' plus the trailing NUL.  */
35229 name_len = strlen (name) + strlen (suffix) + 2;
35232 name_len += strlen (unique_name) + 1;
35233 global_var_name = XNEWVEC (char, name_len);
35235 /* Use '.' to concatenate names as it is demangler friendly. */
35237 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35240 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35242 return global_var_name;
35245 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35247 /* Make a dispatcher declaration for the multi-versioned function DECL.
35248 Calls to DECL function will be replaced with calls to the dispatcher
35249 by the front-end. Return the decl created. */
/* NOTE(review): excerpt elides the return type, some local declarations,
   the "is_uniq = true" branch for non-public decls, and the trailing
   "return func_decl;" plus closing brace.  */
35252 make_dispatcher_decl (const tree decl)
35256 tree fn_type, func_type;
35257 bool is_uniq = false;
35259 if (TREE_PUBLIC (decl) == 0)
/* Dispatcher gets DECL's name with an ".ifunc" suffix appended.  */
35262 func_name = make_name (decl, "ifunc", is_uniq);
/* Dispatcher shares DECL's function type (same return and args).  */
35264 fn_type = TREE_TYPE (decl);
35265 func_type = build_function_type (TREE_TYPE (fn_type),
35266 TYPE_ARG_TYPES (fn_type));
35268 func_decl = build_fn_decl (func_name, func_type);
35269 XDELETEVEC (func_name);
35270 TREE_USED (func_decl) = 1;
35271 DECL_CONTEXT (func_decl) = NULL_TREE;
35272 DECL_INITIAL (func_decl) = error_mark_node;
35273 DECL_ARTIFICIAL (func_decl) = 1;
35274 /* Mark this func as external, the resolver will flip it again if
35275 it gets generated. */
35276 DECL_EXTERNAL (func_decl) = 1;
35277 /* This will be of type IFUNCs have to be externally visible. */
35278 TREE_PUBLIC (func_decl) = 1;
35285 /* Returns true if decl is multi-versioned and DECL is the default function,
35286 that is it is not tagged with target specific optimization. */
/* NOTE(review): excerpt elides the return type, the early "return false;"
   for the guard below, and the closing brace.  */
35289 is_function_default_version (const tree decl)
35291 if (TREE_CODE (decl) != FUNCTION_DECL
35292 || !DECL_FUNCTION_VERSIONED (decl))
/* The default version carries attribute target("default").  */
35294 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35296 attr = TREE_VALUE (TREE_VALUE (attr));
35297 return (TREE_CODE (attr) == STRING_CST
35298 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35301 /* Make a dispatcher declaration for the multi-versioned function DECL.
35302 Calls to DECL function will be replaced with calls to the dispatcher
35303 by the front-end. Returns the decl of the dispatcher function. */
/* Implements TARGET_GET_FUNCTION_VERSIONS_DISPATCHER.
   NOTE(review): excerpt elides the return type, several braces, the
   initialization "first_v = node_v;", the "break;" when the default
   version is found, and the "return NULL;" for the missing-default case.  */
35306 ix86_get_function_versions_dispatcher (void *decl)
35308 tree fn = (tree) decl;
35309 struct cgraph_node *node = NULL;
35310 struct cgraph_node *default_node = NULL;
35311 struct cgraph_function_version_info *node_v = NULL;
35312 struct cgraph_function_version_info *first_v = NULL;
35314 tree dispatch_decl = NULL;
35316 struct cgraph_function_version_info *default_version_info = NULL;
35318 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35320 node = cgraph_node::get (fn);
35321 gcc_assert (node != NULL);
35323 node_v = node->function_version ();
35324 gcc_assert (node_v != NULL);
/* Memoized: a dispatcher was already built for this version chain.  */
35326 if (node_v->dispatcher_resolver != NULL)
35327 return node_v->dispatcher_resolver;
35329 /* Find the default version and make it the first node. */
35331 /* Go to the beginning of the chain. */
35332 while (first_v->prev != NULL)
35333 first_v = first_v->prev;
35334 default_version_info = first_v;
35335 while (default_version_info != NULL)
35337 if (is_function_default_version
35338 (default_version_info->this_node->decl))
35340 default_version_info = default_version_info->next;
35343 /* If there is no default node, just return NULL. */
35344 if (default_version_info == NULL)
35347 /* Make default info the first node. */
/* Doubly-linked-list splice: unhook default_version_info from its
   current position and re-insert it at the head of the chain.  */
35348 if (first_v != default_version_info)
35350 default_version_info->prev->next = default_version_info->next;
35351 if (default_version_info->next)
35352 default_version_info->next->prev = default_version_info->prev;
35353 first_v->prev = default_version_info;
35354 default_version_info->next = first_v;
35355 default_version_info->prev = NULL;
35358 default_node = default_version_info->this_node;
35360 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35361 if (targetm.has_ifunc_p ())
35363 struct cgraph_function_version_info *it_v = NULL;
35364 struct cgraph_node *dispatcher_node = NULL;
35365 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35367 /* Right now, the dispatching is done via ifunc. */
35368 dispatch_decl = make_dispatcher_decl (default_node->decl);
35370 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35371 gcc_assert (dispatcher_node != NULL);
35372 dispatcher_node->dispatcher_function = 1;
35373 dispatcher_version_info
35374 = dispatcher_node->insert_new_function_version ();
35375 dispatcher_version_info->next = default_version_info;
35376 dispatcher_node->definition = 1;
35378 /* Set the dispatcher for all the versions. */
35379 it_v = default_version_info;
35380 while (it_v != NULL)
35382 it_v->dispatcher_resolver = dispatch_decl;
/* No ifunc support: multiversioning cannot be dispatched; diagnose.  */
35389 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35390 "multiversioning needs ifunc which is not supported "
35394 return dispatch_decl;
35397 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
/* ... it to CHAIN; returns the new attribute list head.
   NOTE(review): excerpt elides the return type, remaining local
   declarations, and the trailing "return attr;".  */
35401 make_attribute (const char *name, const char *arg_name, tree chain)
35404 tree attr_arg_name;
35408 attr_name = get_identifier (name);
35409 attr_arg_name = build_string (strlen (arg_name), arg_name);
35410 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35411 attr = tree_cons (attr_name, attr_args, chain);
35415 /* Make the resolver function decl to dispatch the versions of
35416 a multi-versioned function, DEFAULT_DECL. Create an
35417 empty basic block in the resolver and store the pointer in
35418 EMPTY_BB. Return the decl of the resolver function. */
/* NOTE(review): excerpt elides the return type, the "is_uniq = true"
   assignment guarded by the TREE_PUBLIC test, pop_cfun, and the final
   "return decl;" plus closing brace.  */
35421 make_resolver_func (const tree default_decl,
35422 const tree dispatch_decl,
35423 basic_block *empty_bb)
35425 char *resolver_name;
35426 tree decl, type, decl_name, t;
35427 bool is_uniq = false;
35429 /* IFUNC's have to be globally visible. So, if the default_decl is
35430 not, then the name of the IFUNC should be made unique. */
35431 if (TREE_PUBLIC (default_decl) == 0)
35434 /* Append the filename to the resolver function if the versions are
35435 not externally visible. This is because the resolver function has
35436 to be externally visible for the loader to find it. So, appending
35437 the filename will prevent conflicts with a resolver function from
35438 another module which is based on the same version name. */
35439 resolver_name = make_name (default_decl, "resolver", is_uniq);
35441 /* The resolver function should return a (void *). */
35442 type = build_function_type_list (ptr_type_node, NULL_TREE);
35444 decl = build_fn_decl (resolver_name, type);
35445 decl_name = get_identifier (resolver_name);
35446 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35448 DECL_NAME (decl) = decl_name;
35449 TREE_USED (decl) = 1;
35450 DECL_ARTIFICIAL (decl) = 1;
35451 DECL_IGNORED_P (decl) = 0;
35452 /* IFUNC resolvers have to be externally visible. */
35453 TREE_PUBLIC (decl) = 1;
35454 DECL_UNINLINABLE (decl) = 1;
35456 /* Resolver is not external, body is generated. */
35457 DECL_EXTERNAL (decl) = 0;
35458 DECL_EXTERNAL (dispatch_decl) = 0;
35460 DECL_CONTEXT (decl) = NULL_TREE;
35461 DECL_INITIAL (decl) = make_node (BLOCK);
35462 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35464 if (DECL_COMDAT_GROUP (default_decl)
35465 || TREE_PUBLIC (default_decl))
35467 /* In this case, each translation unit with a call to this
35468 versioned function will put out a resolver. Ensure it
35469 is comdat to keep just one copy. */
35470 DECL_COMDAT (decl) = 1;
35471 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35473 /* Build result decl and add to function_decl. */
35474 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35475 DECL_ARTIFICIAL (t) = 1;
35476 DECL_IGNORED_P (t) = 1;
35477 DECL_RESULT (decl) = t;
35479 gimplify_function_tree (decl);
35480 push_cfun (DECL_STRUCT_FUNCTION (decl));
/* Hand the single empty basic block back to the caller, which will
   fill it with the dispatching code.  */
35481 *empty_bb = init_lowered_empty_function (decl, false, 0);
35483 cgraph_node::add_new_function (decl, true);
35484 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35488 gcc_assert (dispatch_decl != NULL);
35489 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35490 DECL_ATTRIBUTES (dispatch_decl)
35491 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35493 /* Create the alias for dispatch to resolver here. */
35494 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35495 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35496 XDELETEVEC (resolver_name);
35500 /* Generate the dispatching code body to dispatch multi-versioned function
35501 DECL. The target hook is called to process the "target" attributes and
35502 provide the code to dispatch the right function at run-time. NODE points
35503 to the dispatcher decl whose body will be created. */
/* Implements TARGET_GENERATE_VERSION_DISPATCHER_BODY.
   NOTE(review): excerpt elides the return type, pop_cfun, and several
   braces around the version-collecting loop.  */
35506 ix86_generate_version_dispatcher_body (void *node_p)
35508 tree resolver_decl;
35509 basic_block empty_bb;
35510 tree default_ver_decl;
35511 struct cgraph_node *versn;
35512 struct cgraph_node *node;
35514 struct cgraph_function_version_info *node_version_info = NULL;
35515 struct cgraph_function_version_info *versn_info = NULL;
35517 node = (cgraph_node *)node_p;
35519 node_version_info = node->function_version ();
35520 gcc_assert (node->dispatcher_function
35521 && node_version_info != NULL);
/* Memoized: body already generated for this dispatcher.  */
35523 if (node_version_info->dispatcher_resolver)
35524 return node_version_info->dispatcher_resolver;
35526 /* The first version in the chain corresponds to the default version. */
35527 default_ver_decl = node_version_info->next->this_node->decl;
35529 /* node is going to be an alias, so remove the finalized bit. */
35530 node->definition = false;
35532 resolver_decl = make_resolver_func (default_ver_decl,
35533 node->decl, &empty_bb);
35535 node_version_info->dispatcher_resolver = resolver_decl;
35537 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
/* Collect all versions of the function to pass to the dispatcher.  */
35539 auto_vec<tree, 2> fn_ver_vec;
35541 for (versn_info = node_version_info->next; versn_info;
35542 versn_info = versn_info->next)
35544 versn = versn_info->this_node;
35545 /* Check for virtual functions here again, as by this time it should
35546 have been determined if this function needs a vtable index or
35547 not. This happens for methods in derived classes that override
35548 virtual methods in base classes but are not explicitly marked as
35550 if (DECL_VINDEX (versn->decl))
35551 sorry ("Virtual function multiversioning not supported");
35553 fn_ver_vec.safe_push (versn->decl);
35556 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35557 cgraph_edge::rebuild_edges ();
35559 return resolver_decl;
35561 /* This builds the processor_model struct type defined in
35562 libgcc/config/i386/cpuinfo.c */
/* Layout must stay in lock-step with libgcc's __processor_model:
   three unsigned ints (__cpu_vendor, __cpu_type, __cpu_subtype) followed
   by a one-element unsigned array (__cpu_features).
   NOTE(review): excerpt elides the return type, the fourth entry of
   field_name[], the loop counter declaration, and "return type;".  */
35565 build_processor_model_struct (void)
35567 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35569 tree field = NULL_TREE, field_chain = NULL_TREE;
35571 tree type = make_node (RECORD_TYPE);
35573 /* The first 3 fields are unsigned int. */
35574 for (i = 0; i < 3; ++i)
35576 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35577 get_identifier (field_name[i]), unsigned_type_node);
35578 if (field_chain != NULL_TREE)
35579 DECL_CHAIN (field) = field_chain;
35580 field_chain = field;
35583 /* The last field is an array of unsigned integers of size one. */
35584 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35585 get_identifier (field_name[3]),
35586 build_array_type (unsigned_type_node,
35587 build_index_type (size_one_node)))
35588 if (field_chain != NULL_TREE)
35589 DECL_CHAIN (field) = field_chain;
35590 field_chain = field;
35592 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35596 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
/* NOTE(review): excerpt elides the return type, the VAR_DECL tree code
   argument and TYPE argument of build_decl, and "return new_decl;".  */
35599 make_var_decl (tree type, const char *name)
35603 new_decl = build_decl (UNKNOWN_LOCATION,
35605 get_identifier(name),
/* External + static + public: refers to the variable defined in libgcc.  */
35608 DECL_EXTERNAL (new_decl) = 1;
35609 TREE_STATIC (new_decl) = 1;
35610 TREE_PUBLIC (new_decl) = 1;
35611 DECL_INITIAL (new_decl) = 0;
35612 DECL_ARTIFICIAL (new_decl) = 0;
35613 DECL_PRESERVE_P (new_decl) = 1;
35615 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35616 assemble_variable (new_decl, 0, 0, 0);
35621 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35622 into an integer defined in libgcc/config/i386/cpuinfo.c */
/* Folds the builtin into a comparison/bit-test against the __cpu_model
   variable that libgcc's cpuinfo.c fills in at startup.
   NOTE(review): excerpt is heavily elided — the return type, most
   enumerators of processor_features, several braces, the table
   terminators, and parts of the error paths are missing.  */
35625 fold_builtin_cpu (tree fndecl, tree *args)
35628 enum ix86_builtins fn_code = (enum ix86_builtins)
35629 DECL_FUNCTION_CODE (fndecl);
35630 tree param_string_cst = NULL;
35632 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35633 enum processor_features
35656 /* These are the values for vendor types and cpu types and subtypes
35657 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35658 the corresponding start value. */
35659 enum processor_model
35669 M_INTEL_SILVERMONT,
35673 M_CPU_SUBTYPE_START,
35674 M_INTEL_COREI7_NEHALEM,
35675 M_INTEL_COREI7_WESTMERE,
35676 M_INTEL_COREI7_SANDYBRIDGE,
35677 M_AMDFAM10H_BARCELONA,
35678 M_AMDFAM10H_SHANGHAI,
35679 M_AMDFAM10H_ISTANBUL,
35680 M_AMDFAM15H_BDVER1,
35681 M_AMDFAM15H_BDVER2,
35682 M_AMDFAM15H_BDVER3,
35683 M_AMDFAM15H_BDVER4,
35684 M_INTEL_COREI7_IVYBRIDGE,
35685 M_INTEL_COREI7_HASWELL,
35686 M_INTEL_COREI7_BROADWELL
/* Mapping from the strings accepted by __builtin_cpu_is to the
   processor_model values above; must match libgcc's cpuinfo.c.  */
35689 static struct _arch_names_table
35691 const char *const name;
35692 const enum processor_model model;
35694 const arch_names_table[] =
35697 {"intel", M_INTEL},
35698 {"atom", M_INTEL_BONNELL},
35699 {"slm", M_INTEL_SILVERMONT},
35700 {"core2", M_INTEL_CORE2},
35701 {"corei7", M_INTEL_COREI7},
35702 {"nehalem", M_INTEL_COREI7_NEHALEM},
35703 {"westmere", M_INTEL_COREI7_WESTMERE},
35704 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35705 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35706 {"haswell", M_INTEL_COREI7_HASWELL},
35707 {"broadwell", M_INTEL_COREI7_BROADWELL},
35708 {"bonnell", M_INTEL_BONNELL},
35709 {"silvermont", M_INTEL_SILVERMONT},
35710 {"knl", M_INTEL_KNL},
35711 {"amdfam10h", M_AMDFAM10H},
35712 {"barcelona", M_AMDFAM10H_BARCELONA},
35713 {"shanghai", M_AMDFAM10H_SHANGHAI},
35714 {"istanbul", M_AMDFAM10H_ISTANBUL},
35715 {"btver1", M_AMD_BTVER1},
35716 {"amdfam15h", M_AMDFAM15H},
35717 {"bdver1", M_AMDFAM15H_BDVER1},
35718 {"bdver2", M_AMDFAM15H_BDVER2},
35719 {"bdver3", M_AMDFAM15H_BDVER3},
35720 {"bdver4", M_AMDFAM15H_BDVER4},
35721 {"btver2", M_AMD_BTVER2},
/* Mapping from the strings accepted by __builtin_cpu_supports to
   feature-bit indices in __cpu_model.__cpu_features[0].  */
35724 static struct _isa_names_table
35726 const char *const name;
35727 const enum processor_features feature;
35729 const isa_names_table[] =
35733 {"popcnt", F_POPCNT},
35737 {"ssse3", F_SSSE3},
35738 {"sse4a", F_SSE4_A},
35739 {"sse4.1", F_SSE4_1},
35740 {"sse4.2", F_SSE4_2},
35746 {"avx512f",F_AVX512F},
/* Materialize the extern __cpu_model variable in this TU.  */
35751 tree __processor_model_type = build_processor_model_struct ();
35752 tree __cpu_model_var = make_var_decl (__processor_model_type,
35756 varpool_node::add (__cpu_model_var);
35758 gcc_assert ((args != NULL) && (*args != NULL));
35760 param_string_cst = *args;
/* Strip wrapping expressions (e.g. NOP_EXPRs) to reach the STRING_CST.  */
35761 while (param_string_cst
35762 && TREE_CODE (param_string_cst) != STRING_CST)
35764 /* *args must be a expr that can contain other EXPRS leading to a
35766 if (!EXPR_P (param_string_cst))
35768 error ("Parameter to builtin must be a string constant or literal");
35769 return integer_zero_node;
35771 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35774 gcc_assert (param_string_cst);
35776 if (fn_code == IX86_BUILTIN_CPU_IS)
35782 unsigned int field_val = 0;
35783 unsigned int NUM_ARCH_NAMES
35784 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35786 for (i = 0; i < NUM_ARCH_NAMES; i++)
35787 if (strcmp (arch_names_table[i].name,
35788 TREE_STRING_POINTER (param_string_cst)) == 0)
35791 if (i == NUM_ARCH_NAMES)
35793 error ("Parameter to builtin not valid: %s",
35794 TREE_STRING_POINTER (param_string_cst));
35795 return integer_zero_node;
35798 field = TYPE_FIELDS (__processor_model_type);
35799 field_val = arch_names_table[i].model;
35801 /* CPU types are stored in the next field. */
35802 if (field_val > M_CPU_TYPE_START
35803 && field_val < M_CPU_SUBTYPE_START)
35805 field = DECL_CHAIN (field);
35806 field_val -= M_CPU_TYPE_START;
35809 /* CPU subtypes are stored in the next field. */
35810 if (field_val > M_CPU_SUBTYPE_START)
35812 field = DECL_CHAIN ( DECL_CHAIN (field));
35813 field_val -= M_CPU_SUBTYPE_START;
35816 /* Get the appropriate field in __cpu_model. */
35817 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35820 /* Check the value. */
35821 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35822 build_int_cstu (unsigned_type_node, field_val));
35823 return build1 (CONVERT_EXPR, integer_type_node, final);
35825 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35832 unsigned int field_val = 0;
35833 unsigned int NUM_ISA_NAMES
35834 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35836 for (i = 0; i < NUM_ISA_NAMES; i++)
35837 if (strcmp (isa_names_table[i].name,
35838 TREE_STRING_POINTER (param_string_cst)) == 0)
35841 if (i == NUM_ISA_NAMES)
35843 error ("Parameter to builtin not valid: %s",
35844 TREE_STRING_POINTER (param_string_cst));
35845 return integer_zero_node;
35848 field = TYPE_FIELDS (__processor_model_type);
35849 /* Get the last field, which is __cpu_features. */
35850 while (DECL_CHAIN (field))
35851 field = DECL_CHAIN (field);
35853 /* Get the appropriate field: __cpu_model.__cpu_features */
35854 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35857 /* Access the 0th element of __cpu_features array. */
35858 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35859 integer_zero_node, NULL_TREE, NULL_TREE);
35861 field_val = (1 << isa_names_table[i].feature);
35862 /* Return __cpu_model.__cpu_features[0] & field_val */
35863 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35864 build_int_cstu (unsigned_type_node, field_val));
35865 return build1 (CONVERT_EXPR, integer_type_node, final);
35867 gcc_unreachable ();
/* Implements TARGET_FOLD_BUILTIN: folds the cpu-detection builtins at
   compile time; everything else is left to the subtarget hook (if any).
   NOTE(review): excerpt elides the return type, the "return NULL_TREE;"
   fallback, and the closing brace.  */
35871 ix86_fold_builtin (tree fndecl, int n_args,
35872 tree *args, bool ignore ATTRIBUTE_UNUSED)
35874 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35876 enum ix86_builtins fn_code = (enum ix86_builtins)
35877 DECL_FUNCTION_CODE (fndecl);
35878 if (fn_code == IX86_BUILTIN_CPU_IS
35879 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35881 gcc_assert (n_args == 1);
35882 return fold_builtin_cpu (fndecl, args);
35886 #ifdef SUBTARGET_FOLD_BUILTIN
35887 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35893 /* Make builtins to detect cpu type and features supported. NAME is
35894 the builtin name, CODE is the builtin code, and FTYPE is the function
35895 type of the builtin. */
/* Registers one builtin and records its decl in ix86_builtins[];
   IS_CONST marks it TREE_READONLY (no side effects, CSE-able).
   NOTE(review): excerpt elides the return type, local decls of
   TYPE/DECL, the BUILT_IN_NORMAL arguments line, and closing brace.  */
35898 make_cpu_type_builtin (const char* name, int code,
35899 enum ix86_builtin_func_type ftype, bool is_const)
35904 type = ix86_get_builtin_func_type (ftype);
35905 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35907 gcc_assert (decl != NULL_TREE);
35908 ix86_builtins[(int) code] = decl;
35909 TREE_READONLY (decl) = is_const;
35912 /* Make builtins to get CPU type and features supported. The created
35915 __builtin_cpu_init (), to detect cpu type and features,
35916 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35917 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
/* cpu_init is not const (it mutates __cpu_model); the two query
   builtins are const so repeated calls can be CSE'd.  */
35921 ix86_init_platform_type_builtins (void)
35923 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35924 INT_FTYPE_VOID, false);
35925 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35926 INT_FTYPE_PCCHAR, true);
35927 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35928 INT_FTYPE_PCCHAR, true);
35931 /* Internal method for ix86_init_builtins. */
/* Registers the ABI-specific va_start/va_end/va_copy builtins
   (__builtin_ms_va_* and __builtin_sysv_va_*), each tagged with the
   matching ms_abi/sysv_abi attribute so the right calling convention
   is used regardless of the default ABI.
   NOTE(review): excerpt elides the TARGET_64BIT guard, the
   sysv_va_ref assignment target line, and several statement joins.  */
35934 ix86_init_builtins_va_builtins_abi (void)
35936 tree ms_va_ref, sysv_va_ref;
35937 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35938 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35939 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35940 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35944 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35945 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35946 ms_va_ref = build_reference_type (ms_va_list_type_node);
35948 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35951 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35952 fnvoid_va_start_ms =
35953 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35954 fnvoid_va_end_sysv =
35955 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35956 fnvoid_va_start_sysv =
35957 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35959 fnvoid_va_copy_ms =
35960 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35962 fnvoid_va_copy_sysv =
35963 build_function_type_list (void_type_node, sysv_va_ref,
35964 sysv_va_ref, NULL_TREE);
35966 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35967 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35968 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35969 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35970 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35971 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35972 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35973 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35974 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35975 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35976 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35977 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Registers the i386-specific builtin types: __float80, __float128,
   and the primitive types generated by i386-builtin-types.awk.
   NOTE(review): excerpt elides the return type and closing braces.  */
35981 ix86_init_builtin_types (void)
35983 tree float128_type_node, float80_type_node;
35985 /* The __float80 type. */
/* Reuse long double when it is already the 80-bit extended type;
   otherwise build a distinct 80-bit REAL_TYPE.  */
35986 float80_type_node = long_double_type_node;
35987 if (TYPE_MODE (float80_type_node) != XFmode)
35989 /* The __float80 type. */
35990 float80_type_node = make_node (REAL_TYPE);
35992 TYPE_PRECISION (float80_type_node) = 80;
35993 layout_type (float80_type_node);
35995 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35997 /* The __float128 type. */
35998 float128_type_node = make_node (REAL_TYPE);
35999 TYPE_PRECISION (float128_type_node) = 128;
36000 layout_type (float128_type_node);
36001 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
36003 /* This macro is built by i386-builtin-types.awk. */
36004 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Implements TARGET_INIT_BUILTINS: registers every i386 builtin —
   types, cpu-detection, TFmode helpers, TM, MMX/SSE, MPX, the
   ABI va_* builtins, and any subtarget additions.
   NOTE(review): excerpt elides the return type, a TARGET_64BIT guard
   before the va-builtins call, and closing braces.  */
36008 ix86_init_builtins (void)
36012 ix86_init_builtin_types ();
36014 /* Builtins to get CPU type and features. */
36015 ix86_init_platform_type_builtins ();
36017 /* TFmode support builtins. */
36018 def_builtin_const (0, "__builtin_infq",
36019 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
36020 def_builtin_const (0, "__builtin_huge_valq",
36021 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
36023 /* We will expand them to normal call if SSE isn't available since
36024 they are used by libgcc. */
36025 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
36026 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
36027 BUILT_IN_MD, "__fabstf2", NULL_TREE);
36028 TREE_READONLY (t) = 1;
36029 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
36031 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
36032 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
36033 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
36034 TREE_READONLY (t) = 1;
36035 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
36037 ix86_init_tm_builtins ();
36038 ix86_init_mmx_sse_builtins ();
36039 ix86_init_mpx_builtins ();
36042 ix86_init_builtins_va_builtins_abi ();
36044 #ifdef SUBTARGET_INIT_BUILTINS
36045 SUBTARGET_INIT_BUILTINS;
36049 /* Return the ix86 builtin for CODE. */
/* Implements TARGET_BUILTIN_DECL; error_mark_node for out-of-range CODE.  */
36052 ix86_builtin_decl (unsigned code, bool)
36054 if (code >= IX86_BUILTIN_MAX)
36055 return error_mark_node;
36057 return ix86_builtins[code];
36060 /* Errors in the source file can cause expand_expr to return const0_rtx
36061 where we expect a vector. To avoid crashing, use one of the vector
36062 clear instructions. */
/* NOTE(review): excerpt elides the return type and "return x;".  */
36064 safe_vector_operand (rtx x, machine_mode mode)
36066 if (x == const0_rtx)
36067 x = CONST0_RTX (mode);
36071 /* Fixup modeless constants to fit required mode. */
/* NOTE(review): excerpt elides the return type and "return x;".  */
36073 fixup_modeless_constant (rtx x, machine_mode mode)
36075 if (GET_MODE (x) == VOIDmode)
36076 x = convert_to_mode (mode, x, 1);
36080 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin EXP using ICODE, emitting into TARGET
   (or a fresh pseudo if TARGET is unsuitable) and returning the result.
   NOTE(review): excerpt elides the return type, the pat==NULL check,
   emit_insn (pat), and "return target;".  */
36083 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
36086 tree arg0 = CALL_EXPR_ARG (exp, 0);
36087 tree arg1 = CALL_EXPR_ARG (exp, 1);
36088 rtx op0 = expand_normal (arg0);
36089 rtx op1 = expand_normal (arg1);
36090 machine_mode tmode = insn_data[icode].operand[0].mode;
36091 machine_mode mode0 = insn_data[icode].operand[1].mode;
36092 machine_mode mode1 = insn_data[icode].operand[2].mode;
36094 if (VECTOR_MODE_P (mode0))
36095 op0 = safe_vector_operand (op0, mode0);
36096 if (VECTOR_MODE_P (mode1))
36097 op1 = safe_vector_operand (op1, mode1);
36099 if (optimize || !target
36100 || GET_MODE (target) != tmode
36101 || !insn_data[icode].operand[0].predicate (target, tmode)
36102 target = gen_reg_rtx (tmode);
/* A SImode operand for a TImode slot is widened via movd into V4SI
   and then viewed as TI.  */
36104 if (GET_MODE (op1) == SImode && mode1 == TImode)
36106 rtx x = gen_reg_rtx (V4SImode);
36107 emit_insn (gen_sse2_loadd (x, op1));
36108 op1 = gen_lowpart (TImode, x);
36111 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36112 op0 = copy_to_mode_reg (mode0, op0);
36113 if (!insn_data[icode].operand[2].predicate (op1, mode1))
36114 op1 = copy_to_mode_reg (mode1, op1);
36116 pat = GEN_FCN (icode) (target, op0, op1);
36125 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands XOP/FMA4-style multi-operand builtins.  M_TYPE selects the
   argument count/shape; SUB_CODE supplies the rtx comparison code for
   the *_CMP and *_TF variants.
   NOTE(review): excerpt is heavily elided — the return type, nargs/tf_p
   assignments per case, `break` statements, the args[] declaration, the
   final pat NULL check/emit, and several braces are missing.  */
36128 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
36129 enum ix86_builtin_func_type m_type,
36130 enum rtx_code sub_code)
36135 bool comparison_p = false;
36137 bool last_arg_constant = false;
36138 int num_memory = 0;
36144 machine_mode tmode = insn_data[icode].operand[0].mode;
/* 4-operand permute/conditional-move forms: last argument must be an
   immediate.  */
36148 case MULTI_ARG_4_DF2_DI_I:
36149 case MULTI_ARG_4_DF2_DI_I1:
36150 case MULTI_ARG_4_SF2_SI_I:
36151 case MULTI_ARG_4_SF2_SI_I1:
36153 last_arg_constant = true;
36156 case MULTI_ARG_3_SF:
36157 case MULTI_ARG_3_DF:
36158 case MULTI_ARG_3_SF2:
36159 case MULTI_ARG_3_DF2:
36160 case MULTI_ARG_3_DI:
36161 case MULTI_ARG_3_SI:
36162 case MULTI_ARG_3_SI_DI:
36163 case MULTI_ARG_3_HI:
36164 case MULTI_ARG_3_HI_SI:
36165 case MULTI_ARG_3_QI:
36166 case MULTI_ARG_3_DI2:
36167 case MULTI_ARG_3_SI2:
36168 case MULTI_ARG_3_HI2:
36169 case MULTI_ARG_3_QI2:
36173 case MULTI_ARG_2_SF:
36174 case MULTI_ARG_2_DF:
36175 case MULTI_ARG_2_DI:
36176 case MULTI_ARG_2_SI:
36177 case MULTI_ARG_2_HI:
36178 case MULTI_ARG_2_QI:
36182 case MULTI_ARG_2_DI_IMM:
36183 case MULTI_ARG_2_SI_IMM:
36184 case MULTI_ARG_2_HI_IMM:
36185 case MULTI_ARG_2_QI_IMM:
36187 last_arg_constant = true;
36190 case MULTI_ARG_1_SF:
36191 case MULTI_ARG_1_DF:
36192 case MULTI_ARG_1_SF2:
36193 case MULTI_ARG_1_DF2:
36194 case MULTI_ARG_1_DI:
36195 case MULTI_ARG_1_SI:
36196 case MULTI_ARG_1_HI:
36197 case MULTI_ARG_1_QI:
36198 case MULTI_ARG_1_SI_DI:
36199 case MULTI_ARG_1_HI_DI:
36200 case MULTI_ARG_1_HI_SI:
36201 case MULTI_ARG_1_QI_DI:
36202 case MULTI_ARG_1_QI_SI:
36203 case MULTI_ARG_1_QI_HI:
/* Comparison forms: the insn pattern takes an explicit comparison rtx.  */
36207 case MULTI_ARG_2_DI_CMP:
36208 case MULTI_ARG_2_SI_CMP:
36209 case MULTI_ARG_2_HI_CMP:
36210 case MULTI_ARG_2_QI_CMP:
36212 comparison_p = true;
36215 case MULTI_ARG_2_SF_TF:
36216 case MULTI_ARG_2_DF_TF:
36217 case MULTI_ARG_2_DI_TF:
36218 case MULTI_ARG_2_SI_TF:
36219 case MULTI_ARG_2_HI_TF:
36220 case MULTI_ARG_2_QI_TF:
36226 gcc_unreachable ();
36229 if (optimize || !target
36230 || GET_MODE (target) != tmode
36231 || !insn_data[icode].operand[0].predicate (target, tmode))
36232 target = gen_reg_rtx (tmode);
36234 gcc_assert (nargs <= 4);
36236 for (i = 0; i < nargs; i++)
36238 tree arg = CALL_EXPR_ARG (exp, i);
36239 rtx op = expand_normal (arg);
/* For comparison patterns operand 1 is the comparison rtx, so the
   real arguments start one slot later.  */
36240 int adjust = (comparison_p) ? 1 : 0;
36241 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36243 if (last_arg_constant && i == nargs - 1)
36245 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36247 enum insn_code new_icode = icode;
36250 case CODE_FOR_xop_vpermil2v2df3:
36251 case CODE_FOR_xop_vpermil2v4sf3:
36252 case CODE_FOR_xop_vpermil2v4df3:
36253 case CODE_FOR_xop_vpermil2v8sf3:
36254 error ("the last argument must be a 2-bit immediate");
36255 return gen_reg_rtx (tmode);
/* XOP rotates with out-of-range counts fall back to the generic
   rotate patterns after masking the count.  */
36256 case CODE_FOR_xop_rotlv2di3:
36257 new_icode = CODE_FOR_rotlv2di3;
36259 case CODE_FOR_xop_rotlv4si3:
36260 new_icode = CODE_FOR_rotlv4si3;
36262 case CODE_FOR_xop_rotlv8hi3:
36263 new_icode = CODE_FOR_rotlv8hi3;
36265 case CODE_FOR_xop_rotlv16qi3:
36266 new_icode = CODE_FOR_rotlv16qi3;
36268 if (CONST_INT_P (op))
36270 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36271 op = GEN_INT (INTVAL (op) & mask);
36272 gcc_checking_assert
36273 (insn_data[icode].operand[i + 1].predicate (op, mode));
36277 gcc_checking_assert
36279 && insn_data[new_icode].operand[0].mode == tmode
36280 && insn_data[new_icode].operand[1].mode == tmode
36281 && insn_data[new_icode].operand[2].mode == mode
36282 && insn_data[new_icode].operand[0].predicate
36283 == insn_data[icode].operand[0].predicate
36284 && insn_data[new_icode].operand[1].predicate
36285 == insn_data[icode].operand[1].predicate);
36291 gcc_unreachable ();
36298 if (VECTOR_MODE_P (mode))
36299 op = safe_vector_operand (op, mode);
36301 /* If we aren't optimizing, only allow one memory operand to be
36303 if (memory_operand (op, mode))
36306 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36309 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36311 op = force_reg (mode, op);
36315 args[i].mode = mode;
36321 pat = GEN_FCN (icode) (target, args[0].op);
36326 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36327 GEN_INT ((int)sub_code));
36328 else if (! comparison_p)
36329 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36332 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36336 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36341 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36345 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36349 gcc_unreachable ();
36359 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36360 insns with vec_merge. */
/* NOTE(review): excerpt elides the return type, the `target` parameter
   declaration line, the op1 = op0 copy before the operand-2 check, the
   pat NULL check/emit, and "return target;".  */
36363 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36367 tree arg0 = CALL_EXPR_ARG (exp, 0);
36368 rtx op1, op0 = expand_normal (arg0);
36369 machine_mode tmode = insn_data[icode].operand[0].mode;
36370 machine_mode mode0 = insn_data[icode].operand[1].mode;
36372 if (optimize || !target
36373 || GET_MODE (target) != tmode
36374 || !insn_data[icode].operand[0].predicate (target, tmode))
36375 target = gen_reg_rtx (tmode);
36377 if (VECTOR_MODE_P (mode0))
36378 op0 = safe_vector_operand (op0, mode0);
36380 if ((optimize && !register_operand (op0, mode0))
36381 || !insn_data[icode].operand[1].predicate (op0, mode0))
36382 op0 = copy_to_mode_reg (mode0, op0);
36385 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36386 op1 = copy_to_mode_reg (mode0, op1);
36388 pat = GEN_FCN (icode) (target, op0, op1);
36395 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands an SSE compare builtin described by D into TARGET, optionally
   SWAPping the operands for comparisons only available in one direction.
   NOTE(review): excerpt elides the return type, the `if (swap)` guard
   before the std::swap, the pat NULL check/emit, and "return target;".  */
36398 ix86_expand_sse_compare (const struct builtin_description *d,
36399 tree exp, rtx target, bool swap)
36402 tree arg0 = CALL_EXPR_ARG (exp, 0);
36403 tree arg1 = CALL_EXPR_ARG (exp, 1);
36404 rtx op0 = expand_normal (arg0);
36405 rtx op1 = expand_normal (arg1);
36407 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36408 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36409 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36410 enum rtx_code comparison = d->comparison;
36412 if (VECTOR_MODE_P (mode0))
36413 op0 = safe_vector_operand (op0, mode0);
36414 if (VECTOR_MODE_P (mode1))
36415 op1 = safe_vector_operand (op1, mode1);
36417 /* Swap operands if we have a comparison that isn't available in
36420 std::swap (op0, op1);
36422 if (optimize || !target
36423 || GET_MODE (target) != tmode
36424 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36425 target = gen_reg_rtx (tmode);
36427 if ((optimize && !register_operand (op0, mode0))
36428 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36429 op0 = copy_to_mode_reg (mode0, op0);
36430 if ((optimize && !register_operand (op1, mode1))
36431 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36432 op1 = copy_to_mode_reg (mode1, op1);
36434 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36435 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36442 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comi/ucomi builtin: compares the two scalar operands and
   materializes the flag selected by d->comparison as a 0/1 SImode
   value, via a setcc into the low byte of a zeroed pseudo.  */
36445 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36449 tree arg0 = CALL_EXPR_ARG (exp, 0);
36450 tree arg1 = CALL_EXPR_ARG (exp, 1);
36451 rtx op0 = expand_normal (arg0);
36452 rtx op1 = expand_normal (arg1);
36453 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36454 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36455 enum rtx_code comparison = d->comparison;
36457 if (VECTOR_MODE_P (mode0))
36458 op0 = safe_vector_operand (op0, mode0);
36459 if (VECTOR_MODE_P (mode1))
36460 op1 = safe_vector_operand (op1, mode1);
36462 /* Swap operands if we have a comparison that isn't available in
/* Here the swap decision is encoded in d->flag, not a parameter.  */
36464 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36465 std::swap (op0, op1);
/* Zero an SImode pseudo, then write the setcc result into its low
   QImode part so the upper bytes stay zero.  */
36467 target = gen_reg_rtx (SImode);
36468 emit_move_insn (target, const0_rtx);
36469 target = gen_rtx_SUBREG (QImode, target, 0);
36471 if ((optimize && !register_operand (op0, mode0))
36472 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36473 op0 = copy_to_mode_reg (mode0, op0);
36474 if ((optimize && !register_operand (op1, mode1))
36475 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36476 op1 = copy_to_mode_reg (mode1, op1);
36478 pat = GEN_FCN (d->icode) (op0, op1);
/* STRICT_LOW_PART: only the low byte of the SImode pseudo is written
   by the setcc; the rest keeps the zero stored above.  */
36482 emit_insn (gen_rtx_SET (VOIDmode,
36483 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36484 gen_rtx_fmt_ee (comparison, QImode,
/* Hand back the full SImode pseudo underlying the QImode subreg.  */
36488 return SUBREG_REG (target);
36491 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
/* One-input rounding builtin.  d->comparison is reused here to hold
   the rounding-mode immediate, passed as the insn's last operand.  */
36494 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36498 tree arg0 = CALL_EXPR_ARG (exp, 0);
36499 rtx op1, op0 = expand_normal (arg0);
36500 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36501 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
/* Use a fresh pseudo unless TARGET is already suitable.  */
36503 if (optimize || target == 0
36504 || GET_MODE (target) != tmode
36505 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36506 target = gen_reg_rtx (tmode);
36508 if (VECTOR_MODE_P (mode0))
36509 op0 = safe_vector_operand (op0, mode0);
36511 if ((optimize && !register_operand (op0, mode0))
36512 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36513 op0 = copy_to_mode_reg (mode0, op0);
/* Rounding-mode immediate, taken from the builtin description.  */
36515 op1 = GEN_INT (d->comparison);
36517 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Two-input variant of ix86_expand_sse_round: rounds/packs a pair of
   vectors, with the rounding immediate (d->comparison) as the final
   operand of the pattern.  */
36525 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36526 tree exp, rtx target)
36529 tree arg0 = CALL_EXPR_ARG (exp, 0);
36530 tree arg1 = CALL_EXPR_ARG (exp, 1);
36531 rtx op0 = expand_normal (arg0);
36532 rtx op1 = expand_normal (arg1);
36534 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36535 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36536 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36538 if (optimize || target == 0
36539 || GET_MODE (target) != tmode
36540 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36541 target = gen_reg_rtx (tmode);
/* Both inputs are vectors for every caller of this helper, so no
   VECTOR_MODE_P guard is needed before safe_vector_operand.  */
36543 op0 = safe_vector_operand (op0, mode0);
36544 op1 = safe_vector_operand (op1, mode1);
36546 if ((optimize && !register_operand (op0, mode0))
36547 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36548 op0 = copy_to_mode_reg (mode0, op0);
36549 if ((optimize && !register_operand (op1, mode1))
36550 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36551 op1 = copy_to_mode_reg (mode1, op1);
/* Rounding-mode immediate from the builtin description.  */
36553 op2 = GEN_INT (d->comparison);
36555 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36562 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Emits the ptest pattern on the two vector operands, then extracts
   the flag named by d->comparison as a 0/1 value -- same zeroed-pseudo
   plus STRICT_LOW_PART setcc trick as ix86_expand_sse_comi.  */
36565 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36569 tree arg0 = CALL_EXPR_ARG (exp, 0);
36570 tree arg1 = CALL_EXPR_ARG (exp, 1);
36571 rtx op0 = expand_normal (arg0);
36572 rtx op1 = expand_normal (arg1);
36573 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36574 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36575 enum rtx_code comparison = d->comparison;
36577 if (VECTOR_MODE_P (mode0))
36578 op0 = safe_vector_operand (op0, mode0);
36579 if (VECTOR_MODE_P (mode1))
36580 op1 = safe_vector_operand (op1, mode1);
/* Zero an SImode pseudo; the setcc below fills only its low byte.  */
36582 target = gen_reg_rtx (SImode);
36583 emit_move_insn (target, const0_rtx);
36584 target = gen_rtx_SUBREG (QImode, target, 0);
36586 if ((optimize && !register_operand (op0, mode0))
36587 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36588 op0 = copy_to_mode_reg (mode0, op0);
36589 if ((optimize && !register_operand (op1, mode1))
36590 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36591 op1 = copy_to_mode_reg (mode1, op1);
36593 pat = GEN_FCN (d->icode) (op0, op1);
/* Write the comparison result into the low byte only.  */
36597 emit_insn (gen_rtx_SET (VOIDmode,
36598 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36599 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo behind the QImode subreg.  */
36603 return SUBREG_REG (target);
36606 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Explicit-length packed string compare (SSE4.2).  Five arguments:
   two vector strings (op0, op2), their explicit lengths (op1, op3),
   and a control immediate (op4).  Depending on d->code the builtin
   returns the index result, the mask result, or a single flag bit.  */
36609 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36610 tree exp, rtx target)
36613 tree arg0 = CALL_EXPR_ARG (exp, 0);
36614 tree arg1 = CALL_EXPR_ARG (exp, 1);
36615 tree arg2 = CALL_EXPR_ARG (exp, 2);
36616 tree arg3 = CALL_EXPR_ARG (exp, 3);
36617 tree arg4 = CALL_EXPR_ARG (exp, 4);
36618 rtx scratch0, scratch1;
36619 rtx op0 = expand_normal (arg0);
36620 rtx op1 = expand_normal (arg1);
36621 rtx op2 = expand_normal (arg2);
36622 rtx op3 = expand_normal (arg3);
36623 rtx op4 = expand_normal (arg4);
/* tmode0/tmode1 are the pattern's two outputs (index and mask);
   the rest are the input operand modes, per the insn_data entry.  */
36624 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36626 tmode0 = insn_data[d->icode].operand[0].mode;
36627 tmode1 = insn_data[d->icode].operand[1].mode;
36628 modev2 = insn_data[d->icode].operand[2].mode;
36629 modei3 = insn_data[d->icode].operand[3].mode;
36630 modev4 = insn_data[d->icode].operand[4].mode;
36631 modei5 = insn_data[d->icode].operand[5].mode;
36632 modeimm = insn_data[d->icode].operand[6].mode;
36634 if (VECTOR_MODE_P (modev2))
36635 op0 = safe_vector_operand (op0, modev2);
36636 if (VECTOR_MODE_P (modev4))
36637 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against its operand predicate.  */
36639 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36640 op0 = copy_to_mode_reg (modev2, op0)
36641 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36642 op1 = copy_to_mode_reg (modei3, op1);
36643 if ((optimize && !register_operand (op2, modev4))
36644 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36645 op2 = copy_to_mode_reg (modev4, op2);
36646 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36647 op3 = copy_to_mode_reg (modei5, op3);
/* The control operand must be a compile-time 8-bit immediate.  */
36649 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36651 error ("the fifth argument must be an 8-bit immediate");
/* pcmpestri: the index output (operand 0) is the builtin's result;
   the mask output goes to a scratch pseudo.  */
36655 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36657 if (optimize || !target
36658 || GET_MODE (target) != tmode0
36659 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36660 target = gen_reg_rtx (tmode0);
36662 scratch1 = gen_reg_rtx (tmode1);
36664 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: the mask output (operand 1) is the result instead.  */
36666 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36668 if (optimize || !target
36669 || GET_MODE (target) != tmode1
36670 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36671 target = gen_reg_rtx (tmode1);
36673 scratch0 = gen_reg_rtx (tmode0);
36675 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-extraction builtins: both insn outputs are scratch; d->flag
   must be set and identifies what to read back from the insn.  */
36679 gcc_assert (d->flag);
36681 scratch0 = gen_reg_rtx (tmode0);
36682 scratch1 = gen_reg_rtx (tmode1);
36684 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Extract the flag as 0/1: zero an SImode pseudo and setcc (EQ against
   the flags register denoted by d->flag) into its low byte.  */
36694 target = gen_reg_rtx (SImode);
36695 emit_move_insn (target, const0_rtx);
36696 target = gen_rtx_SUBREG (QImode, target, 0);
36699 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36700 gen_rtx_fmt_ee (EQ, QImode,
36701 gen_rtx_REG ((machine_mode) d->flag,
36704 return SUBREG_REG (target);
36711 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Implicit-length packed string compare (SSE4.2).  Three arguments:
   two vector strings (op0, op1) and a control immediate (op2).
   Structure mirrors ix86_expand_sse_pcmpestr, minus the explicit
   length operands.  */
36714 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36715 tree exp, rtx target)
36718 tree arg0 = CALL_EXPR_ARG (exp, 0);
36719 tree arg1 = CALL_EXPR_ARG (exp, 1);
36720 tree arg2 = CALL_EXPR_ARG (exp, 2);
36721 rtx scratch0, scratch1;
36722 rtx op0 = expand_normal (arg0);
36723 rtx op1 = expand_normal (arg1);
36724 rtx op2 = expand_normal (arg2);
/* tmode0/tmode1 are the index and mask outputs of the pattern.  */
36725 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36727 tmode0 = insn_data[d->icode].operand[0].mode;
36728 tmode1 = insn_data[d->icode].operand[1].mode;
36729 modev2 = insn_data[d->icode].operand[2].mode;
36730 modev3 = insn_data[d->icode].operand[3].mode;
36731 modeimm = insn_data[d->icode].operand[4].mode;
36733 if (VECTOR_MODE_P (modev2))
36734 op0 = safe_vector_operand (op0, modev2);
36735 if (VECTOR_MODE_P (modev3))
36736 op1 = safe_vector_operand (op1, modev3);
36738 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36739 op0 = copy_to_mode_reg (modev2, op0);
36740 if ((optimize && !register_operand (op1, modev3))
36741 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36742 op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be a compile-time 8-bit immediate.  */
36744 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36746 error ("the third argument must be an 8-bit immediate");
/* pcmpistri: index output (operand 0) is the builtin's result.  */
36750 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36752 if (optimize || !target
36753 || GET_MODE (target) != tmode0
36754 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36755 target = gen_reg_rtx (tmode0);
36757 scratch1 = gen_reg_rtx (tmode1);
36759 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: mask output (operand 1) is the result instead.  */
36761 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36763 if (optimize || !target
36764 || GET_MODE (target) != tmode1
36765 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36766 target = gen_reg_rtx (tmode1);
36768 scratch0 = gen_reg_rtx (tmode0);
36770 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-extraction builtins: both outputs scratch; d->flag required.  */
36774 gcc_assert (d->flag);
36776 scratch0 = gen_reg_rtx (tmode0);
36777 scratch1 = gen_reg_rtx (tmode1);
36779 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Extract the flag as 0/1 via the zeroed-pseudo + STRICT_LOW_PART
   setcc pattern used throughout these helpers.  */
36789 target = gen_reg_rtx (SImode);
36790 emit_move_insn (target, const0_rtx);
36791 target = gen_rtx_SUBREG (QImode, target, 0);
36794 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36795 gen_rtx_fmt_ee (EQ, QImode,
36796 gen_rtx_REG ((machine_mode) d->flag,
36799 return SUBREG_REG (target);
36805 /* Subroutine of ix86_expand_builtin to take care of insns with
36806 variable number of operands. */
36809 ix86_expand_args_builtin (const struct builtin_description *d,
36810 tree exp, rtx target)
36812 rtx pat, real_target;
36813 unsigned int i, nargs;
36814 unsigned int nargs_constant = 0;
36815 unsigned int mask_pos = 0;
36816 int num_memory = 0;
36822 bool last_arg_count = false;
36823 enum insn_code icode = d->icode;
36824 const struct insn_data_d *insn_p = &insn_data[icode];
36825 machine_mode tmode = insn_p->operand[0].mode;
36826 machine_mode rmode = VOIDmode;
36828 enum rtx_code comparison = d->comparison;
36830 switch ((enum ix86_builtin_func_type) d->flag)
36832 case V2DF_FTYPE_V2DF_ROUND:
36833 case V4DF_FTYPE_V4DF_ROUND:
36834 case V4SF_FTYPE_V4SF_ROUND:
36835 case V8SF_FTYPE_V8SF_ROUND:
36836 case V4SI_FTYPE_V4SF_ROUND:
36837 case V8SI_FTYPE_V8SF_ROUND:
36838 return ix86_expand_sse_round (d, exp, target);
36839 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36840 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36841 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36842 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36843 case INT_FTYPE_V8SF_V8SF_PTEST:
36844 case INT_FTYPE_V4DI_V4DI_PTEST:
36845 case INT_FTYPE_V4DF_V4DF_PTEST:
36846 case INT_FTYPE_V4SF_V4SF_PTEST:
36847 case INT_FTYPE_V2DI_V2DI_PTEST:
36848 case INT_FTYPE_V2DF_V2DF_PTEST:
36849 return ix86_expand_sse_ptest (d, exp, target);
36850 case FLOAT128_FTYPE_FLOAT128:
36851 case FLOAT_FTYPE_FLOAT:
36852 case INT_FTYPE_INT:
36853 case UINT64_FTYPE_INT:
36854 case UINT16_FTYPE_UINT16:
36855 case INT64_FTYPE_INT64:
36856 case INT64_FTYPE_V4SF:
36857 case INT64_FTYPE_V2DF:
36858 case INT_FTYPE_V16QI:
36859 case INT_FTYPE_V8QI:
36860 case INT_FTYPE_V8SF:
36861 case INT_FTYPE_V4DF:
36862 case INT_FTYPE_V4SF:
36863 case INT_FTYPE_V2DF:
36864 case INT_FTYPE_V32QI:
36865 case V16QI_FTYPE_V16QI:
36866 case V8SI_FTYPE_V8SF:
36867 case V8SI_FTYPE_V4SI:
36868 case V8HI_FTYPE_V8HI:
36869 case V8HI_FTYPE_V16QI:
36870 case V8QI_FTYPE_V8QI:
36871 case V8SF_FTYPE_V8SF:
36872 case V8SF_FTYPE_V8SI:
36873 case V8SF_FTYPE_V4SF:
36874 case V8SF_FTYPE_V8HI:
36875 case V4SI_FTYPE_V4SI:
36876 case V4SI_FTYPE_V16QI:
36877 case V4SI_FTYPE_V4SF:
36878 case V4SI_FTYPE_V8SI:
36879 case V4SI_FTYPE_V8HI:
36880 case V4SI_FTYPE_V4DF:
36881 case V4SI_FTYPE_V2DF:
36882 case V4HI_FTYPE_V4HI:
36883 case V4DF_FTYPE_V4DF:
36884 case V4DF_FTYPE_V4SI:
36885 case V4DF_FTYPE_V4SF:
36886 case V4DF_FTYPE_V2DF:
36887 case V4SF_FTYPE_V4SF:
36888 case V4SF_FTYPE_V4SI:
36889 case V4SF_FTYPE_V8SF:
36890 case V4SF_FTYPE_V4DF:
36891 case V4SF_FTYPE_V8HI:
36892 case V4SF_FTYPE_V2DF:
36893 case V2DI_FTYPE_V2DI:
36894 case V2DI_FTYPE_V16QI:
36895 case V2DI_FTYPE_V8HI:
36896 case V2DI_FTYPE_V4SI:
36897 case V2DF_FTYPE_V2DF:
36898 case V2DF_FTYPE_V4SI:
36899 case V2DF_FTYPE_V4DF:
36900 case V2DF_FTYPE_V4SF:
36901 case V2DF_FTYPE_V2SI:
36902 case V2SI_FTYPE_V2SI:
36903 case V2SI_FTYPE_V4SF:
36904 case V2SI_FTYPE_V2SF:
36905 case V2SI_FTYPE_V2DF:
36906 case V2SF_FTYPE_V2SF:
36907 case V2SF_FTYPE_V2SI:
36908 case V32QI_FTYPE_V32QI:
36909 case V32QI_FTYPE_V16QI:
36910 case V16HI_FTYPE_V16HI:
36911 case V16HI_FTYPE_V8HI:
36912 case V8SI_FTYPE_V8SI:
36913 case V16HI_FTYPE_V16QI:
36914 case V8SI_FTYPE_V16QI:
36915 case V4DI_FTYPE_V16QI:
36916 case V8SI_FTYPE_V8HI:
36917 case V4DI_FTYPE_V8HI:
36918 case V4DI_FTYPE_V4SI:
36919 case V4DI_FTYPE_V2DI:
36921 case HI_FTYPE_V16QI:
36922 case SI_FTYPE_V32QI:
36923 case DI_FTYPE_V64QI:
36924 case V16QI_FTYPE_HI:
36925 case V32QI_FTYPE_SI:
36926 case V64QI_FTYPE_DI:
36927 case V8HI_FTYPE_QI:
36928 case V16HI_FTYPE_HI:
36929 case V32HI_FTYPE_SI:
36930 case V4SI_FTYPE_QI:
36931 case V8SI_FTYPE_QI:
36932 case V4SI_FTYPE_HI:
36933 case V8SI_FTYPE_HI:
36934 case QI_FTYPE_V8HI:
36935 case HI_FTYPE_V16HI:
36936 case SI_FTYPE_V32HI:
36937 case QI_FTYPE_V4SI:
36938 case QI_FTYPE_V8SI:
36939 case HI_FTYPE_V16SI:
36940 case QI_FTYPE_V2DI:
36941 case QI_FTYPE_V4DI:
36942 case QI_FTYPE_V8DI:
36943 case UINT_FTYPE_V2DF:
36944 case UINT_FTYPE_V4SF:
36945 case UINT64_FTYPE_V2DF:
36946 case UINT64_FTYPE_V4SF:
36947 case V16QI_FTYPE_V8DI:
36948 case V16HI_FTYPE_V16SI:
36949 case V16SI_FTYPE_HI:
36950 case V2DI_FTYPE_QI:
36951 case V4DI_FTYPE_QI:
36952 case V16SI_FTYPE_V16SI:
36953 case V16SI_FTYPE_INT:
36954 case V16SF_FTYPE_FLOAT:
36955 case V16SF_FTYPE_V8SF:
36956 case V16SI_FTYPE_V8SI:
36957 case V16SF_FTYPE_V4SF:
36958 case V16SI_FTYPE_V4SI:
36959 case V16SF_FTYPE_V16SF:
36960 case V8HI_FTYPE_V8DI:
36961 case V8UHI_FTYPE_V8UHI:
36962 case V8SI_FTYPE_V8DI:
36963 case V8SF_FTYPE_V8DF:
36964 case V8DI_FTYPE_QI:
36965 case V8DI_FTYPE_INT64:
36966 case V8DI_FTYPE_V4DI:
36967 case V8DI_FTYPE_V8DI:
36968 case V8DF_FTYPE_DOUBLE:
36969 case V8DF_FTYPE_V4DF:
36970 case V8DF_FTYPE_V2DF:
36971 case V8DF_FTYPE_V8DF:
36972 case V8DF_FTYPE_V8SI:
36975 case V4SF_FTYPE_V4SF_VEC_MERGE:
36976 case V2DF_FTYPE_V2DF_VEC_MERGE:
36977 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36978 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36979 case V16QI_FTYPE_V16QI_V16QI:
36980 case V16QI_FTYPE_V8HI_V8HI:
36981 case V16SI_FTYPE_V16SI_V16SI:
36982 case V16SF_FTYPE_V16SF_V16SF:
36983 case V16SF_FTYPE_V16SF_V16SI:
36984 case V8QI_FTYPE_V8QI_V8QI:
36985 case V8QI_FTYPE_V4HI_V4HI:
36986 case V8HI_FTYPE_V8HI_V8HI:
36987 case V8HI_FTYPE_V16QI_V16QI:
36988 case V8HI_FTYPE_V4SI_V4SI:
36989 case V8SF_FTYPE_V8SF_V8SF:
36990 case V8SF_FTYPE_V8SF_V8SI:
36991 case V8DI_FTYPE_V8DI_V8DI:
36992 case V8DF_FTYPE_V8DF_V8DF:
36993 case V8DF_FTYPE_V8DF_V8DI:
36994 case V4SI_FTYPE_V4SI_V4SI:
36995 case V4SI_FTYPE_V8HI_V8HI:
36996 case V4SI_FTYPE_V4SF_V4SF:
36997 case V4SI_FTYPE_V2DF_V2DF:
36998 case V4HI_FTYPE_V4HI_V4HI:
36999 case V4HI_FTYPE_V8QI_V8QI:
37000 case V4HI_FTYPE_V2SI_V2SI:
37001 case V4DF_FTYPE_V4DF_V4DF:
37002 case V4DF_FTYPE_V4DF_V4DI:
37003 case V4SF_FTYPE_V4SF_V4SF:
37004 case V4SF_FTYPE_V4SF_V4SI:
37005 case V4SF_FTYPE_V4SF_V2SI:
37006 case V4SF_FTYPE_V4SF_V2DF:
37007 case V4SF_FTYPE_V4SF_UINT:
37008 case V4SF_FTYPE_V4SF_UINT64:
37009 case V4SF_FTYPE_V4SF_DI:
37010 case V4SF_FTYPE_V4SF_SI:
37011 case V2DI_FTYPE_V2DI_V2DI:
37012 case V2DI_FTYPE_V16QI_V16QI:
37013 case V2DI_FTYPE_V4SI_V4SI:
37014 case V2UDI_FTYPE_V4USI_V4USI:
37015 case V2DI_FTYPE_V2DI_V16QI:
37016 case V2DI_FTYPE_V2DF_V2DF:
37017 case V2SI_FTYPE_V2SI_V2SI:
37018 case V2SI_FTYPE_V4HI_V4HI:
37019 case V2SI_FTYPE_V2SF_V2SF:
37020 case V2DF_FTYPE_V2DF_V2DF:
37021 case V2DF_FTYPE_V2DF_V4SF:
37022 case V2DF_FTYPE_V2DF_V2DI:
37023 case V2DF_FTYPE_V2DF_DI:
37024 case V2DF_FTYPE_V2DF_SI:
37025 case V2DF_FTYPE_V2DF_UINT:
37026 case V2DF_FTYPE_V2DF_UINT64:
37027 case V2SF_FTYPE_V2SF_V2SF:
37028 case V1DI_FTYPE_V1DI_V1DI:
37029 case V1DI_FTYPE_V8QI_V8QI:
37030 case V1DI_FTYPE_V2SI_V2SI:
37031 case V32QI_FTYPE_V16HI_V16HI:
37032 case V16HI_FTYPE_V8SI_V8SI:
37033 case V32QI_FTYPE_V32QI_V32QI:
37034 case V16HI_FTYPE_V32QI_V32QI:
37035 case V16HI_FTYPE_V16HI_V16HI:
37036 case V8SI_FTYPE_V4DF_V4DF:
37037 case V8SI_FTYPE_V8SI_V8SI:
37038 case V8SI_FTYPE_V16HI_V16HI:
37039 case V4DI_FTYPE_V4DI_V4DI:
37040 case V4DI_FTYPE_V8SI_V8SI:
37041 case V4UDI_FTYPE_V8USI_V8USI:
37042 case QI_FTYPE_V8DI_V8DI:
37043 case V8DI_FTYPE_V64QI_V64QI:
37044 case HI_FTYPE_V16SI_V16SI:
37045 if (comparison == UNKNOWN)
37046 return ix86_expand_binop_builtin (icode, exp, target);
37049 case V4SF_FTYPE_V4SF_V4SF_SWAP:
37050 case V2DF_FTYPE_V2DF_V2DF_SWAP:
37051 gcc_assert (comparison != UNKNOWN);
37055 case V16HI_FTYPE_V16HI_V8HI_COUNT:
37056 case V16HI_FTYPE_V16HI_SI_COUNT:
37057 case V8SI_FTYPE_V8SI_V4SI_COUNT:
37058 case V8SI_FTYPE_V8SI_SI_COUNT:
37059 case V4DI_FTYPE_V4DI_V2DI_COUNT:
37060 case V4DI_FTYPE_V4DI_INT_COUNT:
37061 case V8HI_FTYPE_V8HI_V8HI_COUNT:
37062 case V8HI_FTYPE_V8HI_SI_COUNT:
37063 case V4SI_FTYPE_V4SI_V4SI_COUNT:
37064 case V4SI_FTYPE_V4SI_SI_COUNT:
37065 case V4HI_FTYPE_V4HI_V4HI_COUNT:
37066 case V4HI_FTYPE_V4HI_SI_COUNT:
37067 case V2DI_FTYPE_V2DI_V2DI_COUNT:
37068 case V2DI_FTYPE_V2DI_SI_COUNT:
37069 case V2SI_FTYPE_V2SI_V2SI_COUNT:
37070 case V2SI_FTYPE_V2SI_SI_COUNT:
37071 case V1DI_FTYPE_V1DI_V1DI_COUNT:
37072 case V1DI_FTYPE_V1DI_SI_COUNT:
37074 last_arg_count = true;
37076 case UINT64_FTYPE_UINT64_UINT64:
37077 case UINT_FTYPE_UINT_UINT:
37078 case UINT_FTYPE_UINT_USHORT:
37079 case UINT_FTYPE_UINT_UCHAR:
37080 case UINT16_FTYPE_UINT16_INT:
37081 case UINT8_FTYPE_UINT8_INT:
37082 case HI_FTYPE_HI_HI:
37083 case SI_FTYPE_SI_SI:
37084 case DI_FTYPE_DI_DI:
37085 case V16SI_FTYPE_V8DF_V8DF:
37088 case V2DI_FTYPE_V2DI_INT_CONVERT:
37091 nargs_constant = 1;
37093 case V4DI_FTYPE_V4DI_INT_CONVERT:
37096 nargs_constant = 1;
37098 case V8DI_FTYPE_V8DI_INT_CONVERT:
37101 nargs_constant = 1;
37103 case V8HI_FTYPE_V8HI_INT:
37104 case V8HI_FTYPE_V8SF_INT:
37105 case V16HI_FTYPE_V16SF_INT:
37106 case V8HI_FTYPE_V4SF_INT:
37107 case V8SF_FTYPE_V8SF_INT:
37108 case V4SF_FTYPE_V16SF_INT:
37109 case V16SF_FTYPE_V16SF_INT:
37110 case V4SI_FTYPE_V4SI_INT:
37111 case V4SI_FTYPE_V8SI_INT:
37112 case V4HI_FTYPE_V4HI_INT:
37113 case V4DF_FTYPE_V4DF_INT:
37114 case V4DF_FTYPE_V8DF_INT:
37115 case V4SF_FTYPE_V4SF_INT:
37116 case V4SF_FTYPE_V8SF_INT:
37117 case V2DI_FTYPE_V2DI_INT:
37118 case V2DF_FTYPE_V2DF_INT:
37119 case V2DF_FTYPE_V4DF_INT:
37120 case V16HI_FTYPE_V16HI_INT:
37121 case V8SI_FTYPE_V8SI_INT:
37122 case V16SI_FTYPE_V16SI_INT:
37123 case V4SI_FTYPE_V16SI_INT:
37124 case V4DI_FTYPE_V4DI_INT:
37125 case V2DI_FTYPE_V4DI_INT:
37126 case V4DI_FTYPE_V8DI_INT:
37127 case HI_FTYPE_HI_INT:
37128 case QI_FTYPE_V4SF_INT:
37129 case QI_FTYPE_V2DF_INT:
37131 nargs_constant = 1;
37133 case V16QI_FTYPE_V16QI_V16QI_V16QI:
37134 case V8SF_FTYPE_V8SF_V8SF_V8SF:
37135 case V4DF_FTYPE_V4DF_V4DF_V4DF:
37136 case V4SF_FTYPE_V4SF_V4SF_V4SF:
37137 case V2DF_FTYPE_V2DF_V2DF_V2DF:
37138 case V32QI_FTYPE_V32QI_V32QI_V32QI:
37139 case HI_FTYPE_V16SI_V16SI_HI:
37140 case QI_FTYPE_V8DI_V8DI_QI:
37141 case V16HI_FTYPE_V16SI_V16HI_HI:
37142 case V16QI_FTYPE_V16SI_V16QI_HI:
37143 case V16QI_FTYPE_V8DI_V16QI_QI:
37144 case V16SF_FTYPE_V16SF_V16SF_HI:
37145 case V16SF_FTYPE_V16SF_V16SF_V16SF:
37146 case V16SF_FTYPE_V16SF_V16SI_V16SF:
37147 case V16SF_FTYPE_V16SI_V16SF_HI:
37148 case V16SF_FTYPE_V16SI_V16SF_V16SF:
37149 case V16SF_FTYPE_V4SF_V16SF_HI:
37150 case V16SI_FTYPE_SI_V16SI_HI:
37151 case V16SI_FTYPE_V16HI_V16SI_HI:
37152 case V16SI_FTYPE_V16QI_V16SI_HI:
37153 case V16SI_FTYPE_V16SF_V16SI_HI:
37154 case V8SF_FTYPE_V4SF_V8SF_QI:
37155 case V4DF_FTYPE_V2DF_V4DF_QI:
37156 case V8SI_FTYPE_V4SI_V8SI_QI:
37157 case V8SI_FTYPE_SI_V8SI_QI:
37158 case V4SI_FTYPE_V4SI_V4SI_QI:
37159 case V4SI_FTYPE_SI_V4SI_QI:
37160 case V4DI_FTYPE_V2DI_V4DI_QI:
37161 case V4DI_FTYPE_DI_V4DI_QI:
37162 case V2DI_FTYPE_V2DI_V2DI_QI:
37163 case V2DI_FTYPE_DI_V2DI_QI:
37164 case V64QI_FTYPE_V64QI_V64QI_DI:
37165 case V64QI_FTYPE_V16QI_V64QI_DI:
37166 case V64QI_FTYPE_QI_V64QI_DI:
37167 case V32QI_FTYPE_V32QI_V32QI_SI:
37168 case V32QI_FTYPE_V16QI_V32QI_SI:
37169 case V32QI_FTYPE_QI_V32QI_SI:
37170 case V16QI_FTYPE_V16QI_V16QI_HI:
37171 case V16QI_FTYPE_QI_V16QI_HI:
37172 case V32HI_FTYPE_V8HI_V32HI_SI:
37173 case V32HI_FTYPE_HI_V32HI_SI:
37174 case V16HI_FTYPE_V8HI_V16HI_HI:
37175 case V16HI_FTYPE_HI_V16HI_HI:
37176 case V8HI_FTYPE_V8HI_V8HI_QI:
37177 case V8HI_FTYPE_HI_V8HI_QI:
37178 case V8SF_FTYPE_V8HI_V8SF_QI:
37179 case V4SF_FTYPE_V8HI_V4SF_QI:
37180 case V8SI_FTYPE_V8SF_V8SI_QI:
37181 case V4SI_FTYPE_V4SF_V4SI_QI:
37182 case V8DI_FTYPE_V8SF_V8DI_QI:
37183 case V4DI_FTYPE_V4SF_V4DI_QI:
37184 case V2DI_FTYPE_V4SF_V2DI_QI:
37185 case V8SF_FTYPE_V8DI_V8SF_QI:
37186 case V4SF_FTYPE_V4DI_V4SF_QI:
37187 case V4SF_FTYPE_V2DI_V4SF_QI:
37188 case V8DF_FTYPE_V8DI_V8DF_QI:
37189 case V4DF_FTYPE_V4DI_V4DF_QI:
37190 case V2DF_FTYPE_V2DI_V2DF_QI:
37191 case V16QI_FTYPE_V8HI_V16QI_QI:
37192 case V16QI_FTYPE_V16HI_V16QI_HI:
37193 case V16QI_FTYPE_V4SI_V16QI_QI:
37194 case V16QI_FTYPE_V8SI_V16QI_QI:
37195 case V8HI_FTYPE_V4SI_V8HI_QI:
37196 case V8HI_FTYPE_V8SI_V8HI_QI:
37197 case V16QI_FTYPE_V2DI_V16QI_QI:
37198 case V16QI_FTYPE_V4DI_V16QI_QI:
37199 case V8HI_FTYPE_V2DI_V8HI_QI:
37200 case V8HI_FTYPE_V4DI_V8HI_QI:
37201 case V4SI_FTYPE_V2DI_V4SI_QI:
37202 case V4SI_FTYPE_V4DI_V4SI_QI:
37203 case V32QI_FTYPE_V32HI_V32QI_SI:
37204 case HI_FTYPE_V16QI_V16QI_HI:
37205 case SI_FTYPE_V32QI_V32QI_SI:
37206 case DI_FTYPE_V64QI_V64QI_DI:
37207 case QI_FTYPE_V8HI_V8HI_QI:
37208 case HI_FTYPE_V16HI_V16HI_HI:
37209 case SI_FTYPE_V32HI_V32HI_SI:
37210 case QI_FTYPE_V4SI_V4SI_QI:
37211 case QI_FTYPE_V8SI_V8SI_QI:
37212 case QI_FTYPE_V2DI_V2DI_QI:
37213 case QI_FTYPE_V4DI_V4DI_QI:
37214 case V4SF_FTYPE_V2DF_V4SF_QI:
37215 case V4SF_FTYPE_V4DF_V4SF_QI:
37216 case V16SI_FTYPE_V16SI_V16SI_HI:
37217 case V16SI_FTYPE_V16SI_V16SI_V16SI:
37218 case V16SI_FTYPE_V4SI_V16SI_HI:
37219 case V2DI_FTYPE_V2DI_V2DI_V2DI:
37220 case V2DI_FTYPE_V4SI_V2DI_QI:
37221 case V2DI_FTYPE_V8HI_V2DI_QI:
37222 case V2DI_FTYPE_V16QI_V2DI_QI:
37223 case V4DI_FTYPE_V4DI_V4DI_QI:
37224 case V4DI_FTYPE_V4SI_V4DI_QI:
37225 case V4DI_FTYPE_V8HI_V4DI_QI:
37226 case V4DI_FTYPE_V16QI_V4DI_QI:
37227 case V8DI_FTYPE_V8DF_V8DI_QI:
37228 case V4DI_FTYPE_V4DF_V4DI_QI:
37229 case V2DI_FTYPE_V2DF_V2DI_QI:
37230 case V4SI_FTYPE_V4DF_V4SI_QI:
37231 case V4SI_FTYPE_V2DF_V4SI_QI:
37232 case V4SI_FTYPE_V8HI_V4SI_QI:
37233 case V4SI_FTYPE_V16QI_V4SI_QI:
37234 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37235 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37236 case V8DF_FTYPE_V2DF_V8DF_QI:
37237 case V8DF_FTYPE_V4DF_V8DF_QI:
37238 case V8DF_FTYPE_V8DF_V8DF_QI:
37239 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37240 case V8SF_FTYPE_V8SF_V8SF_QI:
37241 case V8SF_FTYPE_V8SI_V8SF_QI:
37242 case V4DF_FTYPE_V4DF_V4DF_QI:
37243 case V4SF_FTYPE_V4SF_V4SF_QI:
37244 case V2DF_FTYPE_V2DF_V2DF_QI:
37245 case V2DF_FTYPE_V4SF_V2DF_QI:
37246 case V2DF_FTYPE_V4SI_V2DF_QI:
37247 case V4SF_FTYPE_V4SI_V4SF_QI:
37248 case V4DF_FTYPE_V4SF_V4DF_QI:
37249 case V4DF_FTYPE_V4SI_V4DF_QI:
37250 case V8SI_FTYPE_V8SI_V8SI_QI:
37251 case V8SI_FTYPE_V8HI_V8SI_QI:
37252 case V8SI_FTYPE_V16QI_V8SI_QI:
37253 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37254 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37255 case V8DF_FTYPE_V8SF_V8DF_QI:
37256 case V8DF_FTYPE_V8SI_V8DF_QI:
37257 case V8DI_FTYPE_DI_V8DI_QI:
37258 case V16SF_FTYPE_V8SF_V16SF_HI:
37259 case V16SI_FTYPE_V8SI_V16SI_HI:
37260 case V16HI_FTYPE_V16HI_V16HI_HI:
37261 case V8HI_FTYPE_V16QI_V8HI_QI:
37262 case V16HI_FTYPE_V16QI_V16HI_HI:
37263 case V32HI_FTYPE_V32HI_V32HI_SI:
37264 case V32HI_FTYPE_V32QI_V32HI_SI:
37265 case V8DI_FTYPE_V16QI_V8DI_QI:
37266 case V8DI_FTYPE_V2DI_V8DI_QI:
37267 case V8DI_FTYPE_V4DI_V8DI_QI:
37268 case V8DI_FTYPE_V8DI_V8DI_QI:
37269 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37270 case V8DI_FTYPE_V8HI_V8DI_QI:
37271 case V8DI_FTYPE_V8SI_V8DI_QI:
37272 case V8HI_FTYPE_V8DI_V8HI_QI:
37273 case V8SF_FTYPE_V8DF_V8SF_QI:
37274 case V8SI_FTYPE_V8DF_V8SI_QI:
37275 case V8SI_FTYPE_V8DI_V8SI_QI:
37276 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37279 case V32QI_FTYPE_V32QI_V32QI_INT:
37280 case V16HI_FTYPE_V16HI_V16HI_INT:
37281 case V16QI_FTYPE_V16QI_V16QI_INT:
37282 case V4DI_FTYPE_V4DI_V4DI_INT:
37283 case V8HI_FTYPE_V8HI_V8HI_INT:
37284 case V8SI_FTYPE_V8SI_V8SI_INT:
37285 case V8SI_FTYPE_V8SI_V4SI_INT:
37286 case V8SF_FTYPE_V8SF_V8SF_INT:
37287 case V8SF_FTYPE_V8SF_V4SF_INT:
37288 case V4SI_FTYPE_V4SI_V4SI_INT:
37289 case V4DF_FTYPE_V4DF_V4DF_INT:
37290 case V16SF_FTYPE_V16SF_V16SF_INT:
37291 case V16SF_FTYPE_V16SF_V4SF_INT:
37292 case V16SI_FTYPE_V16SI_V4SI_INT:
37293 case V4DF_FTYPE_V4DF_V2DF_INT:
37294 case V4SF_FTYPE_V4SF_V4SF_INT:
37295 case V2DI_FTYPE_V2DI_V2DI_INT:
37296 case V4DI_FTYPE_V4DI_V2DI_INT:
37297 case V2DF_FTYPE_V2DF_V2DF_INT:
37298 case QI_FTYPE_V8DI_V8DI_INT:
37299 case QI_FTYPE_V8DF_V8DF_INT:
37300 case QI_FTYPE_V2DF_V2DF_INT:
37301 case QI_FTYPE_V4SF_V4SF_INT:
37302 case HI_FTYPE_V16SI_V16SI_INT:
37303 case HI_FTYPE_V16SF_V16SF_INT:
37305 nargs_constant = 1;
37307 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37310 nargs_constant = 1;
37312 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37315 nargs_constant = 1;
37317 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37320 nargs_constant = 1;
37322 case V2DI_FTYPE_V2DI_UINT_UINT:
37324 nargs_constant = 2;
37326 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37329 nargs_constant = 1;
37331 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37335 nargs_constant = 1;
37337 case QI_FTYPE_V8DF_INT_QI:
37338 case QI_FTYPE_V4DF_INT_QI:
37339 case QI_FTYPE_V2DF_INT_QI:
37340 case HI_FTYPE_V16SF_INT_HI:
37341 case QI_FTYPE_V8SF_INT_QI:
37342 case QI_FTYPE_V4SF_INT_QI:
37345 nargs_constant = 1;
37347 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37351 nargs_constant = 1;
37353 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37357 nargs_constant = 1;
37359 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37360 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37361 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37362 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37363 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37364 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37365 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37366 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37367 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37368 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37369 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37370 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37371 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37372 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37373 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37374 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37375 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37376 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37377 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37378 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37379 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37380 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37381 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37382 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37383 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37384 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37385 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37386 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37387 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37388 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37389 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37390 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37391 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37392 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37393 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37394 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37395 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37396 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37397 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37398 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37399 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37400 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37401 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37402 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37403 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37404 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37405 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37406 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37407 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37408 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37409 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37410 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37411 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37412 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37415 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37416 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37417 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37418 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37419 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37421 nargs_constant = 1;
37423 case QI_FTYPE_V4DI_V4DI_INT_QI:
37424 case QI_FTYPE_V8SI_V8SI_INT_QI:
37425 case QI_FTYPE_V4DF_V4DF_INT_QI:
37426 case QI_FTYPE_V8SF_V8SF_INT_QI:
37427 case QI_FTYPE_V2DI_V2DI_INT_QI:
37428 case QI_FTYPE_V4SI_V4SI_INT_QI:
37429 case QI_FTYPE_V2DF_V2DF_INT_QI:
37430 case QI_FTYPE_V4SF_V4SF_INT_QI:
37431 case DI_FTYPE_V64QI_V64QI_INT_DI:
37432 case SI_FTYPE_V32QI_V32QI_INT_SI:
37433 case HI_FTYPE_V16QI_V16QI_INT_HI:
37434 case SI_FTYPE_V32HI_V32HI_INT_SI:
37435 case HI_FTYPE_V16HI_V16HI_INT_HI:
37436 case QI_FTYPE_V8HI_V8HI_INT_QI:
37439 nargs_constant = 1;
37441 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37443 nargs_constant = 2;
37445 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37446 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37449 case QI_FTYPE_V8DI_V8DI_INT_QI:
37450 case HI_FTYPE_V16SI_V16SI_INT_HI:
37451 case QI_FTYPE_V8DF_V8DF_INT_QI:
37452 case HI_FTYPE_V16SF_V16SF_INT_HI:
37455 nargs_constant = 1;
37457 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37458 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37459 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37460 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37461 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37462 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37463 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37464 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37465 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37466 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37467 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37468 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37469 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37470 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37471 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37472 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37473 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37474 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37475 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37476 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37477 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37478 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37479 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37480 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37481 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37482 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37483 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37484 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37485 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37486 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37489 nargs_constant = 1;
37491 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37492 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37493 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37494 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37495 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37496 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37497 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37498 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37499 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37500 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37501 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37502 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37503 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37504 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37505 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37506 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37507 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37508 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37509 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37510 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37511 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37512 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37513 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37514 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37515 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37516 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37517 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37520 nargs_constant = 1;
37522 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37523 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37524 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37525 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37526 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37527 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37528 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37529 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37530 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37531 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37532 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37536 nargs_constant = 1;
37540 gcc_unreachable ();
37543 gcc_assert (nargs <= ARRAY_SIZE (args));
37545 if (comparison != UNKNOWN)
37547 gcc_assert (nargs == 2);
37548 return ix86_expand_sse_compare (d, exp, target, swap);
37551 if (rmode == VOIDmode || rmode == tmode)
37555 || GET_MODE (target) != tmode
37556 || !insn_p->operand[0].predicate (target, tmode))
37557 target = gen_reg_rtx (tmode);
37558 real_target = target;
37562 real_target = gen_reg_rtx (tmode);
37563 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37566 for (i = 0; i < nargs; i++)
37568 tree arg = CALL_EXPR_ARG (exp, i);
37569 rtx op = expand_normal (arg);
37570 machine_mode mode = insn_p->operand[i + 1].mode;
37571 bool match = insn_p->operand[i + 1].predicate (op, mode);
37573 if (last_arg_count && (i + 1) == nargs)
37575 /* SIMD shift insns take either an 8-bit immediate or
37576 register as count. But builtin functions take int as
37577 count. If count doesn't match, we put it in register. */
37580 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37581 if (!insn_p->operand[i + 1].predicate (op, mode))
37582 op = copy_to_reg (op);
37585 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37586 (!mask_pos && (nargs - i) <= nargs_constant))
37591 case CODE_FOR_avx_vinsertf128v4di:
37592 case CODE_FOR_avx_vextractf128v4di:
37593 error ("the last argument must be an 1-bit immediate");
37596 case CODE_FOR_avx512f_cmpv8di3_mask:
37597 case CODE_FOR_avx512f_cmpv16si3_mask:
37598 case CODE_FOR_avx512f_ucmpv8di3_mask:
37599 case CODE_FOR_avx512f_ucmpv16si3_mask:
37600 case CODE_FOR_avx512vl_cmpv4di3_mask:
37601 case CODE_FOR_avx512vl_cmpv8si3_mask:
37602 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37603 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37604 case CODE_FOR_avx512vl_cmpv2di3_mask:
37605 case CODE_FOR_avx512vl_cmpv4si3_mask:
37606 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37607 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37608 error ("the last argument must be a 3-bit immediate");
37611 case CODE_FOR_sse4_1_roundsd:
37612 case CODE_FOR_sse4_1_roundss:
37614 case CODE_FOR_sse4_1_roundpd:
37615 case CODE_FOR_sse4_1_roundps:
37616 case CODE_FOR_avx_roundpd256:
37617 case CODE_FOR_avx_roundps256:
37619 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37620 case CODE_FOR_sse4_1_roundps_sfix:
37621 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37622 case CODE_FOR_avx_roundps_sfix256:
37624 case CODE_FOR_sse4_1_blendps:
37625 case CODE_FOR_avx_blendpd256:
37626 case CODE_FOR_avx_vpermilv4df:
37627 case CODE_FOR_avx_vpermilv4df_mask:
37628 case CODE_FOR_avx512f_getmantv8df_mask:
37629 case CODE_FOR_avx512f_getmantv16sf_mask:
37630 case CODE_FOR_avx512vl_getmantv8sf_mask:
37631 case CODE_FOR_avx512vl_getmantv4df_mask:
37632 case CODE_FOR_avx512vl_getmantv4sf_mask:
37633 case CODE_FOR_avx512vl_getmantv2df_mask:
37634 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37635 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37636 case CODE_FOR_avx512dq_rangepv4df_mask:
37637 case CODE_FOR_avx512dq_rangepv8sf_mask:
37638 case CODE_FOR_avx512dq_rangepv2df_mask:
37639 case CODE_FOR_avx512dq_rangepv4sf_mask:
37640 case CODE_FOR_avx_shufpd256_mask:
37641 error ("the last argument must be a 4-bit immediate");
37644 case CODE_FOR_sha1rnds4:
37645 case CODE_FOR_sse4_1_blendpd:
37646 case CODE_FOR_avx_vpermilv2df:
37647 case CODE_FOR_avx_vpermilv2df_mask:
37648 case CODE_FOR_xop_vpermil2v2df3:
37649 case CODE_FOR_xop_vpermil2v4sf3:
37650 case CODE_FOR_xop_vpermil2v4df3:
37651 case CODE_FOR_xop_vpermil2v8sf3:
37652 case CODE_FOR_avx512f_vinsertf32x4_mask:
37653 case CODE_FOR_avx512f_vinserti32x4_mask:
37654 case CODE_FOR_avx512f_vextractf32x4_mask:
37655 case CODE_FOR_avx512f_vextracti32x4_mask:
37656 case CODE_FOR_sse2_shufpd:
37657 case CODE_FOR_sse2_shufpd_mask:
37658 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37659 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37660 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37661 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37662 error ("the last argument must be a 2-bit immediate");
37665 case CODE_FOR_avx_vextractf128v4df:
37666 case CODE_FOR_avx_vextractf128v8sf:
37667 case CODE_FOR_avx_vextractf128v8si:
37668 case CODE_FOR_avx_vinsertf128v4df:
37669 case CODE_FOR_avx_vinsertf128v8sf:
37670 case CODE_FOR_avx_vinsertf128v8si:
37671 case CODE_FOR_avx512f_vinsertf64x4_mask:
37672 case CODE_FOR_avx512f_vinserti64x4_mask:
37673 case CODE_FOR_avx512f_vextractf64x4_mask:
37674 case CODE_FOR_avx512f_vextracti64x4_mask:
37675 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37676 case CODE_FOR_avx512dq_vinserti32x8_mask:
37677 case CODE_FOR_avx512vl_vinsertv4df:
37678 case CODE_FOR_avx512vl_vinsertv4di:
37679 case CODE_FOR_avx512vl_vinsertv8sf:
37680 case CODE_FOR_avx512vl_vinsertv8si:
37681 error ("the last argument must be a 1-bit immediate");
37684 case CODE_FOR_avx_vmcmpv2df3:
37685 case CODE_FOR_avx_vmcmpv4sf3:
37686 case CODE_FOR_avx_cmpv2df3:
37687 case CODE_FOR_avx_cmpv4sf3:
37688 case CODE_FOR_avx_cmpv4df3:
37689 case CODE_FOR_avx_cmpv8sf3:
37690 case CODE_FOR_avx512f_cmpv8df3_mask:
37691 case CODE_FOR_avx512f_cmpv16sf3_mask:
37692 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37693 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37694 error ("the last argument must be a 5-bit immediate");
37698 switch (nargs_constant)
37701 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37702 (!mask_pos && (nargs - i) == nargs_constant))
37704 error ("the next to last argument must be an 8-bit immediate");
37708 error ("the last argument must be an 8-bit immediate");
37711 gcc_unreachable ();
37718 if (VECTOR_MODE_P (mode))
37719 op = safe_vector_operand (op, mode);
37721 /* If we aren't optimizing, only allow one memory operand to
37723 if (memory_operand (op, mode))
37726 op = fixup_modeless_constant (op, mode);
37728 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37730 if (optimize || !match || num_memory > 1)
37731 op = copy_to_mode_reg (mode, op);
37735 op = copy_to_reg (op);
37736 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37741 args[i].mode = mode;
37747 pat = GEN_FCN (icode) (real_target, args[0].op);
37750 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37753 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37757 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37758 args[2].op, args[3].op);
37761 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37762 args[2].op, args[3].op, args[4].op);
37765 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37766 args[2].op, args[3].op, args[4].op,
37770 gcc_unreachable ();
37780 /* Transform pattern of following layout:
37783 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37791 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37795 (parallel [ A B ... ]) */
37798 ix86_erase_embedded_rounding (rtx pat)
/* If handed a whole insn rather than a pattern, peel off the pattern.  */
37800 if (GET_CODE (pat) == INSN)
37801 pat = PATTERN (pat);
37803 gcc_assert (GET_CODE (pat) == PARALLEL);
/* Two-element PARALLEL: element 0 is the SET, element 1 must be the
   embedded-rounding marker; the SET alone replaces the PARALLEL.  */
37805 if (XVECLEN (pat, 0) == 2)
37807 rtx p0 = XVECEXP (pat, 0, 0);
37808 rtx p1 = XVECEXP (pat, 0, 1);
37810 gcc_assert (GET_CODE (p0) == SET
37811 && GET_CODE (p1) == UNSPEC
37812 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
/* General case: copy every element except the UNSPEC_EMBEDDED_ROUNDING
   marker into RES, then rebuild a smaller PARALLEL from it.  */
37818 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37822 for (; i < XVECLEN (pat, 0); ++i)
37824 rtx elem = XVECEXP (pat, 0, i);
37825 if (GET_CODE (elem) != UNSPEC
37826 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
/* At most one marker may have been dropped from the vector.  */
37830 /* No more than 1 occurence was removed. */
37831 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37833 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37837 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37840 ix86_expand_sse_comi_round (const struct builtin_description *d,
37841 tree exp, rtx target)
/* ARG0/ARG1 are the vector operands to compare, ARG2 is the comparison
   predicate immediate, ARG3 is the rounding-mode immediate.  */
37844 tree arg0 = CALL_EXPR_ARG (exp, 0);
37845 tree arg1 = CALL_EXPR_ARG (exp, 1);
37846 tree arg2 = CALL_EXPR_ARG (exp, 2);
37847 tree arg3 = CALL_EXPR_ARG (exp, 3);
37848 rtx op0 = expand_normal (arg0);
37849 rtx op1 = expand_normal (arg1);
37850 rtx op2 = expand_normal (arg2);
37851 rtx op3 = expand_normal (arg3);
37852 enum insn_code icode = d->icode;
37853 const struct insn_data_d *insn_p = &insn_data[icode];
37854 machine_mode mode0 = insn_p->operand[0].mode;
37855 machine_mode mode1 = insn_p->operand[1].mode;
37856 enum rtx_code comparison = UNEQ;
37857 bool need_ucomi = false;
/* Map each of the 32 predicate immediates to the rtx comparison code
   and to whether the unordered (ucomi) variant is needed.  */
37859 /* See avxintrin.h for values. */
37860 enum rtx_code comi_comparisons[32] =
37862 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37863 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37864 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37866 bool need_ucomi_values[32] =
37868 true, false, false, true, true, false, false, true,
37869 true, false, false, true, true, false, false, true,
37870 false, true, true, false, false, true, true, false,
37871 false, true, true, false, false, true, true, false
/* Validate the predicate immediate: must be a constant in [0, 32).  */
37874 if (!CONST_INT_P (op2))
37876 error ("the third argument must be comparison constant");
37879 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37881 error ("incorrect comparison mode");
/* Validate the rounding immediate against the insn's own predicate.  */
37885 if (!insn_p->operand[2].predicate (op3, SImode))
37887 error ("incorrect rounding operand");
37891 comparison = comi_comparisons[INTVAL (op2)];
37892 need_ucomi = need_ucomi_values[INTVAL (op2)];
37894 if (VECTOR_MODE_P (mode0))
37895 op0 = safe_vector_operand (op0, mode0);
37896 if (VECTOR_MODE_P (mode1))
37897 op1 = safe_vector_operand (op1, mode1);
/* Result is built in a zeroed SImode register; the comparison byte is
   written through a QImode subreg via STRICT_LOW_PART below.  */
37899 target = gen_reg_rtx (SImode);
37900 emit_move_insn (target, const0_rtx);
37901 target = gen_rtx_SUBREG (QImode, target, 0);
37903 if ((optimize && !register_operand (op0, mode0))
37904 || !insn_p->operand[0].predicate (op0, mode0))
37905 op0 = copy_to_mode_reg (mode0, op0);
37906 if ((optimize && !register_operand (op1, mode1))
37907 || !insn_p->operand[1].predicate (op1, mode1))
37908 op1 = copy_to_mode_reg (mode1, op1);
/* Switch to the unordered-compare icode when the predicate needs it.  */
37911 icode = icode == CODE_FOR_sse_comi_round
37912 ? CODE_FOR_sse_ucomi_round
37913 : CODE_FOR_sse2_ucomi_round;
37915 pat = GEN_FCN (icode) (op0, op1, op3);
37919 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37920 if (INTVAL (op3) == NO_ROUND)
/* No embedded rounding requested: strip the redundant marker.  */
37922 pat = ix86_erase_embedded_rounding (pat);
37926 set_dst = SET_DEST (pat);
37930 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37931 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
/* Store the comparison result into the low byte of TARGET.  */
37935 emit_insn (gen_rtx_SET (VOIDmode,
37936 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37937 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register underlying the QImode subreg.  */
37941 return SUBREG_REG (target);
/* Expand a builtin that carries an explicit embedded-rounding operand.
   D describes the builtin, EXP is the CALL_EXPR, TARGET is a suggested
   result location.  The trailing INT argument of each signature is the
   rounding immediate; NARGS_CONSTANT counts trailing immediates.  */
37945 ix86_expand_round_builtin (const struct builtin_description *d,
37946 tree exp, rtx target)
37949 unsigned int i, nargs;
37955 enum insn_code icode = d->icode;
37956 const struct insn_data_d *insn_p = &insn_data[icode];
37957 machine_mode tmode = insn_p->operand[0].mode;
37958 unsigned int nargs_constant = 0;
/* Set when the rounding operand is NO_ROUND, so the embedded-rounding
   marker can be erased from the generated pattern afterwards.  */
37959 unsigned int redundant_embed_rnd = 0;
/* Classify the builtin signature: determines NARGS and how many of the
   trailing arguments must be immediates.  */
37961 switch ((enum ix86_builtin_func_type) d->flag)
37963 case UINT64_FTYPE_V2DF_INT:
37964 case UINT64_FTYPE_V4SF_INT:
37965 case UINT_FTYPE_V2DF_INT:
37966 case UINT_FTYPE_V4SF_INT:
37967 case INT64_FTYPE_V2DF_INT:
37968 case INT64_FTYPE_V4SF_INT:
37969 case INT_FTYPE_V2DF_INT:
37970 case INT_FTYPE_V4SF_INT:
37973 case V4SF_FTYPE_V4SF_UINT_INT:
37974 case V4SF_FTYPE_V4SF_UINT64_INT:
37975 case V2DF_FTYPE_V2DF_UINT64_INT:
37976 case V4SF_FTYPE_V4SF_INT_INT:
37977 case V4SF_FTYPE_V4SF_INT64_INT:
37978 case V2DF_FTYPE_V2DF_INT64_INT:
37979 case V4SF_FTYPE_V4SF_V4SF_INT:
37980 case V2DF_FTYPE_V2DF_V2DF_INT:
37981 case V4SF_FTYPE_V4SF_V2DF_INT:
37982 case V2DF_FTYPE_V2DF_V4SF_INT:
37985 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37986 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37987 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37988 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37989 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37990 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37991 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37992 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37993 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37994 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37995 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37996 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37997 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37998 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
38001 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
38002 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
38003 nargs_constant = 2;
/* comi-style comparisons are handled by a dedicated expander.  */
38006 case INT_FTYPE_V4SF_V4SF_INT_INT:
38007 case INT_FTYPE_V2DF_V2DF_INT_INT:
38008 return ix86_expand_sse_comi_round (d, exp, target);
38009 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
38010 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
38011 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
38012 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
38013 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
38014 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
38017 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
38018 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
38019 nargs_constant = 4;
38022 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
38023 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
38024 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
38025 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
38026 nargs_constant = 3;
38029 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
38030 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
38031 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
38032 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
38034 nargs_constant = 4;
38036 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
38037 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
38038 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
38039 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
38041 nargs_constant = 3;
38044 gcc_unreachable ();
38046 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Make sure TARGET is a fresh register of the right mode if the
   suggested one is unusable.  */
38050 || GET_MODE (target) != tmode
38051 || !insn_p->operand[0].predicate (target, tmode))
38052 target = gen_reg_rtx (tmode);
/* Expand each call argument and legitimize it for operand I+1.  */
38054 for (i = 0; i < nargs; i++)
38056 tree arg = CALL_EXPR_ARG (exp, i);
38057 rtx op = expand_normal (arg);
38058 machine_mode mode = insn_p->operand[i + 1].mode;
38059 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* First of the trailing immediates: diagnose the required width
   for insns with narrower-than-8-bit immediate fields.  */
38061 if (i == nargs - nargs_constant)
38067 case CODE_FOR_avx512f_getmantv8df_mask_round:
38068 case CODE_FOR_avx512f_getmantv16sf_mask_round:
38069 case CODE_FOR_avx512f_vgetmantv2df_round:
38070 case CODE_FOR_avx512f_vgetmantv4sf_round:
38071 error ("the immediate argument must be a 4-bit immediate");
38073 case CODE_FOR_avx512f_cmpv8df3_mask_round:
38074 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
38075 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
38076 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
38077 error ("the immediate argument must be a 5-bit immediate");
38080 error ("the immediate argument must be an 8-bit immediate");
/* Last argument: the rounding-mode immediate.  */
38085 else if (i == nargs-1)
38087 if (!insn_p->operand[nargs].predicate (op, SImode))
38089 error ("incorrect rounding operand");
38093 /* If there is no rounding use normal version of the pattern. */
38094 if (INTVAL (op) == NO_ROUND)
38095 redundant_embed_rnd = 1;
38099 if (VECTOR_MODE_P (mode))
38100 op = safe_vector_operand (op, mode);
38102 op = fixup_modeless_constant (op, mode);
38104 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38106 if (optimize || !match)
38107 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: force into a register and reinterpret via subreg.  */
38111 op = copy_to_reg (op);
38112 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38117 args[i].mode = mode;
/* Emit the insn with the right arity.  */
38123 pat = GEN_FCN (icode) (target, args[0].op);
38126 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38129 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38133 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38134 args[2].op, args[3].op);
38137 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38138 args[2].op, args[3].op, args[4].op);
38141 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38142 args[2].op, args[3].op, args[4].op,
38146 gcc_unreachable ();
/* Rounding was NO_ROUND: drop the embedded-rounding marker.  */
38152 if (redundant_embed_rnd)
38153 pat = ix86_erase_embedded_rounding (pat);
38159 /* Subroutine of ix86_expand_builtin to take care of special insns
38160 with variable number of operands. */
38163 ix86_expand_special_args_builtin (const struct builtin_description *d,
38164 tree exp, rtx target)
38168 unsigned int i, nargs, arg_adjust, memory;
/* Set for insns that require their memory operand to be aligned to the
   mode's natural alignment (non-temporal and masked load/store forms).  */
38169 bool aligned_mem = false;
38175 enum insn_code icode = d->icode;
38176 bool last_arg_constant = false;
38177 const struct insn_data_d *insn_p = &insn_data[icode];
38178 machine_mode tmode = insn_p->operand[0].mode;
/* KLASS distinguishes loads (result in TARGET) from stores (TARGET is
   the destination MEM; the expansion returns 0).  */
38179 enum { load, store } klass;
/* Classify the signature: sets NARGS, KLASS, which operand (if any) is
   the memory operand, and alignment requirements.  */
38181 switch ((enum ix86_builtin_func_type) d->flag)
38183 case VOID_FTYPE_VOID:
38184 emit_insn (GEN_FCN (icode) (target));
38186 case VOID_FTYPE_UINT64:
38187 case VOID_FTYPE_UNSIGNED:
38193 case INT_FTYPE_VOID:
38194 case USHORT_FTYPE_VOID:
38195 case UINT64_FTYPE_VOID:
38196 case UNSIGNED_FTYPE_VOID:
38201 case UINT64_FTYPE_PUNSIGNED:
38202 case V2DI_FTYPE_PV2DI:
38203 case V4DI_FTYPE_PV4DI:
38204 case V32QI_FTYPE_PCCHAR:
38205 case V16QI_FTYPE_PCCHAR:
38206 case V8SF_FTYPE_PCV4SF:
38207 case V8SF_FTYPE_PCFLOAT:
38208 case V4SF_FTYPE_PCFLOAT:
38209 case V4DF_FTYPE_PCV2DF:
38210 case V4DF_FTYPE_PCDOUBLE:
38211 case V2DF_FTYPE_PCDOUBLE:
38212 case VOID_FTYPE_PVOID:
38213 case V16SI_FTYPE_PV4SI:
38214 case V16SF_FTYPE_PV4SF:
38215 case V8DI_FTYPE_PV4DI:
38216 case V8DI_FTYPE_PV8DI:
38217 case V8DF_FTYPE_PV4DF:
/* Non-temporal loads need naturally aligned memory.  */
38223 case CODE_FOR_sse4_1_movntdqa:
38224 case CODE_FOR_avx2_movntdqa:
38225 case CODE_FOR_avx512f_movntdqa:
38226 aligned_mem = true;
38232 case VOID_FTYPE_PV2SF_V4SF:
38233 case VOID_FTYPE_PV8DI_V8DI:
38234 case VOID_FTYPE_PV4DI_V4DI:
38235 case VOID_FTYPE_PV2DI_V2DI:
38236 case VOID_FTYPE_PCHAR_V32QI:
38237 case VOID_FTYPE_PCHAR_V16QI:
38238 case VOID_FTYPE_PFLOAT_V16SF:
38239 case VOID_FTYPE_PFLOAT_V8SF:
38240 case VOID_FTYPE_PFLOAT_V4SF:
38241 case VOID_FTYPE_PDOUBLE_V8DF:
38242 case VOID_FTYPE_PDOUBLE_V4DF:
38243 case VOID_FTYPE_PDOUBLE_V2DF:
38244 case VOID_FTYPE_PLONGLONG_LONGLONG:
38245 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38246 case VOID_FTYPE_PINT_INT:
38249 /* Reserve memory operand for target. */
38250 memory = ARRAY_SIZE (args);
38253 /* These builtins and instructions require the memory
38254 to be properly aligned. */
38255 case CODE_FOR_avx_movntv4di:
38256 case CODE_FOR_sse2_movntv2di:
38257 case CODE_FOR_avx_movntv8sf:
38258 case CODE_FOR_sse_movntv4sf:
38259 case CODE_FOR_sse4a_vmmovntv4sf:
38260 case CODE_FOR_avx_movntv4df:
38261 case CODE_FOR_sse2_movntv2df:
38262 case CODE_FOR_sse4a_vmmovntv2df:
38263 case CODE_FOR_sse2_movntidi:
38264 case CODE_FOR_sse_movntq:
38265 case CODE_FOR_sse2_movntisi:
38266 case CODE_FOR_avx512f_movntv16sf:
38267 case CODE_FOR_avx512f_movntv8df:
38268 case CODE_FOR_avx512f_movntv8di:
38269 aligned_mem = true;
38275 case V4SF_FTYPE_V4SF_PCV2SF:
38276 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Gather-style masked loads: pointer plus index/mask vector.  */
38281 case V8SF_FTYPE_PCV8SF_V8SI:
38282 case V4DF_FTYPE_PCV4DF_V4DI:
38283 case V4SF_FTYPE_PCV4SF_V4SI:
38284 case V2DF_FTYPE_PCV2DF_V2DI:
38285 case V8SI_FTYPE_PCV8SI_V8SI:
38286 case V4DI_FTYPE_PCV4DI_V4DI:
38287 case V4SI_FTYPE_PCV4SI_V4SI:
38288 case V2DI_FTYPE_PCV2DI_V2DI:
/* Masked stores: pointer destination, source vector, mask.  */
38293 case VOID_FTYPE_PV8DF_V8DF_QI:
38294 case VOID_FTYPE_PV4DF_V4DF_QI:
38295 case VOID_FTYPE_PV2DF_V2DF_QI:
38296 case VOID_FTYPE_PV16SF_V16SF_HI:
38297 case VOID_FTYPE_PV8SF_V8SF_QI:
38298 case VOID_FTYPE_PV4SF_V4SF_QI:
38299 case VOID_FTYPE_PV8DI_V8DI_QI:
38300 case VOID_FTYPE_PV4DI_V4DI_QI:
38301 case VOID_FTYPE_PV2DI_V2DI_QI:
38302 case VOID_FTYPE_PV16SI_V16SI_HI:
38303 case VOID_FTYPE_PV8SI_V8SI_QI:
38304 case VOID_FTYPE_PV4SI_V4SI_QI:
38307 /* These builtins and instructions require the memory
38308 to be properly aligned. */
38309 case CODE_FOR_avx512f_storev16sf_mask:
38310 case CODE_FOR_avx512f_storev16si_mask:
38311 case CODE_FOR_avx512f_storev8df_mask:
38312 case CODE_FOR_avx512f_storev8di_mask:
38313 case CODE_FOR_avx512vl_storev8sf_mask:
38314 case CODE_FOR_avx512vl_storev8si_mask:
38315 case CODE_FOR_avx512vl_storev4df_mask:
38316 case CODE_FOR_avx512vl_storev4di_mask:
38317 case CODE_FOR_avx512vl_storev4sf_mask:
38318 case CODE_FOR_avx512vl_storev4si_mask:
38319 case CODE_FOR_avx512vl_storev2df_mask:
38320 case CODE_FOR_avx512vl_storev2di_mask:
38321 aligned_mem = true;
38327 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38328 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38329 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38330 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38331 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38332 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38333 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38334 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38335 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38336 case VOID_FTYPE_PFLOAT_V4SF_QI:
38337 case VOID_FTYPE_PV8SI_V8DI_QI:
38338 case VOID_FTYPE_PV8HI_V8DI_QI:
38339 case VOID_FTYPE_PV16HI_V16SI_HI:
38340 case VOID_FTYPE_PV16QI_V8DI_QI:
38341 case VOID_FTYPE_PV16QI_V16SI_HI:
38342 case VOID_FTYPE_PV4SI_V4DI_QI:
38343 case VOID_FTYPE_PV4SI_V2DI_QI:
38344 case VOID_FTYPE_PV8HI_V4DI_QI:
38345 case VOID_FTYPE_PV8HI_V2DI_QI:
38346 case VOID_FTYPE_PV8HI_V8SI_QI:
38347 case VOID_FTYPE_PV8HI_V4SI_QI:
38348 case VOID_FTYPE_PV16QI_V4DI_QI:
38349 case VOID_FTYPE_PV16QI_V2DI_QI:
38350 case VOID_FTYPE_PV16QI_V8SI_QI:
38351 case VOID_FTYPE_PV16QI_V4SI_QI:
38352 case VOID_FTYPE_PV8HI_V8HI_QI:
38353 case VOID_FTYPE_PV16HI_V16HI_HI:
38354 case VOID_FTYPE_PV32HI_V32HI_SI:
38355 case VOID_FTYPE_PV16QI_V16QI_HI:
38356 case VOID_FTYPE_PV32QI_V32QI_SI:
38357 case VOID_FTYPE_PV64QI_V64QI_DI:
38360 /* Reserve memory operand for target. */
38361 memory = ARRAY_SIZE (args);
/* Masked loads: pointer source, merge vector, mask.  */
38363 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38364 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38365 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38366 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38367 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38368 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38369 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38370 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38371 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38372 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38373 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38374 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38375 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38376 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38377 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38378 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38379 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38380 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38381 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38382 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38388 /* These builtins and instructions require the memory
38389 to be properly aligned. */
38390 case CODE_FOR_avx512f_loadv16sf_mask:
38391 case CODE_FOR_avx512f_loadv16si_mask:
38392 case CODE_FOR_avx512f_loadv8df_mask:
38393 case CODE_FOR_avx512f_loadv8di_mask:
38394 case CODE_FOR_avx512vl_loadv8sf_mask:
38395 case CODE_FOR_avx512vl_loadv8si_mask:
38396 case CODE_FOR_avx512vl_loadv4df_mask:
38397 case CODE_FOR_avx512vl_loadv4di_mask:
38398 case CODE_FOR_avx512vl_loadv4sf_mask:
38399 case CODE_FOR_avx512vl_loadv4si_mask:
38400 case CODE_FOR_avx512vl_loadv2df_mask:
38401 case CODE_FOR_avx512vl_loadv2di_mask:
38402 case CODE_FOR_avx512bw_loadv64qi_mask:
38403 case CODE_FOR_avx512vl_loadv32qi_mask:
38404 case CODE_FOR_avx512vl_loadv16qi_mask:
38405 case CODE_FOR_avx512bw_loadv32hi_mask:
38406 case CODE_FOR_avx512vl_loadv16hi_mask:
38407 case CODE_FOR_avx512vl_loadv8hi_mask:
38408 aligned_mem = true;
38414 case VOID_FTYPE_UINT_UINT_UINT:
38415 case VOID_FTYPE_UINT64_UINT_UINT:
38416 case UCHAR_FTYPE_UINT_UINT_UINT:
38417 case UCHAR_FTYPE_UINT64_UINT_UINT:
38420 memory = ARRAY_SIZE (args);
38421 last_arg_constant = true;
38424 gcc_unreachable ();
38427 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Store class: first call argument is the destination address;
   build the destination MEM and improve its known alignment.  */
38429 if (klass == store)
38431 arg = CALL_EXPR_ARG (exp, 0);
38432 op = expand_normal (arg);
38433 gcc_assert (target == 0);
38436 op = ix86_zero_extend_to_Pmode (op);
38437 target = gen_rtx_MEM (tmode, op);
38438 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38439 on it. Try to improve it using get_pointer_alignment,
38440 and if the special builtin is one that requires strict
38441 mode alignment, also from it's GET_MODE_ALIGNMENT.
38442 Failure to do so could lead to ix86_legitimate_combined_insn
38443 rejecting all changes to such insns. */
38444 unsigned int align = get_pointer_alignment (arg);
38445 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38446 align = GET_MODE_ALIGNMENT (tmode);
38447 if (MEM_ALIGN (target) < align)
38448 set_mem_align (target, align);
38451 target = force_reg (tmode, op);
/* Load class: ensure TARGET is a usable register of mode TMODE.  */
38459 || !register_operand (target, tmode)
38460 || GET_MODE (target) != tmode)
38461 target = gen_reg_rtx (tmode);
/* Expand and legitimize the remaining call arguments.  */
38464 for (i = 0; i < nargs; i++)
38466 machine_mode mode = insn_p->operand[i + 1].mode;
38469 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38470 op = expand_normal (arg);
38471 match = insn_p->operand[i + 1].predicate (op, mode);
/* Last argument of LWP-style builtins must be an immediate.  */
38473 if (last_arg_constant && (i + 1) == nargs)
38477 if (icode == CODE_FOR_lwp_lwpvalsi3
38478 || icode == CODE_FOR_lwp_lwpinssi3
38479 || icode == CODE_FOR_lwp_lwpvaldi3
38480 || icode == CODE_FOR_lwp_lwpinsdi3)
38481 error ("the last argument must be a 32-bit immediate");
38483 error ("the last argument must be an 8-bit immediate");
38491 /* This must be the memory operand. */
38492 op = ix86_zero_extend_to_Pmode (op);
38493 op = gen_rtx_MEM (mode, op);
38494 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38495 on it. Try to improve it using get_pointer_alignment,
38496 and if the special builtin is one that requires strict
38497 mode alignment, also from it's GET_MODE_ALIGNMENT.
38498 Failure to do so could lead to ix86_legitimate_combined_insn
38499 rejecting all changes to such insns. */
38500 unsigned int align = get_pointer_alignment (arg);
38501 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38502 align = GET_MODE_ALIGNMENT (mode);
38503 if (MEM_ALIGN (op) < align)
38504 set_mem_align (op, align);
38508 /* This must be register. */
38509 if (VECTOR_MODE_P (mode))
38510 op = safe_vector_operand (op, mode);
38512 op = fixup_modeless_constant (op, mode);
38514 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38515 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: force into a register and reinterpret via subreg.  */
38518 op = copy_to_reg (op);
38519 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38525 args[i].mode = mode;
/* Emit the insn with the right arity.  */
38531 pat = GEN_FCN (icode) (target);
38534 pat = GEN_FCN (icode) (target, args[0].op);
38537 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38540 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38543 gcc_unreachable ();
/* Stores have no value; loads return the result register.  */
38549 return klass == store ? 0 : target;
38552 /* Return the integer constant in ARG. Constrain it to be in the range
38553 of the subparts of VEC_TYPE; issue an error if not. */
38556 get_element_number (tree vec_type, tree arg)
/* MAX is the highest valid lane index for VEC_TYPE.  */
38558 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant or out-of-range selectors with a diagnostic.  */
38560 if (!tree_fits_uhwi_p (arg)
38561 || (elt = tree_to_uhwi (arg), elt > max))
38563 error ("selector must be an integer constant in the range 0..%wi", max);
38570 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38571 ix86_expand_vector_init. We DO have language-level syntax for this, in
38572 the form of (type){ init-list }. Except that since we can't place emms
38573 instructions from inside the compiler, we can't allow the use of MMX
38574 registers unless the user explicitly asks for it. So we do *not* define
38575 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38576 we have builtins invoked by mmintrin.h that gives us license to emit
38577 these sorts of instructions. */
38580 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38582 machine_mode tmode = TYPE_MODE (type);
38583 machine_mode inner_mode = GET_MODE_INNER (tmode);
38584 int i, n_elt = GET_MODE_NUNITS (tmode);
38585 rtvec v = rtvec_alloc (n_elt);
/* The call must supply exactly one initializer per vector element.  */
38587 gcc_assert (VECTOR_MODE_P (tmode));
38588 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each initializer and narrow it to the element mode.  */
38590 for (i = 0; i < n_elt; ++i)
38592 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38593 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38596 if (!target || !register_operand (target, tmode))
38597 target = gen_reg_rtx (tmode);
38599 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38603 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38604 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38605 had a language-level syntax for referencing vector elements. */
38608 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38610 machine_mode tmode, mode0;
/* ARG0 is the source vector, ARG1 the (constant) element selector.  */
38615 arg0 = CALL_EXPR_ARG (exp, 0);
38616 arg1 = CALL_EXPR_ARG (exp, 1);
38618 op0 = expand_normal (arg0);
38619 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the whole-vector mode.  */
38621 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38622 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38623 gcc_assert (VECTOR_MODE_P (mode0));
38625 op0 = force_reg (mode0, op0);
38627 if (optimize || !target || !register_operand (target, tmode))
38628 target = gen_reg_rtx (tmode);
38630 ix86_expand_vector_extract (true, target, op0, elt);
38635 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38636 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38637 a language-level syntax for referencing vector elements. */
38640 ix86_expand_vec_set_builtin (tree exp)
38642 machine_mode tmode, mode1;
38643 tree arg0, arg1, arg2;
38645 rtx op0, op1, target;
/* ARG0 is the source vector, ARG1 the new element value, ARG2 the
   (constant) element selector.  */
38647 arg0 = CALL_EXPR_ARG (exp, 0);
38648 arg1 = CALL_EXPR_ARG (exp, 1);
38649 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode, MODE1 the element mode.  */
38651 tmode = TYPE_MODE (TREE_TYPE (arg0));
38652 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38653 gcc_assert (VECTOR_MODE_P (tmode));
38655 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38656 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38657 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the element value to the element mode if needed.  */
38659 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38660 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38662 op0 = force_reg (tmode, op0);
38663 op1 = force_reg (mode1, op1);
38665 /* OP0 is the source of these builtin functions and shouldn't be
38666 modified. Create a copy, use it and return it as target. */
38667 target = gen_reg_rtx (tmode);
38668 emit_move_insn (target, op0)
38669 ix86_expand_vector_set (true, target, op1, elt);
38674 /* Emit conditional move of SRC to DST with condition
38677 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
/* Fast path: emit a real conditional move (IF_THEN_ELSE SET).  */
38683 t = ix86_expand_compare (code, op1, op2);
38684 emit_insn (gen_rtx_SET (VOIDmode, dst,
38685 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Fallback: branch around an unconditional move when cmov is not
   usable — jump past the move if the reversed condition holds.  */
38690 rtx nomove = gen_label_rtx ();
38691 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38692 const0_rtx, GET_MODE (op1), 1, nomove);
38693 emit_move_insn (dst, src);
38694 emit_label (nomove);
38698 /* Choose max of DST and SRC and put it to DST. */
38700 ix86_emit_move_max (rtx dst, rtx src)
/* Unsigned max: move SRC into DST when DST < SRC (LTU compare).  */
38702 ix86_emit_cmove (dst, src, LTU, dst, src);
38705 /* Expand an expression EXP that calls a built-in function,
38706 with result going to TARGET if that's convenient
38707 (and in mode MODE if that's convenient).
38708 SUBTARGET may be used as the target for computing one of EXP's operands.
38709 IGNORE is nonzero if the value is to be ignored. */
38712 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38713 machine_mode mode, int ignore)
38715 const struct builtin_description *d;
38717 enum insn_code icode;
38718 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38719 tree arg0, arg1, arg2, arg3, arg4;
38720 rtx op0, op1, op2, op3, op4, pat, insn;
38721 machine_mode mode0, mode1, mode2, mode3, mode4;
38722 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38724 /* For CPU builtins that can be folded, fold first and expand the fold. */
38727 case IX86_BUILTIN_CPU_INIT:
38729 /* Make it call __cpu_indicator_init in libgcc. */
38730 tree call_expr, fndecl, type;
38731 type = build_function_type_list (integer_type_node, NULL_TREE);
38732 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38733 call_expr = build_call_expr (fndecl, 0);
38734 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38736 case IX86_BUILTIN_CPU_IS:
38737 case IX86_BUILTIN_CPU_SUPPORTS:
38739 tree arg0 = CALL_EXPR_ARG (exp, 0);
38740 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38741 gcc_assert (fold_expr != NULL_TREE);
38742 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38746 /* Determine whether the builtin function is available under the current ISA.
38747 Originally the builtin was not created if it wasn't applicable to the
38748 current ISA based on the command line switches. With function specific
38749 options, we need to check in the context of the function making the call
38750 whether it is supported. */
38751 if (ix86_builtins_isa[fcode].isa
38752 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38754 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38755 NULL, (enum fpmath_unit) 0, false);
38758 error ("%qE needs unknown isa option", fndecl);
38761 gcc_assert (opts != NULL);
38762 error ("%qE needs isa option %s", fndecl, opts);
38770 case IX86_BUILTIN_BNDMK:
38772 || GET_MODE (target) != BNDmode
38773 || !register_operand (target, BNDmode))
38774 target = gen_reg_rtx (BNDmode);
38776 arg0 = CALL_EXPR_ARG (exp, 0);
38777 arg1 = CALL_EXPR_ARG (exp, 1);
38779 op0 = expand_normal (arg0);
38780 op1 = expand_normal (arg1);
38782 if (!register_operand (op0, Pmode))
38783 op0 = ix86_zero_extend_to_Pmode (op0);
38784 if (!register_operand (op1, Pmode))
38785 op1 = ix86_zero_extend_to_Pmode (op1);
38787 /* Builtin arg1 is size of block but instruction op1 should
38789 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38790 NULL_RTX, 1, OPTAB_DIRECT);
38792 emit_insn (BNDmode == BND64mode
38793 ? gen_bnd64_mk (target, op0, op1)
38794 : gen_bnd32_mk (target, op0, op1));
38797 case IX86_BUILTIN_BNDSTX:
38798 arg0 = CALL_EXPR_ARG (exp, 0);
38799 arg1 = CALL_EXPR_ARG (exp, 1);
38800 arg2 = CALL_EXPR_ARG (exp, 2);
38802 op0 = expand_normal (arg0);
38803 op1 = expand_normal (arg1);
38804 op2 = expand_normal (arg2);
38806 if (!register_operand (op0, Pmode))
38807 op0 = ix86_zero_extend_to_Pmode (op0);
38808 if (!register_operand (op1, BNDmode))
38809 op1 = copy_to_mode_reg (BNDmode, op1);
38810 if (!register_operand (op2, Pmode))
38811 op2 = ix86_zero_extend_to_Pmode (op2);
38813 emit_insn (BNDmode == BND64mode
38814 ? gen_bnd64_stx (op2, op0, op1)
38815 : gen_bnd32_stx (op2, op0, op1));
38818 case IX86_BUILTIN_BNDLDX:
38820 || GET_MODE (target) != BNDmode
38821 || !register_operand (target, BNDmode))
38822 target = gen_reg_rtx (BNDmode);
38824 arg0 = CALL_EXPR_ARG (exp, 0);
38825 arg1 = CALL_EXPR_ARG (exp, 1);
38827 op0 = expand_normal (arg0);
38828 op1 = expand_normal (arg1);
38830 if (!register_operand (op0, Pmode))
38831 op0 = ix86_zero_extend_to_Pmode (op0);
38832 if (!register_operand (op1, Pmode))
38833 op1 = ix86_zero_extend_to_Pmode (op1);
38835 emit_insn (BNDmode == BND64mode
38836 ? gen_bnd64_ldx (target, op0, op1)
38837 : gen_bnd32_ldx (target, op0, op1));
38840 case IX86_BUILTIN_BNDCL:
38841 arg0 = CALL_EXPR_ARG (exp, 0);
38842 arg1 = CALL_EXPR_ARG (exp, 1);
38844 op0 = expand_normal (arg0);
38845 op1 = expand_normal (arg1);
38847 if (!register_operand (op0, Pmode))
38848 op0 = ix86_zero_extend_to_Pmode (op0);
38849 if (!register_operand (op1, BNDmode))
38850 op1 = copy_to_mode_reg (BNDmode, op1);
38852 emit_insn (BNDmode == BND64mode
38853 ? gen_bnd64_cl (op1, op0)
38854 : gen_bnd32_cl (op1, op0));
38857 case IX86_BUILTIN_BNDCU:
38858 arg0 = CALL_EXPR_ARG (exp, 0);
38859 arg1 = CALL_EXPR_ARG (exp, 1);
38861 op0 = expand_normal (arg0);
38862 op1 = expand_normal (arg1);
38864 if (!register_operand (op0, Pmode))
38865 op0 = ix86_zero_extend_to_Pmode (op0);
38866 if (!register_operand (op1, BNDmode))
38867 op1 = copy_to_mode_reg (BNDmode, op1);
38869 emit_insn (BNDmode == BND64mode
38870 ? gen_bnd64_cu (op1, op0)
38871 : gen_bnd32_cu (op1, op0));
38874 case IX86_BUILTIN_BNDRET:
38875 arg0 = CALL_EXPR_ARG (exp, 0);
38876 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38877 target = chkp_get_rtl_bounds (arg0);
38879 /* If no bounds were specified for returned value,
38880 then use INIT bounds. It usually happens when
38881 some built-in function is expanded. */
38884 rtx t1 = gen_reg_rtx (Pmode);
38885 rtx t2 = gen_reg_rtx (Pmode);
38886 target = gen_reg_rtx (BNDmode);
38887 emit_move_insn (t1, const0_rtx);
38888 emit_move_insn (t2, constm1_rtx);
38889 emit_insn (BNDmode == BND64mode
38890 ? gen_bnd64_mk (target, t1, t2)
38891 : gen_bnd32_mk (target, t1, t2));
38894 gcc_assert (target && REG_P (target));
38897 case IX86_BUILTIN_BNDNARROW:
38899 rtx m1, m1h1, m1h2, lb, ub, t1;
38901 /* Return value and lb. */
38902 arg0 = CALL_EXPR_ARG (exp, 0);
38904 arg1 = CALL_EXPR_ARG (exp, 1);
38906 arg2 = CALL_EXPR_ARG (exp, 2);
38908 lb = expand_normal (arg0);
38909 op1 = expand_normal (arg1);
38910 op2 = expand_normal (arg2);
38912 /* Size was passed but we need to use (size - 1) as for bndmk. */
38913 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38914 NULL_RTX, 1, OPTAB_DIRECT);
38916 /* Add LB to size and inverse to get UB. */
38917 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38918 op2, 1, OPTAB_DIRECT);
38919 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38921 if (!register_operand (lb, Pmode))
38922 lb = ix86_zero_extend_to_Pmode (lb);
38923 if (!register_operand (ub, Pmode))
38924 ub = ix86_zero_extend_to_Pmode (ub);
38926 /* We need to move bounds to memory before any computations. */
38931 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38932 emit_move_insn (m1, op1);
38935 /* Generate mem expression to be used for access to LB and UB. */
38936 m1h1 = adjust_address (m1, Pmode, 0);
38937 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38939 t1 = gen_reg_rtx (Pmode);
38942 emit_move_insn (t1, m1h1);
38943 ix86_emit_move_max (t1, lb);
38944 emit_move_insn (m1h1, t1);
38946 /* Compute UB. UB is stored in 1's complement form. Therefore
38947 we also use max here. */
38948 emit_move_insn (t1, m1h2);
38949 ix86_emit_move_max (t1, ub);
38950 emit_move_insn (m1h2, t1);
38952 op2 = gen_reg_rtx (BNDmode);
38953 emit_move_insn (op2, m1);
38955 return chkp_join_splitted_slot (lb, op2);
38958 case IX86_BUILTIN_BNDINT:
38960 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38963 || GET_MODE (target) != BNDmode
38964 || !register_operand (target, BNDmode))
38965 target = gen_reg_rtx (BNDmode);
38967 arg0 = CALL_EXPR_ARG (exp, 0);
38968 arg1 = CALL_EXPR_ARG (exp, 1);
38970 op0 = expand_normal (arg0);
38971 op1 = expand_normal (arg1);
38973 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38974 rh1 = adjust_address (res, Pmode, 0);
38975 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38977 /* Put first bounds to temporaries. */
38978 lb1 = gen_reg_rtx (Pmode);
38979 ub1 = gen_reg_rtx (Pmode);
38982 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38983 emit_move_insn (ub1, adjust_address (op0, Pmode,
38984 GET_MODE_SIZE (Pmode)));
38988 emit_move_insn (res, op0);
38989 emit_move_insn (lb1, rh1);
38990 emit_move_insn (ub1, rh2);
38993 /* Put second bounds to temporaries. */
38994 lb2 = gen_reg_rtx (Pmode);
38995 ub2 = gen_reg_rtx (Pmode);
38998 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38999 emit_move_insn (ub2, adjust_address (op1, Pmode,
39000 GET_MODE_SIZE (Pmode)));
39004 emit_move_insn (res, op1);
39005 emit_move_insn (lb2, rh1);
39006 emit_move_insn (ub2, rh2);
39010 ix86_emit_move_max (lb1, lb2);
39011 emit_move_insn (rh1, lb1);
39013 /* Compute UB. UB is stored in 1's complement form. Therefore
39014 we also use max here. */
39015 ix86_emit_move_max (ub1, ub2);
39016 emit_move_insn (rh2, ub1);
39018 emit_move_insn (target, res);
39023 case IX86_BUILTIN_SIZEOF:
39029 || GET_MODE (target) != Pmode
39030 || !register_operand (target, Pmode))
39031 target = gen_reg_rtx (Pmode);
39033 arg0 = CALL_EXPR_ARG (exp, 0);
39034 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
39036 name = DECL_ASSEMBLER_NAME (arg0);
39037 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
39039 emit_insn (Pmode == SImode
39040 ? gen_move_size_reloc_si (target, symbol)
39041 : gen_move_size_reloc_di (target, symbol));
39046 case IX86_BUILTIN_BNDLOWER:
39051 || GET_MODE (target) != Pmode
39052 || !register_operand (target, Pmode))
39053 target = gen_reg_rtx (Pmode);
39055 arg0 = CALL_EXPR_ARG (exp, 0);
39056 op0 = expand_normal (arg0);
39058 /* We need to move bounds to memory first. */
39063 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39064 emit_move_insn (mem, op0);
39067 /* Generate mem expression to access LB and load it. */
39068 hmem = adjust_address (mem, Pmode, 0);
39069 emit_move_insn (target, hmem);
39074 case IX86_BUILTIN_BNDUPPER:
39076 rtx mem, hmem, res;
39079 || GET_MODE (target) != Pmode
39080 || !register_operand (target, Pmode))
39081 target = gen_reg_rtx (Pmode);
39083 arg0 = CALL_EXPR_ARG (exp, 0);
39084 op0 = expand_normal (arg0);
39086 /* We need to move bounds to memory first. */
39091 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39092 emit_move_insn (mem, op0);
39095 /* Generate mem expression to access UB. */
39096 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
39098 /* We need to inverse all bits of UB. */
39099 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
39102 emit_move_insn (target, res);
39107 case IX86_BUILTIN_MASKMOVQ:
39108 case IX86_BUILTIN_MASKMOVDQU:
39109 icode = (fcode == IX86_BUILTIN_MASKMOVQ
39110 ? CODE_FOR_mmx_maskmovq
39111 : CODE_FOR_sse2_maskmovdqu);
39112 /* Note the arg order is different from the operand order. */
39113 arg1 = CALL_EXPR_ARG (exp, 0);
39114 arg2 = CALL_EXPR_ARG (exp, 1);
39115 arg0 = CALL_EXPR_ARG (exp, 2);
39116 op0 = expand_normal (arg0);
39117 op1 = expand_normal (arg1);
39118 op2 = expand_normal (arg2);
39119 mode0 = insn_data[icode].operand[0].mode;
39120 mode1 = insn_data[icode].operand[1].mode;
39121 mode2 = insn_data[icode].operand[2].mode;
39123 op0 = ix86_zero_extend_to_Pmode (op0);
39124 op0 = gen_rtx_MEM (mode1, op0);
39126 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39127 op0 = copy_to_mode_reg (mode0, op0);
39128 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39129 op1 = copy_to_mode_reg (mode1, op1);
39130 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39131 op2 = copy_to_mode_reg (mode2, op2);
39132 pat = GEN_FCN (icode) (op0, op1, op2);
39138 case IX86_BUILTIN_LDMXCSR:
39139 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
39140 target = assign_386_stack_local (SImode, SLOT_TEMP);
39141 emit_move_insn (target, op0);
39142 emit_insn (gen_sse_ldmxcsr (target));
39145 case IX86_BUILTIN_STMXCSR:
39146 target = assign_386_stack_local (SImode, SLOT_TEMP);
39147 emit_insn (gen_sse_stmxcsr (target));
39148 return copy_to_mode_reg (SImode, target);
39150 case IX86_BUILTIN_CLFLUSH:
39151 arg0 = CALL_EXPR_ARG (exp, 0);
39152 op0 = expand_normal (arg0);
39153 icode = CODE_FOR_sse2_clflush;
39154 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39155 op0 = ix86_zero_extend_to_Pmode (op0);
39157 emit_insn (gen_sse2_clflush (op0));
39160 case IX86_BUILTIN_CLWB:
39161 arg0 = CALL_EXPR_ARG (exp, 0);
39162 op0 = expand_normal (arg0);
39163 icode = CODE_FOR_clwb;
39164 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39165 op0 = ix86_zero_extend_to_Pmode (op0);
39167 emit_insn (gen_clwb (op0));
39170 case IX86_BUILTIN_CLFLUSHOPT:
39171 arg0 = CALL_EXPR_ARG (exp, 0);
39172 op0 = expand_normal (arg0);
39173 icode = CODE_FOR_clflushopt;
39174 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39175 op0 = ix86_zero_extend_to_Pmode (op0);
39177 emit_insn (gen_clflushopt (op0));
39180 case IX86_BUILTIN_MONITOR:
39181 case IX86_BUILTIN_MONITORX:
39182 arg0 = CALL_EXPR_ARG (exp, 0);
39183 arg1 = CALL_EXPR_ARG (exp, 1);
39184 arg2 = CALL_EXPR_ARG (exp, 2);
39185 op0 = expand_normal (arg0);
39186 op1 = expand_normal (arg1);
39187 op2 = expand_normal (arg2);
39189 op0 = ix86_zero_extend_to_Pmode (op0);
39191 op1 = copy_to_mode_reg (SImode, op1);
39193 op2 = copy_to_mode_reg (SImode, op2);
39195 emit_insn (fcode == IX86_BUILTIN_MONITOR
39196 ? ix86_gen_monitor (op0, op1, op2)
39197 : ix86_gen_monitorx (op0, op1, op2));
39200 case IX86_BUILTIN_MWAIT:
39201 arg0 = CALL_EXPR_ARG (exp, 0);
39202 arg1 = CALL_EXPR_ARG (exp, 1);
39203 op0 = expand_normal (arg0);
39204 op1 = expand_normal (arg1);
39206 op0 = copy_to_mode_reg (SImode, op0);
39208 op1 = copy_to_mode_reg (SImode, op1);
39209 emit_insn (gen_sse3_mwait (op0, op1));
39212 case IX86_BUILTIN_MWAITX:
39213 arg0 = CALL_EXPR_ARG (exp, 0);
39214 arg1 = CALL_EXPR_ARG (exp, 1);
39215 arg2 = CALL_EXPR_ARG (exp, 2);
39216 op0 = expand_normal (arg0);
39217 op1 = expand_normal (arg1);
39218 op2 = expand_normal (arg2);
39220 op0 = copy_to_mode_reg (SImode, op0);
39222 op1 = copy_to_mode_reg (SImode, op1);
39224 op2 = copy_to_mode_reg (SImode, op2);
39225 emit_insn (gen_mwaitx (op0, op1, op2));
39228 case IX86_BUILTIN_VEC_INIT_V2SI:
39229 case IX86_BUILTIN_VEC_INIT_V4HI:
39230 case IX86_BUILTIN_VEC_INIT_V8QI:
39231 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39233 case IX86_BUILTIN_VEC_EXT_V2DF:
39234 case IX86_BUILTIN_VEC_EXT_V2DI:
39235 case IX86_BUILTIN_VEC_EXT_V4SF:
39236 case IX86_BUILTIN_VEC_EXT_V4SI:
39237 case IX86_BUILTIN_VEC_EXT_V8HI:
39238 case IX86_BUILTIN_VEC_EXT_V2SI:
39239 case IX86_BUILTIN_VEC_EXT_V4HI:
39240 case IX86_BUILTIN_VEC_EXT_V16QI:
39241 return ix86_expand_vec_ext_builtin (exp, target);
39243 case IX86_BUILTIN_VEC_SET_V2DI:
39244 case IX86_BUILTIN_VEC_SET_V4SF:
39245 case IX86_BUILTIN_VEC_SET_V4SI:
39246 case IX86_BUILTIN_VEC_SET_V8HI:
39247 case IX86_BUILTIN_VEC_SET_V4HI:
39248 case IX86_BUILTIN_VEC_SET_V16QI:
39249 return ix86_expand_vec_set_builtin (exp);
39251 case IX86_BUILTIN_INFQ:
39252 case IX86_BUILTIN_HUGE_VALQ:
39254 REAL_VALUE_TYPE inf;
39258 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39260 tmp = validize_mem (force_const_mem (mode, tmp));
39263 target = gen_reg_rtx (mode);
39265 emit_move_insn (target, tmp);
39269 case IX86_BUILTIN_RDPMC:
39270 case IX86_BUILTIN_RDTSC:
39271 case IX86_BUILTIN_RDTSCP:
39273 op0 = gen_reg_rtx (DImode);
39274 op1 = gen_reg_rtx (DImode);
39276 if (fcode == IX86_BUILTIN_RDPMC)
39278 arg0 = CALL_EXPR_ARG (exp, 0);
39279 op2 = expand_normal (arg0);
39280 if (!register_operand (op2, SImode))
39281 op2 = copy_to_mode_reg (SImode, op2);
39283 insn = (TARGET_64BIT
39284 ? gen_rdpmc_rex64 (op0, op1, op2)
39285 : gen_rdpmc (op0, op2));
39288 else if (fcode == IX86_BUILTIN_RDTSC)
39290 insn = (TARGET_64BIT
39291 ? gen_rdtsc_rex64 (op0, op1)
39292 : gen_rdtsc (op0));
39297 op2 = gen_reg_rtx (SImode);
39299 insn = (TARGET_64BIT
39300 ? gen_rdtscp_rex64 (op0, op1, op2)
39301 : gen_rdtscp (op0, op2));
39304 arg0 = CALL_EXPR_ARG (exp, 0);
39305 op4 = expand_normal (arg0);
39306 if (!address_operand (op4, VOIDmode))
39308 op4 = convert_memory_address (Pmode, op4);
39309 op4 = copy_addr_to_reg (op4);
39311 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39316 /* mode is VOIDmode if __builtin_rd* has been called
39318 if (mode == VOIDmode)
39320 target = gen_reg_rtx (mode);
39325 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39326 op1, 1, OPTAB_DIRECT);
39327 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39328 op0, 1, OPTAB_DIRECT);
39331 emit_move_insn (target, op0);
39334 case IX86_BUILTIN_FXSAVE:
39335 case IX86_BUILTIN_FXRSTOR:
39336 case IX86_BUILTIN_FXSAVE64:
39337 case IX86_BUILTIN_FXRSTOR64:
39338 case IX86_BUILTIN_FNSTENV:
39339 case IX86_BUILTIN_FLDENV:
39343 case IX86_BUILTIN_FXSAVE:
39344 icode = CODE_FOR_fxsave;
39346 case IX86_BUILTIN_FXRSTOR:
39347 icode = CODE_FOR_fxrstor;
39349 case IX86_BUILTIN_FXSAVE64:
39350 icode = CODE_FOR_fxsave64;
39352 case IX86_BUILTIN_FXRSTOR64:
39353 icode = CODE_FOR_fxrstor64;
39355 case IX86_BUILTIN_FNSTENV:
39356 icode = CODE_FOR_fnstenv;
39358 case IX86_BUILTIN_FLDENV:
39359 icode = CODE_FOR_fldenv;
39362 gcc_unreachable ();
39365 arg0 = CALL_EXPR_ARG (exp, 0);
39366 op0 = expand_normal (arg0);
39368 if (!address_operand (op0, VOIDmode))
39370 op0 = convert_memory_address (Pmode, op0);
39371 op0 = copy_addr_to_reg (op0);
39373 op0 = gen_rtx_MEM (mode0, op0);
39375 pat = GEN_FCN (icode) (op0);
39380 case IX86_BUILTIN_XSAVE:
39381 case IX86_BUILTIN_XRSTOR:
39382 case IX86_BUILTIN_XSAVE64:
39383 case IX86_BUILTIN_XRSTOR64:
39384 case IX86_BUILTIN_XSAVEOPT:
39385 case IX86_BUILTIN_XSAVEOPT64:
39386 case IX86_BUILTIN_XSAVES:
39387 case IX86_BUILTIN_XRSTORS:
39388 case IX86_BUILTIN_XSAVES64:
39389 case IX86_BUILTIN_XRSTORS64:
39390 case IX86_BUILTIN_XSAVEC:
39391 case IX86_BUILTIN_XSAVEC64:
39392 arg0 = CALL_EXPR_ARG (exp, 0);
39393 arg1 = CALL_EXPR_ARG (exp, 1);
39394 op0 = expand_normal (arg0);
39395 op1 = expand_normal (arg1);
39397 if (!address_operand (op0, VOIDmode))
39399 op0 = convert_memory_address (Pmode, op0);
39400 op0 = copy_addr_to_reg (op0);
39402 op0 = gen_rtx_MEM (BLKmode, op0);
39404 op1 = force_reg (DImode, op1);
39408 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39409 NULL, 1, OPTAB_DIRECT);
39412 case IX86_BUILTIN_XSAVE:
39413 icode = CODE_FOR_xsave_rex64;
39415 case IX86_BUILTIN_XRSTOR:
39416 icode = CODE_FOR_xrstor_rex64;
39418 case IX86_BUILTIN_XSAVE64:
39419 icode = CODE_FOR_xsave64;
39421 case IX86_BUILTIN_XRSTOR64:
39422 icode = CODE_FOR_xrstor64;
39424 case IX86_BUILTIN_XSAVEOPT:
39425 icode = CODE_FOR_xsaveopt_rex64;
39427 case IX86_BUILTIN_XSAVEOPT64:
39428 icode = CODE_FOR_xsaveopt64;
39430 case IX86_BUILTIN_XSAVES:
39431 icode = CODE_FOR_xsaves_rex64;
39433 case IX86_BUILTIN_XRSTORS:
39434 icode = CODE_FOR_xrstors_rex64;
39436 case IX86_BUILTIN_XSAVES64:
39437 icode = CODE_FOR_xsaves64;
39439 case IX86_BUILTIN_XRSTORS64:
39440 icode = CODE_FOR_xrstors64;
39442 case IX86_BUILTIN_XSAVEC:
39443 icode = CODE_FOR_xsavec_rex64;
39445 case IX86_BUILTIN_XSAVEC64:
39446 icode = CODE_FOR_xsavec64;
39449 gcc_unreachable ();
39452 op2 = gen_lowpart (SImode, op2);
39453 op1 = gen_lowpart (SImode, op1);
39454 pat = GEN_FCN (icode) (op0, op1, op2);
39460 case IX86_BUILTIN_XSAVE:
39461 icode = CODE_FOR_xsave;
39463 case IX86_BUILTIN_XRSTOR:
39464 icode = CODE_FOR_xrstor;
39466 case IX86_BUILTIN_XSAVEOPT:
39467 icode = CODE_FOR_xsaveopt;
39469 case IX86_BUILTIN_XSAVES:
39470 icode = CODE_FOR_xsaves;
39472 case IX86_BUILTIN_XRSTORS:
39473 icode = CODE_FOR_xrstors;
39475 case IX86_BUILTIN_XSAVEC:
39476 icode = CODE_FOR_xsavec;
39479 gcc_unreachable ();
39481 pat = GEN_FCN (icode) (op0, op1);
39488 case IX86_BUILTIN_LLWPCB:
39489 arg0 = CALL_EXPR_ARG (exp, 0);
39490 op0 = expand_normal (arg0);
39491 icode = CODE_FOR_lwp_llwpcb;
39492 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39493 op0 = ix86_zero_extend_to_Pmode (op0);
39494 emit_insn (gen_lwp_llwpcb (op0));
39497 case IX86_BUILTIN_SLWPCB:
39498 icode = CODE_FOR_lwp_slwpcb;
39500 || !insn_data[icode].operand[0].predicate (target, Pmode))
39501 target = gen_reg_rtx (Pmode);
39502 emit_insn (gen_lwp_slwpcb (target));
39505 case IX86_BUILTIN_BEXTRI32:
39506 case IX86_BUILTIN_BEXTRI64:
39507 arg0 = CALL_EXPR_ARG (exp, 0);
39508 arg1 = CALL_EXPR_ARG (exp, 1);
39509 op0 = expand_normal (arg0);
39510 op1 = expand_normal (arg1);
39511 icode = (fcode == IX86_BUILTIN_BEXTRI32
39512 ? CODE_FOR_tbm_bextri_si
39513 : CODE_FOR_tbm_bextri_di);
39514 if (!CONST_INT_P (op1))
39516 error ("last argument must be an immediate");
39521 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39522 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39523 op1 = GEN_INT (length);
39524 op2 = GEN_INT (lsb_index);
39525 pat = GEN_FCN (icode) (target, op0, op1, op2);
39531 case IX86_BUILTIN_RDRAND16_STEP:
39532 icode = CODE_FOR_rdrandhi_1;
39536 case IX86_BUILTIN_RDRAND32_STEP:
39537 icode = CODE_FOR_rdrandsi_1;
39541 case IX86_BUILTIN_RDRAND64_STEP:
39542 icode = CODE_FOR_rdranddi_1;
39546 op0 = gen_reg_rtx (mode0);
39547 emit_insn (GEN_FCN (icode) (op0));
39549 arg0 = CALL_EXPR_ARG (exp, 0);
39550 op1 = expand_normal (arg0);
39551 if (!address_operand (op1, VOIDmode))
39553 op1 = convert_memory_address (Pmode, op1);
39554 op1 = copy_addr_to_reg (op1);
39556 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39558 op1 = gen_reg_rtx (SImode);
39559 emit_move_insn (op1, CONST1_RTX (SImode));
39561 /* Emit SImode conditional move. */
39562 if (mode0 == HImode)
39564 op2 = gen_reg_rtx (SImode);
39565 emit_insn (gen_zero_extendhisi2 (op2, op0));
39567 else if (mode0 == SImode)
39570 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39573 || !register_operand (target, SImode))
39574 target = gen_reg_rtx (SImode);
39576 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39578 emit_insn (gen_rtx_SET (VOIDmode, target,
39579 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39582 case IX86_BUILTIN_RDSEED16_STEP:
39583 icode = CODE_FOR_rdseedhi_1;
39587 case IX86_BUILTIN_RDSEED32_STEP:
39588 icode = CODE_FOR_rdseedsi_1;
39592 case IX86_BUILTIN_RDSEED64_STEP:
39593 icode = CODE_FOR_rdseeddi_1;
39597 op0 = gen_reg_rtx (mode0);
39598 emit_insn (GEN_FCN (icode) (op0));
39600 arg0 = CALL_EXPR_ARG (exp, 0);
39601 op1 = expand_normal (arg0);
39602 if (!address_operand (op1, VOIDmode))
39604 op1 = convert_memory_address (Pmode, op1);
39605 op1 = copy_addr_to_reg (op1);
39607 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39609 op2 = gen_reg_rtx (QImode);
39611 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39613 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39616 || !register_operand (target, SImode))
39617 target = gen_reg_rtx (SImode);
39619 emit_insn (gen_zero_extendqisi2 (target, op2));
39622 case IX86_BUILTIN_SBB32:
39623 icode = CODE_FOR_subborrowsi;
39627 case IX86_BUILTIN_SBB64:
39628 icode = CODE_FOR_subborrowdi;
39632 case IX86_BUILTIN_ADDCARRYX32:
39633 icode = CODE_FOR_addcarrysi;
39637 case IX86_BUILTIN_ADDCARRYX64:
39638 icode = CODE_FOR_addcarrydi;
39642 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39643 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39644 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39645 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39647 op1 = expand_normal (arg0);
39648 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39650 op2 = expand_normal (arg1);
39651 if (!register_operand (op2, mode0))
39652 op2 = copy_to_mode_reg (mode0, op2);
39654 op3 = expand_normal (arg2);
39655 if (!register_operand (op3, mode0))
39656 op3 = copy_to_mode_reg (mode0, op3);
39658 op4 = expand_normal (arg3);
39659 if (!address_operand (op4, VOIDmode))
39661 op4 = convert_memory_address (Pmode, op4);
39662 op4 = copy_addr_to_reg (op4);
39665 /* Generate CF from input operand. */
39666 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
39668 /* Generate instruction that consumes CF. */
39669 op0 = gen_reg_rtx (mode0);
39671 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
39672 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
39673 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
39675 /* Return current CF value. */
39677 target = gen_reg_rtx (QImode);
39679 PUT_MODE (pat, QImode);
39680 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39682 /* Store the result. */
39683 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39687 case IX86_BUILTIN_READ_FLAGS:
39688 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39691 || target == NULL_RTX
39692 || !nonimmediate_operand (target, word_mode)
39693 || GET_MODE (target) != word_mode)
39694 target = gen_reg_rtx (word_mode);
39696 emit_insn (gen_pop (target));
39699 case IX86_BUILTIN_WRITE_FLAGS:
39701 arg0 = CALL_EXPR_ARG (exp, 0);
39702 op0 = expand_normal (arg0);
39703 if (!general_no_elim_operand (op0, word_mode))
39704 op0 = copy_to_mode_reg (word_mode, op0);
39706 emit_insn (gen_push (op0));
39707 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39710 case IX86_BUILTIN_KORTESTC16:
39711 icode = CODE_FOR_kortestchi;
39716 case IX86_BUILTIN_KORTESTZ16:
39717 icode = CODE_FOR_kortestzhi;
39722 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39723 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39724 op0 = expand_normal (arg0);
39725 op1 = expand_normal (arg1);
39727 op0 = copy_to_reg (op0);
39728 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39729 op1 = copy_to_reg (op1);
39730 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39732 target = gen_reg_rtx (QImode);
39733 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39735 /* Emit kortest. */
39736 emit_insn (GEN_FCN (icode) (op0, op1));
39737 /* And use setcc to return result from flags. */
39738 ix86_expand_setcc (target, EQ,
39739 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39742 case IX86_BUILTIN_GATHERSIV2DF:
39743 icode = CODE_FOR_avx2_gathersiv2df;
39745 case IX86_BUILTIN_GATHERSIV4DF:
39746 icode = CODE_FOR_avx2_gathersiv4df;
39748 case IX86_BUILTIN_GATHERDIV2DF:
39749 icode = CODE_FOR_avx2_gatherdiv2df;
39751 case IX86_BUILTIN_GATHERDIV4DF:
39752 icode = CODE_FOR_avx2_gatherdiv4df;
39754 case IX86_BUILTIN_GATHERSIV4SF:
39755 icode = CODE_FOR_avx2_gathersiv4sf;
39757 case IX86_BUILTIN_GATHERSIV8SF:
39758 icode = CODE_FOR_avx2_gathersiv8sf;
39760 case IX86_BUILTIN_GATHERDIV4SF:
39761 icode = CODE_FOR_avx2_gatherdiv4sf;
39763 case IX86_BUILTIN_GATHERDIV8SF:
39764 icode = CODE_FOR_avx2_gatherdiv8sf;
39766 case IX86_BUILTIN_GATHERSIV2DI:
39767 icode = CODE_FOR_avx2_gathersiv2di;
39769 case IX86_BUILTIN_GATHERSIV4DI:
39770 icode = CODE_FOR_avx2_gathersiv4di;
39772 case IX86_BUILTIN_GATHERDIV2DI:
39773 icode = CODE_FOR_avx2_gatherdiv2di;
39775 case IX86_BUILTIN_GATHERDIV4DI:
39776 icode = CODE_FOR_avx2_gatherdiv4di;
39778 case IX86_BUILTIN_GATHERSIV4SI:
39779 icode = CODE_FOR_avx2_gathersiv4si;
39781 case IX86_BUILTIN_GATHERSIV8SI:
39782 icode = CODE_FOR_avx2_gathersiv8si;
39784 case IX86_BUILTIN_GATHERDIV4SI:
39785 icode = CODE_FOR_avx2_gatherdiv4si;
39787 case IX86_BUILTIN_GATHERDIV8SI:
39788 icode = CODE_FOR_avx2_gatherdiv8si;
39790 case IX86_BUILTIN_GATHERALTSIV4DF:
39791 icode = CODE_FOR_avx2_gathersiv4df;
39793 case IX86_BUILTIN_GATHERALTDIV8SF:
39794 icode = CODE_FOR_avx2_gatherdiv8sf;
39796 case IX86_BUILTIN_GATHERALTSIV4DI:
39797 icode = CODE_FOR_avx2_gathersiv4di;
39799 case IX86_BUILTIN_GATHERALTDIV8SI:
39800 icode = CODE_FOR_avx2_gatherdiv8si;
39802 case IX86_BUILTIN_GATHER3SIV16SF:
39803 icode = CODE_FOR_avx512f_gathersiv16sf;
39805 case IX86_BUILTIN_GATHER3SIV8DF:
39806 icode = CODE_FOR_avx512f_gathersiv8df;
39808 case IX86_BUILTIN_GATHER3DIV16SF:
39809 icode = CODE_FOR_avx512f_gatherdiv16sf;
39811 case IX86_BUILTIN_GATHER3DIV8DF:
39812 icode = CODE_FOR_avx512f_gatherdiv8df;
39814 case IX86_BUILTIN_GATHER3SIV16SI:
39815 icode = CODE_FOR_avx512f_gathersiv16si;
39817 case IX86_BUILTIN_GATHER3SIV8DI:
39818 icode = CODE_FOR_avx512f_gathersiv8di;
39820 case IX86_BUILTIN_GATHER3DIV16SI:
39821 icode = CODE_FOR_avx512f_gatherdiv16si;
39823 case IX86_BUILTIN_GATHER3DIV8DI:
39824 icode = CODE_FOR_avx512f_gatherdiv8di;
39826 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39827 icode = CODE_FOR_avx512f_gathersiv8df;
39829 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39830 icode = CODE_FOR_avx512f_gatherdiv16sf;
39832 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39833 icode = CODE_FOR_avx512f_gathersiv8di;
39835 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39836 icode = CODE_FOR_avx512f_gatherdiv16si;
39838 case IX86_BUILTIN_GATHER3SIV2DF:
39839 icode = CODE_FOR_avx512vl_gathersiv2df;
39841 case IX86_BUILTIN_GATHER3SIV4DF:
39842 icode = CODE_FOR_avx512vl_gathersiv4df;
39844 case IX86_BUILTIN_GATHER3DIV2DF:
39845 icode = CODE_FOR_avx512vl_gatherdiv2df;
39847 case IX86_BUILTIN_GATHER3DIV4DF:
39848 icode = CODE_FOR_avx512vl_gatherdiv4df;
39850 case IX86_BUILTIN_GATHER3SIV4SF:
39851 icode = CODE_FOR_avx512vl_gathersiv4sf;
39853 case IX86_BUILTIN_GATHER3SIV8SF:
39854 icode = CODE_FOR_avx512vl_gathersiv8sf;
39856 case IX86_BUILTIN_GATHER3DIV4SF:
39857 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39859 case IX86_BUILTIN_GATHER3DIV8SF:
39860 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39862 case IX86_BUILTIN_GATHER3SIV2DI:
39863 icode = CODE_FOR_avx512vl_gathersiv2di;
39865 case IX86_BUILTIN_GATHER3SIV4DI:
39866 icode = CODE_FOR_avx512vl_gathersiv4di;
39868 case IX86_BUILTIN_GATHER3DIV2DI:
39869 icode = CODE_FOR_avx512vl_gatherdiv2di;
39871 case IX86_BUILTIN_GATHER3DIV4DI:
39872 icode = CODE_FOR_avx512vl_gatherdiv4di;
39874 case IX86_BUILTIN_GATHER3SIV4SI:
39875 icode = CODE_FOR_avx512vl_gathersiv4si;
39877 case IX86_BUILTIN_GATHER3SIV8SI:
39878 icode = CODE_FOR_avx512vl_gathersiv8si;
39880 case IX86_BUILTIN_GATHER3DIV4SI:
39881 icode = CODE_FOR_avx512vl_gatherdiv4si;
39883 case IX86_BUILTIN_GATHER3DIV8SI:
39884 icode = CODE_FOR_avx512vl_gatherdiv8si;
39886 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39887 icode = CODE_FOR_avx512vl_gathersiv4df;
39889 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39890 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39892 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39893 icode = CODE_FOR_avx512vl_gathersiv4di;
39895 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39896 icode = CODE_FOR_avx512vl_gatherdiv8si;
39898 case IX86_BUILTIN_SCATTERSIV16SF:
39899 icode = CODE_FOR_avx512f_scattersiv16sf;
39901 case IX86_BUILTIN_SCATTERSIV8DF:
39902 icode = CODE_FOR_avx512f_scattersiv8df;
39904 case IX86_BUILTIN_SCATTERDIV16SF:
39905 icode = CODE_FOR_avx512f_scatterdiv16sf;
39907 case IX86_BUILTIN_SCATTERDIV8DF:
39908 icode = CODE_FOR_avx512f_scatterdiv8df;
39910 case IX86_BUILTIN_SCATTERSIV16SI:
39911 icode = CODE_FOR_avx512f_scattersiv16si;
39913 case IX86_BUILTIN_SCATTERSIV8DI:
39914 icode = CODE_FOR_avx512f_scattersiv8di;
39916 case IX86_BUILTIN_SCATTERDIV16SI:
39917 icode = CODE_FOR_avx512f_scatterdiv16si;
39919 case IX86_BUILTIN_SCATTERDIV8DI:
39920 icode = CODE_FOR_avx512f_scatterdiv8di;
39922 case IX86_BUILTIN_SCATTERSIV8SF:
39923 icode = CODE_FOR_avx512vl_scattersiv8sf;
39925 case IX86_BUILTIN_SCATTERSIV4SF:
39926 icode = CODE_FOR_avx512vl_scattersiv4sf;
39928 case IX86_BUILTIN_SCATTERSIV4DF:
39929 icode = CODE_FOR_avx512vl_scattersiv4df;
39931 case IX86_BUILTIN_SCATTERSIV2DF:
39932 icode = CODE_FOR_avx512vl_scattersiv2df;
39934 case IX86_BUILTIN_SCATTERDIV8SF:
39935 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39937 case IX86_BUILTIN_SCATTERDIV4SF:
39938 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39940 case IX86_BUILTIN_SCATTERDIV4DF:
39941 icode = CODE_FOR_avx512vl_scatterdiv4df;
39943 case IX86_BUILTIN_SCATTERDIV2DF:
39944 icode = CODE_FOR_avx512vl_scatterdiv2df;
39946 case IX86_BUILTIN_SCATTERSIV8SI:
39947 icode = CODE_FOR_avx512vl_scattersiv8si;
39949 case IX86_BUILTIN_SCATTERSIV4SI:
39950 icode = CODE_FOR_avx512vl_scattersiv4si;
39952 case IX86_BUILTIN_SCATTERSIV4DI:
39953 icode = CODE_FOR_avx512vl_scattersiv4di;
39955 case IX86_BUILTIN_SCATTERSIV2DI:
39956 icode = CODE_FOR_avx512vl_scattersiv2di;
39958 case IX86_BUILTIN_SCATTERDIV8SI:
39959 icode = CODE_FOR_avx512vl_scatterdiv8si;
39961 case IX86_BUILTIN_SCATTERDIV4SI:
39962 icode = CODE_FOR_avx512vl_scatterdiv4si;
39964 case IX86_BUILTIN_SCATTERDIV4DI:
39965 icode = CODE_FOR_avx512vl_scatterdiv4di;
39967 case IX86_BUILTIN_SCATTERDIV2DI:
39968 icode = CODE_FOR_avx512vl_scatterdiv2di;
39970 case IX86_BUILTIN_GATHERPFDPD:
39971 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39972 goto vec_prefetch_gen;
39973 case IX86_BUILTIN_GATHERPFDPS:
39974 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39975 goto vec_prefetch_gen;
39976 case IX86_BUILTIN_GATHERPFQPD:
39977 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39978 goto vec_prefetch_gen;
39979 case IX86_BUILTIN_GATHERPFQPS:
39980 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39981 goto vec_prefetch_gen;
39982 case IX86_BUILTIN_SCATTERPFDPD:
39983 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39984 goto vec_prefetch_gen;
39985 case IX86_BUILTIN_SCATTERPFDPS:
39986 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39987 goto vec_prefetch_gen;
39988 case IX86_BUILTIN_SCATTERPFQPD:
39989 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39990 goto vec_prefetch_gen;
39991 case IX86_BUILTIN_SCATTERPFQPS:
39992 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39993 goto vec_prefetch_gen;
39997 rtx (*gen) (rtx, rtx);
39999 arg0 = CALL_EXPR_ARG (exp, 0);
40000 arg1 = CALL_EXPR_ARG (exp, 1);
40001 arg2 = CALL_EXPR_ARG (exp, 2);
40002 arg3 = CALL_EXPR_ARG (exp, 3);
40003 arg4 = CALL_EXPR_ARG (exp, 4);
40004 op0 = expand_normal (arg0);
40005 op1 = expand_normal (arg1);
40006 op2 = expand_normal (arg2);
40007 op3 = expand_normal (arg3);
40008 op4 = expand_normal (arg4);
40009 /* Note the arg order is different from the operand order. */
40010 mode0 = insn_data[icode].operand[1].mode;
40011 mode2 = insn_data[icode].operand[3].mode;
40012 mode3 = insn_data[icode].operand[4].mode;
40013 mode4 = insn_data[icode].operand[5].mode;
40015 if (target == NULL_RTX
40016 || GET_MODE (target) != insn_data[icode].operand[0].mode
40017 || !insn_data[icode].operand[0].predicate (target,
40018 GET_MODE (target)))
40019 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
40021 subtarget = target;
40025 case IX86_BUILTIN_GATHER3ALTSIV8DF:
40026 case IX86_BUILTIN_GATHER3ALTSIV8DI:
40027 half = gen_reg_rtx (V8SImode);
40028 if (!nonimmediate_operand (op2, V16SImode))
40029 op2 = copy_to_mode_reg (V16SImode, op2);
40030 emit_insn (gen_vec_extract_lo_v16si (half, op2));
40033 case IX86_BUILTIN_GATHER3ALTSIV4DF:
40034 case IX86_BUILTIN_GATHER3ALTSIV4DI:
40035 case IX86_BUILTIN_GATHERALTSIV4DF:
40036 case IX86_BUILTIN_GATHERALTSIV4DI:
40037 half = gen_reg_rtx (V4SImode);
40038 if (!nonimmediate_operand (op2, V8SImode))
40039 op2 = copy_to_mode_reg (V8SImode, op2);
40040 emit_insn (gen_vec_extract_lo_v8si (half, op2));
40043 case IX86_BUILTIN_GATHER3ALTDIV16SF:
40044 case IX86_BUILTIN_GATHER3ALTDIV16SI:
40045 half = gen_reg_rtx (mode0);
40046 if (mode0 == V8SFmode)
40047 gen = gen_vec_extract_lo_v16sf;
40049 gen = gen_vec_extract_lo_v16si;
40050 if (!nonimmediate_operand (op0, GET_MODE (op0)))
40051 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40052 emit_insn (gen (half, op0));
40054 if (GET_MODE (op3) != VOIDmode)
40056 if (!nonimmediate_operand (op3, GET_MODE (op3)))
40057 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40058 emit_insn (gen (half, op3));
40062 case IX86_BUILTIN_GATHER3ALTDIV8SF:
40063 case IX86_BUILTIN_GATHER3ALTDIV8SI:
40064 case IX86_BUILTIN_GATHERALTDIV8SF:
40065 case IX86_BUILTIN_GATHERALTDIV8SI:
40066 half = gen_reg_rtx (mode0);
40067 if (mode0 == V4SFmode)
40068 gen = gen_vec_extract_lo_v8sf;
40070 gen = gen_vec_extract_lo_v8si;
40071 if (!nonimmediate_operand (op0, GET_MODE (op0)))
40072 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40073 emit_insn (gen (half, op0));
40075 if (GET_MODE (op3) != VOIDmode)
40077 if (!nonimmediate_operand (op3, GET_MODE (op3)))
40078 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40079 emit_insn (gen (half, op3));
40087 /* Force memory operand only with base register here. But we
40088 don't want to do it on memory operand for other builtin
40090 op1 = ix86_zero_extend_to_Pmode (op1);
40092 if (!insn_data[icode].operand[1].predicate (op0, mode0))
40093 op0 = copy_to_mode_reg (mode0, op0);
40094 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
40095 op1 = copy_to_mode_reg (Pmode, op1);
40096 if (!insn_data[icode].operand[3].predicate (op2, mode2))
40097 op2 = copy_to_mode_reg (mode2, op2);
40099 op3 = fixup_modeless_constant (op3, mode3);
40101 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
40103 if (!insn_data[icode].operand[4].predicate (op3, mode3))
40104 op3 = copy_to_mode_reg (mode3, op3);
40108 op3 = copy_to_reg (op3);
40109 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
40111 if (!insn_data[icode].operand[5].predicate (op4, mode4))
40113 error ("the last argument must be scale 1, 2, 4, 8");
40117 /* Optimize. If mask is known to have all high bits set,
40118 replace op0 with pc_rtx to signal that the instruction
40119 overwrites the whole destination and doesn't use its
40120 previous contents. */
40123 if (TREE_CODE (arg3) == INTEGER_CST)
40125 if (integer_all_onesp (arg3))
40128 else if (TREE_CODE (arg3) == VECTOR_CST)
40130 unsigned int negative = 0;
40131 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
40133 tree cst = VECTOR_CST_ELT (arg3, i);
40134 if (TREE_CODE (cst) == INTEGER_CST
40135 && tree_int_cst_sign_bit (cst))
40137 else if (TREE_CODE (cst) == REAL_CST
40138 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
40141 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
40144 else if (TREE_CODE (arg3) == SSA_NAME
40145 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
40147 /* Recognize also when mask is like:
40148 __v2df src = _mm_setzero_pd ();
40149 __v2df mask = _mm_cmpeq_pd (src, src);
40151 __v8sf src = _mm256_setzero_ps ();
40152 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
40153 as that is a cheaper way to load all ones into
40154 a register than having to load a constant from
40156 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
40157 if (is_gimple_call (def_stmt))
40159 tree fndecl = gimple_call_fndecl (def_stmt);
40161 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
40162 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
40164 case IX86_BUILTIN_CMPPD:
40165 case IX86_BUILTIN_CMPPS:
40166 case IX86_BUILTIN_CMPPD256:
40167 case IX86_BUILTIN_CMPPS256:
40168 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
40171 case IX86_BUILTIN_CMPEQPD:
40172 case IX86_BUILTIN_CMPEQPS:
40173 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
40174 && initializer_zerop (gimple_call_arg (def_stmt,
40185 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
40192 case IX86_BUILTIN_GATHER3DIV16SF:
40193 if (target == NULL_RTX)
40194 target = gen_reg_rtx (V8SFmode);
40195 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
40197 case IX86_BUILTIN_GATHER3DIV16SI:
40198 if (target == NULL_RTX)
40199 target = gen_reg_rtx (V8SImode);
40200 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
40202 case IX86_BUILTIN_GATHER3DIV8SF:
40203 case IX86_BUILTIN_GATHERDIV8SF:
40204 if (target == NULL_RTX)
40205 target = gen_reg_rtx (V4SFmode);
40206 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40208 case IX86_BUILTIN_GATHER3DIV8SI:
40209 case IX86_BUILTIN_GATHERDIV8SI:
40210 if (target == NULL_RTX)
40211 target = gen_reg_rtx (V4SImode);
40212 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40215 target = subtarget;
40221 arg0 = CALL_EXPR_ARG (exp, 0);
40222 arg1 = CALL_EXPR_ARG (exp, 1);
40223 arg2 = CALL_EXPR_ARG (exp, 2);
40224 arg3 = CALL_EXPR_ARG (exp, 3);
40225 arg4 = CALL_EXPR_ARG (exp, 4);
40226 op0 = expand_normal (arg0);
40227 op1 = expand_normal (arg1);
40228 op2 = expand_normal (arg2);
40229 op3 = expand_normal (arg3);
40230 op4 = expand_normal (arg4);
40231 mode1 = insn_data[icode].operand[1].mode;
40232 mode2 = insn_data[icode].operand[2].mode;
40233 mode3 = insn_data[icode].operand[3].mode;
40234 mode4 = insn_data[icode].operand[4].mode;
40236 /* Force memory operand only with base register here. But we
40237 don't want to do it on memory operand for other builtin
40239 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40241 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40242 op0 = copy_to_mode_reg (Pmode, op0);
40244 op1 = fixup_modeless_constant (op1, mode1);
40246 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40248 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40249 op1 = copy_to_mode_reg (mode1, op1);
40253 op1 = copy_to_reg (op1);
40254 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40257 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40258 op2 = copy_to_mode_reg (mode2, op2);
40260 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40261 op3 = copy_to_mode_reg (mode3, op3);
40263 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40265 error ("the last argument must be scale 1, 2, 4, 8");
40269 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40277 arg0 = CALL_EXPR_ARG (exp, 0);
40278 arg1 = CALL_EXPR_ARG (exp, 1);
40279 arg2 = CALL_EXPR_ARG (exp, 2);
40280 arg3 = CALL_EXPR_ARG (exp, 3);
40281 arg4 = CALL_EXPR_ARG (exp, 4);
40282 op0 = expand_normal (arg0);
40283 op1 = expand_normal (arg1);
40284 op2 = expand_normal (arg2);
40285 op3 = expand_normal (arg3);
40286 op4 = expand_normal (arg4);
40287 mode0 = insn_data[icode].operand[0].mode;
40288 mode1 = insn_data[icode].operand[1].mode;
40289 mode3 = insn_data[icode].operand[3].mode;
40290 mode4 = insn_data[icode].operand[4].mode;
40292 op0 = fixup_modeless_constant (op0, mode0);
40294 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
40296 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40297 op0 = copy_to_mode_reg (mode0, op0);
40301 op0 = copy_to_reg (op0);
40302 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40305 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40306 op1 = copy_to_mode_reg (mode1, op1);
40308 /* Force memory operand only with base register here. But we
40309 don't want to do it on memory operand for other builtin
40311 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40313 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40314 op2 = copy_to_mode_reg (Pmode, op2);
40316 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40318 error ("the forth argument must be scale 1, 2, 4, 8");
40322 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40324 error ("incorrect hint operand");
40328 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40336 case IX86_BUILTIN_XABORT:
40337 icode = CODE_FOR_xabort;
40338 arg0 = CALL_EXPR_ARG (exp, 0);
40339 op0 = expand_normal (arg0);
40340 mode0 = insn_data[icode].operand[0].mode;
40341 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40343 error ("the xabort's argument must be an 8-bit immediate");
40346 emit_insn (gen_xabort (op0));
40353 for (i = 0, d = bdesc_special_args;
40354 i < ARRAY_SIZE (bdesc_special_args);
40356 if (d->code == fcode)
40357 return ix86_expand_special_args_builtin (d, exp, target);
40359 for (i = 0, d = bdesc_args;
40360 i < ARRAY_SIZE (bdesc_args);
40362 if (d->code == fcode)
40365 case IX86_BUILTIN_FABSQ:
40366 case IX86_BUILTIN_COPYSIGNQ:
40368 /* Emit a normal call if SSE isn't available. */
40369 return expand_call (exp, target, ignore);
40371 return ix86_expand_args_builtin (d, exp, target);
40374 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40375 if (d->code == fcode)
40376 return ix86_expand_sse_comi (d, exp, target);
40378 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40379 if (d->code == fcode)
40380 return ix86_expand_round_builtin (d, exp, target);
40382 for (i = 0, d = bdesc_pcmpestr;
40383 i < ARRAY_SIZE (bdesc_pcmpestr);
40385 if (d->code == fcode)
40386 return ix86_expand_sse_pcmpestr (d, exp, target);
40388 for (i = 0, d = bdesc_pcmpistr;
40389 i < ARRAY_SIZE (bdesc_pcmpistr);
40391 if (d->code == fcode)
40392 return ix86_expand_sse_pcmpistr (d, exp, target);
40394 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40395 if (d->code == fcode)
40396 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40397 (enum ix86_builtin_func_type)
40398 d->flag, d->comparison);
40400 gcc_unreachable ();
40403 /* This returns the target-specific builtin with code CODE if
40404 current_function_decl has visibility on this builtin, which is checked
40405 using isa flags. Returns NULL_TREE otherwise. */
40407 static tree ix86_get_builtin (enum ix86_builtins code)
40409 struct cl_target_option *opts;
40410 tree target_tree = NULL_TREE;
40412 /* Determine the isa flags of current_function_decl. */
40414 if (current_function_decl)
40415 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40417 if (target_tree == NULL)
40418 target_tree = target_option_default_node;
40420 opts = TREE_TARGET_OPTION (target_tree);
40422 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40423 return ix86_builtin_decl (code, true);
40428 /* Return function decl for target specific builtin
40429 for given MPX builtin passed i FCODE. */
40431 ix86_builtin_mpx_function (unsigned fcode)
40435 case BUILT_IN_CHKP_BNDMK:
40436 return ix86_builtins[IX86_BUILTIN_BNDMK];
40438 case BUILT_IN_CHKP_BNDSTX:
40439 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40441 case BUILT_IN_CHKP_BNDLDX:
40442 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40444 case BUILT_IN_CHKP_BNDCL:
40445 return ix86_builtins[IX86_BUILTIN_BNDCL];
40447 case BUILT_IN_CHKP_BNDCU:
40448 return ix86_builtins[IX86_BUILTIN_BNDCU];
40450 case BUILT_IN_CHKP_BNDRET:
40451 return ix86_builtins[IX86_BUILTIN_BNDRET];
40453 case BUILT_IN_CHKP_INTERSECT:
40454 return ix86_builtins[IX86_BUILTIN_BNDINT];
40456 case BUILT_IN_CHKP_NARROW:
40457 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40459 case BUILT_IN_CHKP_SIZEOF:
40460 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40462 case BUILT_IN_CHKP_EXTRACT_LOWER:
40463 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40465 case BUILT_IN_CHKP_EXTRACT_UPPER:
40466 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40472 gcc_unreachable ();
40475 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40477 Return an address to be used to load/store bounds for pointer
40480 SLOT_NO is an integer constant holding number of a target
40481 dependent special slot to be used in case SLOT is not a memory.
40483 SPECIAL_BASE is a pointer to be used as a base of fake address
40484 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40485 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40488 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40492 /* NULL slot means we pass bounds for pointer not passed to the
40493 function at all. Register slot means we pass pointer in a
40494 register. In both these cases bounds are passed via Bounds
40495 Table. Since we do not have actual pointer stored in memory,
40496 we have to use fake addresses to access Bounds Table. We
40497 start with (special_base - sizeof (void*)) and decrease this
40498 address by pointer size to get addresses for other slots. */
40499 if (!slot || REG_P (slot))
40501 gcc_assert (CONST_INT_P (slot_no));
40502 addr = plus_constant (Pmode, special_base,
40503 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40505 /* If pointer is passed in a memory then its address is used to
40506 access Bounds Table. */
40507 else if (MEM_P (slot))
40509 addr = XEXP (slot, 0);
40510 if (!register_operand (addr, Pmode))
40511 addr = copy_addr_to_reg (addr);
40514 gcc_unreachable ();
40519 /* Expand pass uses this hook to load bounds for function parameter
40520 PTR passed in SLOT in case its bounds are not passed in a register.
40522 If SLOT is a memory, then bounds are loaded as for regular pointer
40523 loaded from memory. PTR may be NULL in case SLOT is a memory.
40524 In such case value of PTR (if required) may be loaded from SLOT.
40526 If SLOT is NULL or a register then SLOT_NO is an integer constant
40527 holding number of the target dependent special slot which should be
40528 used to obtain bounds.
40530 Return loaded bounds. */
40533 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40535 rtx reg = gen_reg_rtx (BNDmode);
40538 /* Get address to be used to access Bounds Table. Special slots start
40539 at the location of return address of the current function. */
40540 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40542 /* Load pointer value from a memory if we don't have it. */
40545 gcc_assert (MEM_P (slot));
40546 ptr = copy_addr_to_reg (slot);
40549 emit_insn (BNDmode == BND64mode
40550 ? gen_bnd64_ldx (reg, addr, ptr)
40551 : gen_bnd32_ldx (reg, addr, ptr));
40556 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40557 passed in SLOT in case BOUNDS are not passed in a register.
40559 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40560 stored in memory. PTR may be NULL in case SLOT is a memory.
40561 In such case value of PTR (if required) may be loaded from SLOT.
40563 If SLOT is NULL or a register then SLOT_NO is an integer constant
40564 holding number of the target dependent special slot which should be
40565 used to store BOUNDS. */
40568 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40572 /* Get address to be used to access Bounds Table. Special slots start
40573 at the location of return address of a called function. */
40574 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40576 /* Load pointer value from a memory if we don't have it. */
40579 gcc_assert (MEM_P (slot));
40580 ptr = copy_addr_to_reg (slot);
40583 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40584 if (!register_operand (bounds, BNDmode))
40585 bounds = copy_to_mode_reg (BNDmode, bounds);
40587 emit_insn (BNDmode == BND64mode
40588 ? gen_bnd64_stx (addr, ptr, bounds)
40589 : gen_bnd32_stx (addr, ptr, bounds));
40592 /* Load and return bounds returned by function in SLOT. */
40595 ix86_load_returned_bounds (rtx slot)
40599 gcc_assert (REG_P (slot));
40600 res = gen_reg_rtx (BNDmode);
40601 emit_move_insn (res, slot);
40606 /* Store BOUNDS returned by function into SLOT. */
40609 ix86_store_returned_bounds (rtx slot, rtx bounds)
40611 gcc_assert (REG_P (slot));
40612 emit_move_insn (slot, bounds);
40615 /* Returns a function decl for a vectorized version of the builtin function
40616 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40617 if it is not available. */
/* NOTE(review): this listing has gaps in its embedded line numbering — blank
   lines, braces, `break;` and `return NULL_TREE;` statements were dropped
   during extraction.  Control flow below must be compared against upstream
   GCC before being relied on.  */
40620 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40623 machine_mode in_mode, out_mode;
40625 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
/* Only vector-to-vector mappings of normal (non-MD) builtins are handled.  */
40627 if (TREE_CODE (type_out) != VECTOR_TYPE
40628 || TREE_CODE (type_in) != VECTOR_TYPE
40629 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
/* Element modes and lane counts select the concrete SSE/AVX/AVX-512
   builtin in the switch below.  */
40632 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40633 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40634 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40635 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40639 case BUILT_IN_SQRT:
40640 if (out_mode == DFmode && in_mode == DFmode)
40642 if (out_n == 2 && in_n == 2)
40643 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40644 else if (out_n == 4 && in_n == 4)
40645 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40646 else if (out_n == 8 && in_n == 8)
40647 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40651 case BUILT_IN_EXP2F:
40652 if (out_mode == SFmode && in_mode == SFmode)
40654 if (out_n == 16 && in_n == 16)
40655 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40659 case BUILT_IN_SQRTF:
40660 if (out_mode == SFmode && in_mode == SFmode)
40662 if (out_n == 4 && in_n == 4)
40663 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40664 else if (out_n == 8 && in_n == 8)
40665 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40666 else if (out_n == 16 && in_n == 16)
40667 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
/* Floor/ceil/round families require the SSE4.1 ROUND insn, and are
   skipped when trapping math is enabled (ROUND does not trap on
   denormals, so its exception behavior differs from libm).  */
40671 case BUILT_IN_IFLOOR:
40672 case BUILT_IN_LFLOOR:
40673 case BUILT_IN_LLFLOOR:
40674 /* The round insn does not trap on denormals. */
40675 if (flag_trapping_math || !TARGET_ROUND)
40678 if (out_mode == SImode && in_mode == DFmode)
40680 if (out_n == 4 && in_n == 2)
40681 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40682 else if (out_n == 8 && in_n == 4)
40683 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40684 else if (out_n == 16 && in_n == 8)
40685 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40689 case BUILT_IN_IFLOORF:
40690 case BUILT_IN_LFLOORF:
40691 case BUILT_IN_LLFLOORF:
40692 /* The round insn does not trap on denormals. */
40693 if (flag_trapping_math || !TARGET_ROUND)
40696 if (out_mode == SImode && in_mode == SFmode)
40698 if (out_n == 4 && in_n == 4)
40699 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40700 else if (out_n == 8 && in_n == 8)
40701 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40705 case BUILT_IN_ICEIL:
40706 case BUILT_IN_LCEIL:
40707 case BUILT_IN_LLCEIL:
40708 /* The round insn does not trap on denormals. */
40709 if (flag_trapping_math || !TARGET_ROUND)
40712 if (out_mode == SImode && in_mode == DFmode)
40714 if (out_n == 4 && in_n == 2)
40715 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40716 else if (out_n == 8 && in_n == 4)
40717 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40718 else if (out_n == 16 && in_n == 8)
40719 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40723 case BUILT_IN_ICEILF:
40724 case BUILT_IN_LCEILF:
40725 case BUILT_IN_LLCEILF:
40726 /* The round insn does not trap on denormals. */
40727 if (flag_trapping_math || !TARGET_ROUND)
40730 if (out_mode == SImode && in_mode == SFmode)
40732 if (out_n == 4 && in_n == 4)
40733 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40734 else if (out_n == 8 && in_n == 8)
40735 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40739 case BUILT_IN_IRINT:
40740 case BUILT_IN_LRINT:
40741 case BUILT_IN_LLRINT:
40742 if (out_mode == SImode && in_mode == DFmode)
40744 if (out_n == 4 && in_n == 2)
40745 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40746 else if (out_n == 8 && in_n == 4)
40747 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40751 case BUILT_IN_IRINTF:
40752 case BUILT_IN_LRINTF:
40753 case BUILT_IN_LLRINTF:
40754 if (out_mode == SImode && in_mode == SFmode)
40756 if (out_n == 4 && in_n == 4)
40757 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40758 else if (out_n == 8 && in_n == 8)
40759 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40763 case BUILT_IN_IROUND:
40764 case BUILT_IN_LROUND:
40765 case BUILT_IN_LLROUND:
40766 /* The round insn does not trap on denormals. */
40767 if (flag_trapping_math || !TARGET_ROUND)
40770 if (out_mode == SImode && in_mode == DFmode)
40772 if (out_n == 4 && in_n == 2)
40773 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40774 else if (out_n == 8 && in_n == 4)
40775 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40776 else if (out_n == 16 && in_n == 8)
40777 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40781 case BUILT_IN_IROUNDF:
40782 case BUILT_IN_LROUNDF:
40783 case BUILT_IN_LLROUNDF:
40784 /* The round insn does not trap on denormals. */
40785 if (flag_trapping_math || !TARGET_ROUND)
40788 if (out_mode == SImode && in_mode == SFmode)
40790 if (out_n == 4 && in_n == 4)
40791 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40792 else if (out_n == 8 && in_n == 8)
40793 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40797 case BUILT_IN_COPYSIGN:
40798 if (out_mode == DFmode && in_mode == DFmode)
40800 if (out_n == 2 && in_n == 2)
40801 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40802 else if (out_n == 4 && in_n == 4)
40803 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40804 else if (out_n == 8 && in_n == 8)
40805 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40809 case BUILT_IN_COPYSIGNF:
40810 if (out_mode == SFmode && in_mode == SFmode)
40812 if (out_n == 4 && in_n == 4)
40813 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40814 else if (out_n == 8 && in_n == 8)
40815 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40816 else if (out_n == 16 && in_n == 16)
40817 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40821 case BUILT_IN_FLOOR:
40822 /* The round insn does not trap on denormals. */
40823 if (flag_trapping_math || !TARGET_ROUND)
40826 if (out_mode == DFmode && in_mode == DFmode)
40828 if (out_n == 2 && in_n == 2)
40829 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40830 else if (out_n == 4 && in_n == 4)
40831 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40835 case BUILT_IN_FLOORF:
40836 /* The round insn does not trap on denormals. */
40837 if (flag_trapping_math || !TARGET_ROUND)
40840 if (out_mode == SFmode && in_mode == SFmode)
40842 if (out_n == 4 && in_n == 4)
40843 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40844 else if (out_n == 8 && in_n == 8)
40845 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40849 case BUILT_IN_CEIL:
40850 /* The round insn does not trap on denormals. */
40851 if (flag_trapping_math || !TARGET_ROUND)
40854 if (out_mode == DFmode && in_mode == DFmode)
40856 if (out_n == 2 && in_n == 2)
40857 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40858 else if (out_n == 4 && in_n == 4)
40859 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40863 case BUILT_IN_CEILF:
40864 /* The round insn does not trap on denormals. */
40865 if (flag_trapping_math || !TARGET_ROUND)
40868 if (out_mode == SFmode && in_mode == SFmode)
40870 if (out_n == 4 && in_n == 4)
40871 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40872 else if (out_n == 8 && in_n == 8)
40873 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40877 case BUILT_IN_TRUNC:
40878 /* The round insn does not trap on denormals. */
40879 if (flag_trapping_math || !TARGET_ROUND)
40882 if (out_mode == DFmode && in_mode == DFmode)
40884 if (out_n == 2 && in_n == 2)
40885 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40886 else if (out_n == 4 && in_n == 4)
40887 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40891 case BUILT_IN_TRUNCF:
40892 /* The round insn does not trap on denormals. */
40893 if (flag_trapping_math || !TARGET_ROUND)
40896 if (out_mode == SFmode && in_mode == SFmode)
40898 if (out_n == 4 && in_n == 4)
40899 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40900 else if (out_n == 8 && in_n == 8)
40901 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40905 case BUILT_IN_RINT:
40906 /* The round insn does not trap on denormals. */
40907 if (flag_trapping_math || !TARGET_ROUND)
40910 if (out_mode == DFmode && in_mode == DFmode)
40912 if (out_n == 2 && in_n == 2)
40913 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40914 else if (out_n == 4 && in_n == 4)
40915 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40919 case BUILT_IN_RINTF:
40920 /* The round insn does not trap on denormals. */
40921 if (flag_trapping_math || !TARGET_ROUND)
40924 if (out_mode == SFmode && in_mode == SFmode)
40926 if (out_n == 4 && in_n == 4)
40927 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40928 else if (out_n == 8 && in_n == 8)
40929 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40933 case BUILT_IN_ROUND:
40934 /* The round insn does not trap on denormals. */
40935 if (flag_trapping_math || !TARGET_ROUND)
40938 if (out_mode == DFmode && in_mode == DFmode)
40940 if (out_n == 2 && in_n == 2)
40941 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40942 else if (out_n == 4 && in_n == 4)
40943 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40947 case BUILT_IN_ROUNDF:
40948 /* The round insn does not trap on denormals. */
40949 if (flag_trapping_math || !TARGET_ROUND)
40952 if (out_mode == SFmode && in_mode == SFmode)
40954 if (out_n == 4 && in_n == 4)
40955 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40956 else if (out_n == 8 && in_n == 8)
40957 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
/* NOTE(review): the `case BUILT_IN_FMA:` label preceding this test was
   dropped by the extraction (numbering gap 40958-40961).  */
40962 if (out_mode == DFmode && in_mode == DFmode)
40964 if (out_n == 2 && in_n == 2)
40965 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40966 if (out_n == 4 && in_n == 4)
40967 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40971 case BUILT_IN_FMAF:
40972 if (out_mode == SFmode && in_mode == SFmode)
40974 if (out_n == 4 && in_n == 4)
40975 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40976 if (out_n == 8 && in_n == 8)
40977 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
/* No direct builtin mapping; try the configured vector math library
   (SVML or ACML) as a fallback.  */
40985 /* Dispatch to a handler for a vectorization library. */
40986 if (ix86_veclib_handler)
40987 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40993 /* Handler for an SVML-style interface to
40994 a library with vectorized intrinsics. */
/* NOTE(review): gaps in the embedded numbering show this listing is missing
   lines (the `static tree` return type, braces, the `n != in_n` check, the
   `return NULL_TREE;` statements, the uppercase-conversion loop body, and the
   final `return new_fndecl;`).  Compare with upstream GCC before relying on
   the exact control flow.  */
40997 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
41000 tree fntype, new_fndecl, args;
41003 machine_mode el_mode, in_mode;
/* SVML routines are only valid replacements under -ffast-math style
   semantics.  */
41006 /* The SVML is suitable for unsafe math only. */
41007 if (!flag_unsafe_math_optimizations)
41010 el_mode = TYPE_MODE (TREE_TYPE (type_out));
41011 n = TYPE_VECTOR_SUBPARTS (type_out);
41012 in_mode = TYPE_MODE (TREE_TYPE (type_in));
41013 in_n = TYPE_VECTOR_SUBPARTS (type_in);
41014 if (el_mode != in_mode
/* Double-precision SVML entry points operate on 2-lane vectors.  */
41022 case BUILT_IN_LOG10:
41024 case BUILT_IN_TANH:
41026 case BUILT_IN_ATAN:
41027 case BUILT_IN_ATAN2:
41028 case BUILT_IN_ATANH:
41029 case BUILT_IN_CBRT:
41030 case BUILT_IN_SINH:
41032 case BUILT_IN_ASINH:
41033 case BUILT_IN_ASIN:
41034 case BUILT_IN_COSH:
41036 case BUILT_IN_ACOSH:
41037 case BUILT_IN_ACOS:
41038 if (el_mode != DFmode || n != 2)
/* Single-precision SVML entry points operate on 4-lane vectors.  */
41042 case BUILT_IN_EXPF:
41043 case BUILT_IN_LOGF:
41044 case BUILT_IN_LOG10F:
41045 case BUILT_IN_POWF:
41046 case BUILT_IN_TANHF:
41047 case BUILT_IN_TANF:
41048 case BUILT_IN_ATANF:
41049 case BUILT_IN_ATAN2F:
41050 case BUILT_IN_ATANHF:
41051 case BUILT_IN_CBRTF:
41052 case BUILT_IN_SINHF:
41053 case BUILT_IN_SINF:
41054 case BUILT_IN_ASINHF:
41055 case BUILT_IN_ASINF:
41056 case BUILT_IN_COSHF:
41057 case BUILT_IN_COSF:
41058 case BUILT_IN_ACOSHF:
41059 case BUILT_IN_ACOSF:
41060 if (el_mode != SFmode || n != 4)
/* Build the SVML routine name: "vmls"/"vmld" prefix + builtin name with
   its "__builtin_" prefix (10 chars) stripped, and a lane-count suffix.
   log/logf map to the irregular names vmldLn2/vmlsLn4.  */
41068 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41070 if (fn == BUILT_IN_LOGF)
41071 strcpy (name, "vmlsLn4");
41072 else if (fn == BUILT_IN_LOG)
41073 strcpy (name, "vmldLn2");
41076 sprintf (name, "vmls%s", bname+10);
41077 name[strlen (name)-1] = '4';
41080 sprintf (name, "vmld%s2", bname+10);
41082 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a unary or binary
   vector function type.  */
41086 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41088 args = TREE_CHAIN (args))
41092 fntype = build_function_type_list (type_out, type_in, NULL);
41094 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41096 /* Build a function declaration for the vectorized function. */
41097 new_fndecl = build_decl (BUILTINS_LOCATION,
41098 FUNCTION_DECL, get_identifier (name), fntype);
41099 TREE_PUBLIC (new_fndecl) = 1;
41100 DECL_EXTERNAL (new_fndecl) = 1;
41101 DECL_IS_NOVOPS (new_fndecl) = 1;
41102 TREE_READONLY (new_fndecl) = 1;
41107 /* Handler for an ACML-style interface to
41108 a library with vectorized intrinsics. */
/* NOTE(review): this listing is missing lines (numbering gaps): the
   `static tree` return type, braces, the `!TARGET_64BIT` half of the guard,
   the `n != in_n` check, the `return NULL_TREE;` statements, and the final
   `return new_fndecl;`.  Compare with upstream GCC before relying on it.  */
41111 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the ".." is patched with the lane-count/type code and the
   scalar routine name is appended at offset 7 below.  */
41113 char name[20] = "__vr.._";
41114 tree fntype, new_fndecl, args;
41117 machine_mode el_mode, in_mode;
41120 /* The ACML is 64bits only and suitable for unsafe math only as
41121 it does not correctly support parts of IEEE with the required
41122 precision such as denormals. */
41124 || !flag_unsafe_math_optimizations)
41127 el_mode = TYPE_MODE (TREE_TYPE (type_out));
41128 n = TYPE_VECTOR_SUBPARTS (type_out);
41129 in_mode = TYPE_MODE (TREE_TYPE (type_in));
41130 in_n = TYPE_VECTOR_SUBPARTS (type_in);
41131 if (el_mode != in_mode
/* Double-precision ACML routines: 2-lane DFmode vectors.  */
41141 case BUILT_IN_LOG2:
41142 case BUILT_IN_LOG10:
41145 if (el_mode != DFmode
/* Single-precision ACML routines: 4-lane SFmode vectors.  */
41150 case BUILT_IN_SINF:
41151 case BUILT_IN_COSF:
41152 case BUILT_IN_EXPF:
41153 case BUILT_IN_POWF:
41154 case BUILT_IN_LOGF:
41155 case BUILT_IN_LOG2F:
41156 case BUILT_IN_LOG10F:
41159 if (el_mode != SFmode
/* Append the builtin's name with its "__builtin_" prefix (10 chars)
   stripped after the "__vr.._" template.  */
41168 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41169 sprintf (name + 7, "%s", bname+10);
/* Count scalar arguments to choose a unary or binary vector type.  */
41172 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41174 args = TREE_CHAIN (args))
41178 fntype = build_function_type_list (type_out, type_in, NULL);
41180 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41182 /* Build a function declaration for the vectorized function. */
41183 new_fndecl = build_decl (BUILTINS_LOCATION,
41184 FUNCTION_DECL, get_identifier (name), fntype);
41185 TREE_PUBLIC (new_fndecl) = 1;
41186 DECL_EXTERNAL (new_fndecl) = 1;
41187 DECL_IS_NOVOPS (new_fndecl) = 1;
41188 TREE_READONLY (new_fndecl) = 1;
41193 /* Returns a decl of a function that implements gather load with
41194 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
41195 Return NULL_TREE if it is not available. */
/* NOTE(review): excerpt is elided — function header, braces, case
   labels and early returns are missing between the numbered lines.  */
41198 ix86_vectorize_builtin_gather (const_tree mem_vectype,
41199 const_tree index_type, int scale)
41202 enum ix86_builtins code;
/* The gather index must be a 32- or 64-bit integer (or pointer).  */
41207 if ((TREE_CODE (index_type) != INTEGER_TYPE
41208 && !POINTER_TYPE_P (index_type))
41209 || (TYPE_MODE (index_type) != SImode
41210 && TYPE_MODE (index_type) != DImode))
41213 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41216 /* v*gather* insn sign extends index to pointer mode. */
41217 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41218 && TYPE_UNSIGNED (index_type))
/* SCALE must be a power of two (hardware encodes 1/2/4/8).  */
41223 || (scale & (scale - 1)) != 0)
/* SI selects the "siv" (32-bit index) builtin variants, otherwise
   the "div" (64-bit index) variants are used.  */
41226 si = TYPE_MODE (index_type) == SImode;
/* Dispatch on the memory vector mode; the AVX512VL forms (GATHER3*)
   are preferred when available.  */
41227 switch (TYPE_MODE (mem_vectype))
41230 if (TARGET_AVX512VL)
41231 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41233 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41236 if (TARGET_AVX512VL)
41237 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41239 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41242 if (TARGET_AVX512VL)
41243 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41245 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41248 if (TARGET_AVX512VL)
41249 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41251 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41254 if (TARGET_AVX512VL)
41255 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41257 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41260 if (TARGET_AVX512VL)
41261 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41263 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41266 if (TARGET_AVX512VL)
41267 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41269 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41272 if (TARGET_AVX512VL)
41273 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41275 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
/* 512-bit modes require full AVX-512F.  */
41278 if (TARGET_AVX512F)
41279 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41284 if (TARGET_AVX512F)
41285 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41290 if (TARGET_AVX512F)
41291 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41296 if (TARGET_AVX512F)
41297 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41305 return ix86_get_builtin (code);
41308 /* Returns a code for a target-specific builtin that implements
41309 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): elided excerpt — the switch statements' headers,
   default cases and final return are missing.  */
41312 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
/* Reciprocal approximations are only valid under SSE math with
   unsafe/finite-only/non-trapping FP semantics, and only when
   optimizing for speed.  */
41314 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41315 && flag_finite_math_only && !flag_trapping_math
41316 && flag_unsafe_math_optimizations))
41320 /* Machine dependent builtins. */
41323 /* Vectorized version of sqrt to rsqrt conversion. */
41324 case IX86_BUILTIN_SQRTPS_NR:
41325 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41327 case IX86_BUILTIN_SQRTPS_NR256:
41328 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41334 /* Normal builtins. */
41337 /* Sqrt to rsqrt conversion. */
41338 case BUILT_IN_SQRTF:
41339 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41346 /* Helper for avx_vpermilps256_operand et al. This is also used by
41347 the expansion functions to turn the parallel back into a mask.
41348 The return value is 0 for no match and the imm8+1 for a match. */
/* NOTE(review): elided excerpt — the function header line, switch
   header, several range checks and return statements are missing.  */
41351 avx_vpermilp_parallel (rtx par, machine_mode mode)
41353 unsigned i, nelt = GET_MODE_NUNITS (mode);
41355 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
/* The PARALLEL must supply exactly one selector per vector element.  */
41357 if (XVECLEN (par, 0) != (int) nelt)
41360 /* Validate that all of the elements are constants, and not totally
41361 out of range. Copy the data into an integral array to make the
41362 subsequent checks easier. */
41363 for (i = 0; i < nelt; ++i)
41365 rtx er = XVECEXP (par, 0, i);
41366 unsigned HOST_WIDE_INT ei;
41368 if (!CONST_INT_P (er))
41379 /* In the 512-bit DFmode case, we can only move elements within
41380 a 128-bit lane. First fill the second part of the mask,
/* Lanes 2 and 3 (elements 4-7): selectors must stay in-lane; the
   in-lane offset is folded into the imm8 mask.  */
41382 for (i = 4; i < 6; ++i)
41384 if (ipar[i] < 4 || ipar[i] >= 6)
41386 mask |= (ipar[i] - 4) << i;
41388 for (i = 6; i < 8; ++i)
41392 mask |= (ipar[i] - 6) << i;
41397 /* In the 256-bit DFmode case, we can only move elements within
/* Low 128-bit lane selects from elements 0-1, high lane from 2-3.  */
41399 for (i = 0; i < 2; ++i)
41403 mask |= ipar[i] << i;
41405 for (i = 2; i < 4; ++i)
41409 mask |= (ipar[i] - 2) << i;
41414 /* In 512 bit SFmode case, permutation in the upper 256 bits
41415 must mirror the permutation in the lower 256-bits. */
41416 for (i = 0; i < 8; ++i)
41417 if (ipar[i] + 8 != ipar[i + 8])
41422 /* In 256 bit SFmode case, we have full freedom of
41423 movement within the low 128-bit lane, but the high 128-bit
41424 lane must mirror the exact same pattern. */
41425 for (i = 0; i < 4; ++i)
41426 if (ipar[i] + 4 != ipar[i + 4])
41433 /* In the 128-bit case, we've full freedom in the placement of
41434 the elements from the source operand. */
/* Each selector occupies nelt/2 bits (2 bits for V4SF, 1 for V2DF).  */
41435 for (i = 0; i < nelt; ++i)
41436 mask |= ipar[i] << (i * (nelt / 2));
41440 gcc_unreachable ();
41443 /* Make sure success has a non-zero value by adding one. */
41447 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41448 the expansion functions to turn the parallel back into a mask.
41449 The return value is 0 for no match and the imm8+1 for a match. */
/* NOTE(review): elided excerpt — function header, braces and some
   failure returns are missing between the numbered lines.  */
41452 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41454 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41456 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41458 if (XVECLEN (par, 0) != (int) nelt)
41461 /* Validate that all of the elements are constants, and not totally
41462 out of range. Copy the data into an integral array to make the
41463 subsequent checks easier. */
41464 for (i = 0; i < nelt; ++i)
41466 rtx er = XVECEXP (par, 0, i);
41467 unsigned HOST_WIDE_INT ei;
41469 if (!CONST_INT_P (er))
/* Selectors index into the concatenation of both operands, so the
   valid range is [0, 2*nelt).  */
41472 if (ei >= 2 * nelt)
41477 /* Validate that the halves of the permute are halves. */
/* Each half must be a run of consecutive elements (a whole 128-bit
   lane of one of the two source operands).  */
41478 for (i = 0; i < nelt2 - 1; ++i)
41479 if (ipar[i] + 1 != ipar[i + 1])
41481 for (i = nelt2; i < nelt - 1; ++i)
41482 if (ipar[i] + 1 != ipar[i + 1])
41485 /* Reconstruct the mask. */
/* The vperm2f128 imm8 packs one lane selector per nibble.  */
41486 for (i = 0; i < 2; ++i)
41488 unsigned e = ipar[i * nelt2];
41492 mask |= e << (i * 4);
41495 /* Make sure success has a non-zero value by adding one. */
41499 /* Return a register priority for hard reg REGNO. */
/* NOTE(review): elided excerpt — the function header line, braces and
   the returned priority values are missing.  */
41501 ix86_register_priority (int hard_regno)
41503 /* ebp and r13 as the base always wants a displacement, r12 as the
41504 base always wants an index. So discourage their usage in an
41506 if (hard_regno == R12_REG || hard_regno == R13_REG)
41508 if (hard_regno == BP_REG)
41510 /* New x86-64 int registers result in bigger code size. Discourage
/* REX-prefixed GPRs (r8-r15) and SSE regs (xmm8-xmm15) cost an extra
   encoding byte, hence the lower priority.  */
41512 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41514 /* New x86-64 SSE registers result in bigger code size. Discourage
41516 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41518 /* Usage of AX register results in smaller code. Prefer it. */
41519 if (hard_regno == AX_REG)
41524 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41526 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41527 QImode must go into class Q_REGS.
41528 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41529 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): elided excerpt — the function header line, braces and
   several return statements are missing between the numbered lines.  */
41532 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41534 machine_mode mode = GET_MODE (x);
41536 /* We're only allowed to return a subclass of CLASS. Many of the
41537 following checks fail for NO_REGS, so eliminate that early. */
41538 if (regclass == NO_REGS)
41541 /* All classes can load zeros. */
41542 if (x == CONST0_RTX (mode))
41545 /* Force constants into memory if we are loading a (nonzero) constant into
41546 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41547 instructions to load from a constant. */
41549 && (MAYBE_MMX_CLASS_P (regclass)
41550 || MAYBE_SSE_CLASS_P (regclass)
41551 || MAYBE_MASK_CLASS_P (regclass)))
41554 /* Prefer SSE regs only, if we can use them for math. */
41555 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41556 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41558 /* Floating-point constants need more complex checks. */
41559 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41561 /* General regs can load everything. */
41562 if (reg_class_subset_p (regclass, GENERAL_REGS))
41565 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41566 zero above. We only want to wind up preferring 80387 registers if
41567 we plan on doing computation with them. */
/* standard_80387_constant_p > 0 means the constant has a dedicated
   x87 load instruction (fld1, fldpi, ...).  */
41569 && standard_80387_constant_p (x) > 0)
41571 /* Limit class to non-sse. */
/* Narrow mixed FP/SSE classes to their x87-only counterparts.  */
41572 if (regclass == FLOAT_SSE_REGS)
41574 if (regclass == FP_TOP_SSE_REGS)
41576 if (regclass == FP_SECOND_SSE_REGS)
41577 return FP_SECOND_REG;
41578 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41585 /* Generally when we see PLUS here, it's the function invariant
41586 (plus soft-fp const_int). Which can only be computed into general
41588 if (GET_CODE (x) == PLUS)
41589 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41591 /* QImode constants are easy to load, but non-constant QImode data
41592 must go into Q_REGS. */
41593 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41595 if (reg_class_subset_p (regclass, Q_REGS))
41597 if (reg_class_subset_p (Q_REGS, regclass))
41605 /* Discourage putting floating-point values in SSE registers unless
41606 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): elided excerpt — header line, braces and some returns
   are missing.  */
41608 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41610 machine_mode mode = GET_MODE (x);
41612 /* Restrict the output reload class to the register bank that we are doing
41613 math on. If we would like not to return a subset of CLASS, reject this
41614 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): this re-assignment is redundant — MODE was already
   initialized from GET_MODE (x) above.  Harmless, but could be
   removed in a full edit of the file.  */
41615 mode = GET_MODE (x);
41616 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41617 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41619 if (X87_FLOAT_MODE_P (mode))
/* Narrow mixed x87/SSE classes to their x87-only counterparts.  */
41621 if (regclass == FP_TOP_SSE_REGS)
41623 else if (regclass == FP_SECOND_SSE_REGS)
41624 return FP_SECOND_REG;
41626 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement TARGET_SECONDARY_RELOAD (elided excerpt — return type,
   braces and several statements are missing between the numbered
   lines).  */
41633 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41634 machine_mode mode, secondary_reload_info *sri)
41636 /* Double-word spills from general registers to non-offsettable memory
41637 references (zero-extended addresses) require special handling. */
41640 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41641 && INTEGER_CLASS_P (rclass)
41642 && !offsettable_memref_p (x))
/* Use the special reload_noff_{load,store} patterns, which cope with
   the non-offsettable address.  */
41645 ? CODE_FOR_reload_noff_load
41646 : CODE_FOR_reload_noff_store);
41647 /* Add the cost of moving address to a temporary. */
41648 sri->extra_cost = 1;
41653 /* QImode spills from non-QI registers require
41654 intermediate register on 32bit targets. */
41656 && (MAYBE_MASK_CLASS_P (rclass)
41657 || (!TARGET_64BIT && !in_p
41658 && INTEGER_CLASS_P (rclass)
41659 && MAYBE_NON_Q_CLASS_P (rclass))))
/* Resolve pseudos/SUBREGs to the underlying hard register number.  */
41668 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41669 regno = true_regnum (x);
41671 /* Return Q_REGS if the operand is in memory. */
41676 /* This condition handles corner case where an expression involving
41677 pointers gets vectorized. We're trying to use the address of a
41678 stack slot as a vector initializer.
41680 (set (reg:V2DI 74 [ vect_cst_.2 ])
41681 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41683 Eventually frame gets turned into sp+offset like this:
41685 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41686 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41687 (const_int 392 [0x188]))))
41689 That later gets turned into:
41691 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41692 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41693 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41695 We'll have the following reload recorded:
41697 Reload 0: reload_in (DI) =
41698 (plus:DI (reg/f:DI 7 sp)
41699 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41700 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41701 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41702 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41703 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41704 reload_reg_rtx: (reg:V2DI 22 xmm1)
41706 Which isn't going to work since SSE instructions can't handle scalar
41707 additions. Returning GENERAL_REGS forces the addition into integer
41708 register and reload can handle subsequent reloads without problems. */
41710 if (in_p && GET_CODE (x) == PLUS
41711 && SSE_CLASS_P (rclass)
41712 && SCALAR_INT_MODE_P (mode))
41713 return GENERAL_REGS;
41718 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
/* NOTE(review): heavily elided fragment — the header line, switch and
   return statements are missing; only a few case labels survive.
   Single-register classes are presumably reported as likely spilled —
   TODO confirm against the full source.  */
41721 ix86_class_likely_spilled_p (reg_class_t rclass)
41732 case SSE_FIRST_REG:
41734 case FP_SECOND_REG:
41745 /* If we are copying between general and FP registers, we need a memory
41746 location. The same is true for SSE and MMX registers.
41748 To optimize register_move_cost performance, allow inline variant.
41750 The macro can't work reliably when one of the CLASSES is class containing
41751 registers from multiple units (SSE, MMX, integer). We avoid this by never
41752 combining those units in single alternative in the machine description.
41753 Ensure that this constraint holds to avoid unexpected surprises.
41755 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41756 enforce these sanity checks. */
/* NOTE(review): elided excerpt — the return type, braces and several
   return statements between the numbered lines are missing.  */
41759 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41760 machine_mode mode, int strict)
41762 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Sanity: no class may mix units (FP/SSE/MMX with anything else);
   the MAYBE_* vs plain predicates differ exactly when a class spans
   multiple units.  */
41764 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41765 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41766 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41767 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41768 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41769 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41771 gcc_assert (!strict || lra_in_progress);
41775 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41778 /* Between mask and general, we have moves no larger than word size. */
41779 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41780 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41783 /* ??? This is a lie. We do have moves between mmx/general, and for
41784 mmx/sse2. But by saying we need secondary memory we discourage the
41785 register allocator from using the mmx registers unless needed. */
41786 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41789 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41791 /* SSE1 doesn't have any direct moves from other classes. */
41795 /* If the target says that inter-unit moves are more expensive
41796 than moving through memory, then don't generate them. */
41797 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41798 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41801 /* Between SSE and general, we have moves no larger than word size. */
41802 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line entry point for SECONDARY_MEMORY_NEEDED: simply forwards
   to the inline variant above (elided excerpt — return type and braces
   are missing).  */
41810 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41811 machine_mode mode, int strict)
41813 return inline_secondary_memory_needed (class1, class2, mode, strict);
41816 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41818 On the 80386, this is the size of MODE in words,
41819 except in the FP regs, where a single reg is always enough. */
/* NOTE(review): elided excerpt — braces and the FP/complex return
   paths are incomplete.  */
41821 static unsigned char
41822 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41824 if (MAYBE_INTEGER_CLASS_P (rclass))
/* XFmode is 80 bits: 2 words on 64-bit, 3 on 32-bit; XCmode is the
   complex pair, hence double.  */
41826 if (mode == XFmode)
41827 return (TARGET_64BIT ? 2 : 3);
41828 else if (mode == XCmode)
41829 return (TARGET_64BIT ? 4 : 6);
/* Generic case: round the mode size up to whole words.  */
41831 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41835 if (COMPLEX_MODE_P (mode))
41842 /* Return true if the registers in CLASS cannot represent the change from
41843 modes FROM to TO. */
/* NOTE(review): elided excerpt — the function header line, braces and
   several returns are missing.  */
41846 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41847 enum reg_class regclass)
41852 /* x87 registers can't do subreg at all, as all values are reformatted
41853 to extended precision. */
41854 if (MAYBE_FLOAT_CLASS_P (regclass))
41857 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41859 int from_size = GET_MODE_SIZE (from);
41860 int to_size = GET_MODE_SIZE (to);
41862 /* Vector registers do not support QI or HImode loads. If we don't
41863 disallow a change to these modes, reload will assume it's ok to
41864 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41865 the vec_dupv4hi pattern. */
41869 /* Further, we cannot allow word_mode subregs of full vector modes.
41870 Otherwise the middle-end will assume it's ok to store to
41871 (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
41872 of the 128-bit register. However, after reload the subreg will
41873 be dropped leaving a plain DImode store. This is indistinguishable
41874 from a "normal" DImode move, and so we're justified to use movsd,
41875 which modifies the entire 128-bit register. */
41876 if (to_size == UNITS_PER_WORD && from_size > UNITS_PER_WORD)
41883 /* Return the cost of moving data of mode M between a
41884 register and memory. A value of 2 is the default; this cost is
41885 relative to those in `REGISTER_MOVE_COST'.
41887 This function is used extensively by register_move_cost that is used to
41888 build tables at startup. Make it inline in this case.
41889 When IN is 2, return maximum of in and out move cost.
41891 If moving between registers and memory is more expensive than
41892 between two registers, you should define this macro to express the
41895 Model also increased moving costs of QImode registers in non
41899 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
/* NOTE(review): elided excerpt — the IN parameter line, braces, the
   index computations and several case labels are missing between the
   numbered lines.  */
/* x87 register classes: cost tables are indexed by FP mode size.  */
41903 if (FLOAT_CLASS_P (regclass))
41921 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41922 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: indexed by vector/scalar mode size.  */
41924 if (SSE_CLASS_P (regclass))
41927 switch (GET_MODE_SIZE (mode))
41942 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41943 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41945 if (MMX_CLASS_P (regclass))
41948 switch (GET_MODE_SIZE (mode))
41960 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41961 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: dispatch on size; QImode is special-cased for
   partial-register-stall targets.  */
41963 switch (GET_MODE_SIZE (mode))
41966 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41969 return ix86_cost->int_store[0];
/* On partial-reg-dependency targets prefer movzbl loads to avoid a
   dependence on the old register contents.  */
41970 if (TARGET_PARTIAL_REG_DEPENDENCY
41971 && optimize_function_for_speed_p (cfun))
41972 cost = ix86_cost->movzbl_load;
41974 cost = ix86_cost->int_load[0];
41976 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes on 32-bit: byte stores need an extra penalty (+4).  */
41982 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41984 return ix86_cost->movzbl_load;
41986 return ix86_cost->int_store[0] + 4;
41991 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41992 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41994 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41995 if (mode == TFmode)
41998 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
42000 cost = ix86_cost->int_load[2];
42002 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
42003 return (cost * (((int) GET_MODE_SIZE (mode)
42004 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* TARGET_MEMORY_MOVE_COST hook: forwards to the inline cost helper,
   mapping the bool IN to the helper's 0/1 convention (elided excerpt —
   return type, IN parameter and braces are missing).  */
42009 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
42012 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
42016 /* Return the cost of moving data from a register in class CLASS1 to
42017 one in class CLASS2.
42019 It is not required that the cost always equal 2 when FROM is the same as TO;
42020 on some machines it is expensive to move between registers if they are not
42021 general registers. */
/* NOTE(review): elided excerpt — the function header line, braces and
   some returns are missing between the numbered lines.  */
42024 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
42025 reg_class_t class2_i)
42027 enum reg_class class1 = (enum reg_class) class1_i;
42028 enum reg_class class2 = (enum reg_class) class2_i;
42030 /* In case we require secondary memory, compute cost of the store followed
42031 by load. In order to avoid bad register allocation choices, we need
42032 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
42034 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN argument 2 = max of load and store cost for each side.  */
42038 cost += inline_memory_move_cost (mode, class1, 2);
42039 cost += inline_memory_move_cost (mode, class2, 2);
42041 /* In case of copying from general_purpose_register we may emit multiple
42042 stores followed by single load causing memory size mismatch stall.
42043 Count this as arbitrarily high cost of 20. */
42044 if (targetm.class_max_nregs (class1, mode)
42045 > targetm.class_max_nregs (class2, mode))
42048 /* In the case of FP/MMX moves, the registers actually overlap, and we
42049 have to switch modes in order to treat them differently. */
42050 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
42051 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
42057 /* Moves between SSE/MMX and integer unit are expensive. */
42058 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
42059 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
42061 /* ??? By keeping returned value relatively high, we limit the number
42062 of moves between integer and MMX/SSE registers for all targets.
42063 Additionally, high value prevents problem with x86_modes_tieable_p(),
42064 where integer modes in MMX/SSE registers are not tieable
42065 because of missing QImode and HImode moves to, from or between
42066 MMX/SSE registers. */
42067 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Intra-unit moves: per-unit tuning costs.  */
42069 if (MAYBE_FLOAT_CLASS_P (class1))
42070 return ix86_cost->fp_move;
42071 if (MAYBE_SSE_CLASS_P (class1))
42072 return ix86_cost->sse_move;
42073 if (MAYBE_MMX_CLASS_P (class1))
42074 return ix86_cost->mmx_move;
42078 /* Return TRUE if hard register REGNO can hold a value of machine-mode
/* NOTE(review): elided excerpt — the function header line, braces and
   some returns are missing between the numbered lines.  */
42082 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
42084 /* Flags and only flags can only hold CCmode values. */
42085 if (CC_REGNO_P (regno))
42086 return GET_MODE_CLASS (mode) == MODE_CC;
42087 if (GET_MODE_CLASS (mode) == MODE_CC
42088 || GET_MODE_CLASS (mode) == MODE_RANDOM
42089 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
42091 if (STACK_REGNO_P (regno))
42092 return VALID_FP_MODE_P (mode);
/* Mask (k0-k7) registers: base mask modes, plus the wider ones when
   AVX512BW is enabled.  */
42093 if (MASK_REGNO_P (regno))
42094 return (VALID_MASK_REG_MODE (mode)
42095 || (TARGET_AVX512BW
42096 && VALID_MASK_AVX512BW_MODE (mode)));
42097 if (BND_REGNO_P (regno))
42098 return VALID_BND_REG_MODE (mode);
42099 if (SSE_REGNO_P (regno))
42101 /* We implement the move patterns for all vector modes into and
42102 out of SSE registers, even when no operation instructions
42105 /* For AVX-512 we allow, regardless of regno:
42107 - any of 512-bit wide vector mode
42108 - any scalar mode. */
42111 || VALID_AVX512F_REG_MODE (mode)
42112 || VALID_AVX512F_SCALAR_MODE (mode)))
42115 /* TODO check for QI/HI scalars. */
42116 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
42117 if (TARGET_AVX512VL
42120 || VALID_AVX256_REG_MODE (mode)
42121 || VALID_AVX512VL_128_REG_MODE (mode)))
42124 /* xmm16-xmm31 are only available for AVX-512. */
42125 if (EXT_REX_SSE_REGNO_P (regno))
42128 /* OImode and AVX modes are available only when AVX is enabled. */
42129 return ((TARGET_AVX
42130 && VALID_AVX256_REG_OR_OI_MODE (mode))
42131 || VALID_SSE_REG_MODE (mode)
42132 || VALID_SSE2_REG_MODE (mode)
42133 || VALID_MMX_REG_MODE (mode)
42134 || VALID_MMX_REG_MODE_3DNOW (mode));
42136 if (MMX_REGNO_P (regno))
42138 /* We implement the move patterns for 3DNOW modes even in MMX mode,
42139 so if the register is available at all, then we can move data of
42140 the given mode into or out of it. */
42141 return (VALID_MMX_REG_MODE (mode)
42142 || VALID_MMX_REG_MODE_3DNOW (mode));
42145 if (mode == QImode)
42147 /* Take care for QImode values - they can be in non-QI regs,
42148 but then they do cause partial register stalls. */
42149 if (ANY_QI_REGNO_P (regno))
42151 if (!TARGET_PARTIAL_REG_STALL)
42153 /* LRA checks if the hard register is OK for the given mode.
42154 QImode values can live in non-QI regs, so we allow all
42156 if (lra_in_progress)
42158 return !can_create_pseudo_p ();
42160 /* We handle both integer and floats in the general purpose registers. */
42161 else if (VALID_INT_MODE_P (mode))
42163 else if (VALID_FP_MODE_P (mode))
42165 else if (VALID_DFP_MODE_P (mode))
42167 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
42168 on to use that value in smaller contexts, this can easily force a
42169 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
42170 supporting DImode, allow it. */
42171 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
42177 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
42178 tieable integer mode. */
/* NOTE(review): heavily elided fragment — the switch header and case
   labels are missing; only two return expressions survive.  Byte-wide
   modes presumably map to the PARTIAL_REG_STALL check and DImode-class
   modes to the TARGET_64BIT check — TODO confirm.  */
42181 ix86_tieable_integer_mode_p (machine_mode mode)
42190 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
42193 return TARGET_64BIT;
42200 /* Return true if MODE1 is accessible in a register that can hold MODE2
42201 without copying. That is, all register classes that can hold MODE2
42202 can also hold MODE1. */
/* NOTE(review): elided excerpt — the function header line, braces and
   the final return are missing.  */
42205 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
42207 if (mode1 == mode2)
42210 if (ix86_tieable_integer_mode_p (mode1)
42211 && ix86_tieable_integer_mode_p (mode2))
42214 /* MODE2 being XFmode implies fp stack or general regs, which means we
42215 can tie any smaller floating point modes to it. Note that we do not
42216 tie this with TFmode. */
42217 if (mode2 == XFmode)
42218 return mode1 == SFmode || mode1 == DFmode;
42220 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42221 that we can tie it with SFmode. */
42222 if (mode2 == DFmode)
42223 return mode1 == SFmode;
42225 /* If MODE2 is only appropriate for an SSE register, then tie with
42226 any other mode acceptable to SSE registers. */
/* 32-byte (256-bit) modes tie only with other 32-byte SSE modes;
   16-byte modes likewise among themselves.  */
42227 if (GET_MODE_SIZE (mode2) == 32
42228 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42229 return (GET_MODE_SIZE (mode1) == 32
42230 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42231 if (GET_MODE_SIZE (mode2) == 16
42232 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42233 return (GET_MODE_SIZE (mode1) == 16
42234 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42236 /* If MODE2 is appropriate for an MMX register, then tie
42237 with any other mode acceptable to MMX registers. */
42238 if (GET_MODE_SIZE (mode2) == 8
42239 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42240 return (GET_MODE_SIZE (mode1) == 8
42241 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42246 /* Return the cost of moving between two registers of mode MODE. */
/* NOTE(review): elided excerpt — the function header line, braces,
   some case labels and break statements are missing.  */
42249 ix86_set_reg_reg_cost (machine_mode mode)
/* UNITS is the largest chunk movable in one instruction; defaults to
   the word size and is widened below when the target has native
   support for the full mode.  */
42251 unsigned int units = UNITS_PER_WORD;
42253 switch (GET_MODE_CLASS (mode))
42259 units = GET_MODE_SIZE (CCmode);
/* Scalar FP: a single move suffices when the mode is natively
   supported (x87 or SSE).  */
42263 if ((TARGET_SSE && mode == TFmode)
42264 || (TARGET_80387 && mode == XFmode)
42265 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42266 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42267 units = GET_MODE_SIZE (mode);
42270 case MODE_COMPLEX_FLOAT:
42271 if ((TARGET_SSE && mode == TCmode)
42272 || (TARGET_80387 && mode == XCmode)
42273 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42274 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42275 units = GET_MODE_SIZE (mode);
42278 case MODE_VECTOR_INT:
42279 case MODE_VECTOR_FLOAT:
/* Vector modes: one move when the widest matching ISA extension is
   enabled for this mode.  */
42280 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42281 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42282 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42283 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42284 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42285 units = GET_MODE_SIZE (mode);
42288 /* Return the cost of moving between two registers of mode MODE,
42289 assuming that the move will be in pieces of at most UNITS bytes. */
42290 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42293 /* Compute a (partial) cost for rtx X. Return true if the complete
42294 cost has been computed, and false if subexpressions should be
42295 scanned. In either case, *TOTAL contains the cost result. */
42298 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42302 enum rtx_code code = (enum rtx_code) code_i;
42303 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42304 machine_mode mode = GET_MODE (x);
42305 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42310 if (register_operand (SET_DEST (x), VOIDmode)
42311 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42313 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42322 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42324 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42326 else if (flag_pic && SYMBOLIC_CONST (x)
42328 && (GET_CODE (x) == LABEL_REF
42329 || (GET_CODE (x) == SYMBOL_REF
42330 && SYMBOL_REF_LOCAL_P (x)))))
42337 if (mode == VOIDmode)
42342 switch (standard_80387_constant_p (x))
42347 default: /* Other constants */
42354 if (SSE_FLOAT_MODE_P (mode))
42357 switch (standard_sse_constant_p (x))
42361 case 1: /* 0: xor eliminates false dependency */
42364 default: /* -1: cmp contains false dependency */
42369 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42370 it'll probably end up. Add a penalty for size. */
42371 *total = (COSTS_N_INSNS (1)
42372 + (flag_pic != 0 && !TARGET_64BIT)
42373 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42377 /* The zero extensions is often completely free on x86_64, so make
42378 it as cheap as possible. */
42379 if (TARGET_64BIT && mode == DImode
42380 && GET_MODE (XEXP (x, 0)) == SImode)
42382 else if (TARGET_ZERO_EXTEND_WITH_AND)
42383 *total = cost->add;
42385 *total = cost->movzx;
42389 *total = cost->movsx;
42393 if (SCALAR_INT_MODE_P (mode)
42394 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42395 && CONST_INT_P (XEXP (x, 1)))
42397 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42400 *total = cost->add;
42403 if ((value == 2 || value == 3)
42404 && cost->lea <= cost->shift_const)
42406 *total = cost->lea;
42416 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42418 /* ??? Should be SSE vector operation cost. */
42419 /* At least for published AMD latencies, this really is the same
42420 as the latency for a simple fpu operation like fabs. */
42421 /* V*QImode is emulated with 1-11 insns. */
42422 if (mode == V16QImode || mode == V32QImode)
42425 if (TARGET_XOP && mode == V16QImode)
42427 /* For XOP we use vpshab, which requires a broadcast of the
42428 value to the variable shift insn. For constants this
42429 means a V16Q const in mem; even when we can perform the
42430 shift with one insn set the cost to prefer paddb. */
42431 if (CONSTANT_P (XEXP (x, 1)))
42433 *total = (cost->fabs
42434 + rtx_cost (XEXP (x, 0), code, 0, speed)
42435 + (speed ? 2 : COSTS_N_BYTES (16)));
42440 else if (TARGET_SSSE3)
42442 *total = cost->fabs * count;
42445 *total = cost->fabs;
42447 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42449 if (CONST_INT_P (XEXP (x, 1)))
42451 if (INTVAL (XEXP (x, 1)) > 32)
42452 *total = cost->shift_const + COSTS_N_INSNS (2);
42454 *total = cost->shift_const * 2;
42458 if (GET_CODE (XEXP (x, 1)) == AND)
42459 *total = cost->shift_var * 2;
42461 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42466 if (CONST_INT_P (XEXP (x, 1)))
42467 *total = cost->shift_const;
42468 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42469 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42471 /* Return the cost after shift-and truncation. */
42472 *total = cost->shift_var;
42476 *total = cost->shift_var;
42484 gcc_assert (FLOAT_MODE_P (mode));
42485 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42487 /* ??? SSE scalar/vector cost should be used here. */
42488 /* ??? Bald assumption that fma has the same cost as fmul. */
42489 *total = cost->fmul;
42490 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42492 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42494 if (GET_CODE (sub) == NEG)
42495 sub = XEXP (sub, 0);
42496 *total += rtx_cost (sub, FMA, 0, speed);
42499 if (GET_CODE (sub) == NEG)
42500 sub = XEXP (sub, 0);
42501 *total += rtx_cost (sub, FMA, 2, speed);
42506 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42508 /* ??? SSE scalar cost should be used here. */
42509 *total = cost->fmul;
42512 else if (X87_FLOAT_MODE_P (mode))
42514 *total = cost->fmul;
42517 else if (FLOAT_MODE_P (mode))
42519 /* ??? SSE vector cost should be used here. */
42520 *total = cost->fmul;
42523 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42525 /* V*QImode is emulated with 7-13 insns. */
42526 if (mode == V16QImode || mode == V32QImode)
42529 if (TARGET_XOP && mode == V16QImode)
42531 else if (TARGET_SSSE3)
42533 *total = cost->fmul * 2 + cost->fabs * extra;
42535 /* V*DImode is emulated with 5-8 insns. */
42536 else if (mode == V2DImode || mode == V4DImode)
42538 if (TARGET_XOP && mode == V2DImode)
42539 *total = cost->fmul * 2 + cost->fabs * 3;
42541 *total = cost->fmul * 3 + cost->fabs * 5;
42543 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42544 insns, including two PMULUDQ. */
42545 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42546 *total = cost->fmul * 2 + cost->fabs * 5;
42548 *total = cost->fmul;
42553 rtx op0 = XEXP (x, 0);
42554 rtx op1 = XEXP (x, 1);
42556 if (CONST_INT_P (XEXP (x, 1)))
42558 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42559 for (nbits = 0; value != 0; value &= value - 1)
42563 /* This is arbitrary. */
42566 /* Compute costs correctly for widening multiplication. */
42567 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42568 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42569 == GET_MODE_SIZE (mode))
42571 int is_mulwiden = 0;
42572 machine_mode inner_mode = GET_MODE (op0);
42574 if (GET_CODE (op0) == GET_CODE (op1))
42575 is_mulwiden = 1, op1 = XEXP (op1, 0);
42576 else if (CONST_INT_P (op1))
42578 if (GET_CODE (op0) == SIGN_EXTEND)
42579 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42582 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42586 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42589 *total = (cost->mult_init[MODE_INDEX (mode)]
42590 + nbits * cost->mult_bit
42591 + rtx_cost (op0, outer_code, opno, speed)
42592 + rtx_cost (op1, outer_code, opno, speed));
42601 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42602 /* ??? SSE cost should be used here. */
42603 *total = cost->fdiv;
42604 else if (X87_FLOAT_MODE_P (mode))
42605 *total = cost->fdiv;
42606 else if (FLOAT_MODE_P (mode))
42607 /* ??? SSE vector cost should be used here. */
42608 *total = cost->fdiv;
42610 *total = cost->divide[MODE_INDEX (mode)];
42614 if (GET_MODE_CLASS (mode) == MODE_INT
42615 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42617 if (GET_CODE (XEXP (x, 0)) == PLUS
42618 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42619 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42620 && CONSTANT_P (XEXP (x, 1)))
42622 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42623 if (val == 2 || val == 4 || val == 8)
42625 *total = cost->lea;
42626 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42627 outer_code, opno, speed);
42628 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42629 outer_code, opno, speed);
42630 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42634 else if (GET_CODE (XEXP (x, 0)) == MULT
42635 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42637 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42638 if (val == 2 || val == 4 || val == 8)
42640 *total = cost->lea;
42641 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42642 outer_code, opno, speed);
42643 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42647 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42649 *total = cost->lea;
42650 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42651 outer_code, opno, speed);
42652 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42653 outer_code, opno, speed);
42654 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42661 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42663 /* ??? SSE cost should be used here. */
42664 *total = cost->fadd;
42667 else if (X87_FLOAT_MODE_P (mode))
42669 *total = cost->fadd;
42672 else if (FLOAT_MODE_P (mode))
42674 /* ??? SSE vector cost should be used here. */
42675 *total = cost->fadd;
42683 if (GET_MODE_CLASS (mode) == MODE_INT
42684 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42686 *total = (cost->add * 2
42687 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42688 << (GET_MODE (XEXP (x, 0)) != DImode))
42689 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42690 << (GET_MODE (XEXP (x, 1)) != DImode)));
42696 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42698 /* ??? SSE cost should be used here. */
42699 *total = cost->fchs;
42702 else if (X87_FLOAT_MODE_P (mode))
42704 *total = cost->fchs;
42707 else if (FLOAT_MODE_P (mode))
42709 /* ??? SSE vector cost should be used here. */
42710 *total = cost->fchs;
42716 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42718 /* ??? Should be SSE vector operation cost. */
42719 /* At least for published AMD latencies, this really is the same
42720 as the latency for a simple fpu operation like fabs. */
42721 *total = cost->fabs;
42723 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42724 *total = cost->add * 2;
42726 *total = cost->add;
42730 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42731 && XEXP (XEXP (x, 0), 1) == const1_rtx
42732 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42733 && XEXP (x, 1) == const0_rtx)
42735 /* This kind of construct is implemented using test[bwl].
42736 Treat it as if we had an AND. */
42737 *total = (cost->add
42738 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42739 + rtx_cost (const1_rtx, outer_code, opno, speed));
42745 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42750 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42751 /* ??? SSE cost should be used here. */
42752 *total = cost->fabs;
42753 else if (X87_FLOAT_MODE_P (mode))
42754 *total = cost->fabs;
42755 else if (FLOAT_MODE_P (mode))
42756 /* ??? SSE vector cost should be used here. */
42757 *total = cost->fabs;
42761 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42762 /* ??? SSE cost should be used here. */
42763 *total = cost->fsqrt;
42764 else if (X87_FLOAT_MODE_P (mode))
42765 *total = cost->fsqrt;
42766 else if (FLOAT_MODE_P (mode))
42767 /* ??? SSE vector cost should be used here. */
42768 *total = cost->fsqrt;
42772 if (XINT (x, 1) == UNSPEC_TP)
42778 case VEC_DUPLICATE:
42779 /* ??? Assume all of these vector manipulation patterns are
42780 recognizable. In which case they all pretty much have the
42782 *total = cost->fabs;
42785 mask = XEXP (x, 2);
42786 /* This is masked instruction, assume the same cost,
42787 as nonmasked variant. */
42788 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42789 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42791 *total = cost->fabs;
42801 static int current_machopic_label_num;
42803 /* Given a symbol name and its associated stub, write out the
42804 definition of the stub. */
/* Write the Darwin (Mach-O) lazy-binding stub for SYMB, named STUB, to FILE.
   Emits the stub body, the stub-binding-helper trampoline (except for the
   AT&T-style stub, which is not lazily bound) and the lazy symbol pointer.
   NOTE(review): this extract is sampled; some original lines (braces, else
   arms) are elided — do not treat the block as complete.  */
42807 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42809 unsigned int length;
42810 char *binder_name, *symbol_name, lazy_ptr_name[32];
42811 int label = ++current_machopic_label_num;
42813 /* For 64-bit we shouldn't get here. */
42814 gcc_assert (!TARGET_64BIT);
42816 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42817 symb = targetm.strip_name_encoding (symb);
/* Build the binder and symbol names into alloca'd buffers; the +32 slack
   covers the decoration added by the GEN_* macros.  */
42819 length = strlen (stub);
42820 binder_name = XALLOCAVEC (char, length + 32);
42821 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42823 length = strlen (symb);
42824 symbol_name = XALLOCAVEC (char, length + 32);
42825 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42827 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section matching the code model: AT&T-style, pure PIC,
   or plain (-mdynamic-no-pic) stubs each live in their own section.  */
42829 if (MACHOPIC_ATT_STUB)
42830 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42831 else if (MACHOPIC_PURE)
42832 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42834 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42836 fprintf (file, "%s:\n", stub);
42837 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42839 if (MACHOPIC_ATT_STUB)
/* AT&T-style stub body is just hlt padding; dyld rewrites it.  */
42841 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42843 else if (MACHOPIC_PURE)
42846 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42847 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42848 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42849 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42850 label, lazy_ptr_name, label);
42851 fprintf (file, "\tjmp\t*%%ecx\n")42854 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42856 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42857 it needs no stub-binding-helper. */
42858 if (MACHOPIC_ATT_STUB)
42861 fprintf (file, "%s:\n", binder_name);
42865 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42866 fprintf (file, "\tpushl\t%%ecx\n");
42869 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42871 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42873 /* N.B. Keep the correspondence of these
42874 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42875 old-pic/new-pic/non-pic stubs; altering this will break
42876 compatibility with existing dylibs. */
42879 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42880 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42883 /* 16-byte -mdynamic-no-pic stub. */
42884 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
/* Lazy symbol pointer initially points at the binder; dyld overwrites
   it with the resolved address on first call.  */
42886 fprintf (file, "%s:\n", lazy_ptr_name);
42887 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42888 fprintf (file, ASM_LONG "%s\n", binder_name);
42890 #endif /* TARGET_MACHO */
42892 /* Order the registers for register allocator. */
/* Fill reg_alloc_order[] with the register allocator's preference order:
   call-clobbered GPRs first, then call-saved GPRs, then x87/SSE/mask/bound/
   MMX classes.  x87 stack registers are placed before SSE when not doing
   SSE math, after otherwise.  NOTE(review): sampled extract — some lines
   (declarations of i/pos, part of a comment) are elided.  */
42895 x86_order_regs_for_local_alloc (void)
42900 /* First allocate the local general purpose registers. */
42901 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42902 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42903 reg_alloc_order [pos++] = i;
42905 /* Global general purpose registers. */
42906 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42907 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42908 reg_alloc_order [pos++] = i;
42910 /* x87 registers come first in case we are doing FP math
42912 if (!TARGET_SSE_MATH)
42913 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42914 reg_alloc_order [pos++] = i;
42916 /* SSE registers. */
42917 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42918 reg_alloc_order [pos++] = i;
42919 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42920 reg_alloc_order [pos++] = i;
42922 /* Extended REX SSE registers. */
42923 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42924 reg_alloc_order [pos++] = i;
42926 /* Mask register. */
42927 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42928 reg_alloc_order [pos++] = i;
42930 /* MPX bound registers. */
42931 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42932 reg_alloc_order [pos++] = i;
42934 /* x87 registers. */
42935 if (TARGET_SSE_MATH)
42936 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42937 reg_alloc_order [pos++] = i;
42939 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42940 reg_alloc_order [pos++] = i;
42942 /* Initialize the rest of array as we do not allocate some registers
42944 while (pos < FIRST_PSEUDO_REGISTER)
42945 reg_alloc_order [pos++] = 0;
42948 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42949 in struct attribute_spec handler. */
/* Attribute handler for "callee_pop_aggregate_return".  Warns and sets
   *NO_ADD_ATTRS when the attribute is applied to a non-function, used on
   a 64-bit target, or given an argument other than integer 0 or 1.
   NOTE(review): sampled extract — the TARGET_64BIT check and several
   closing braces/returns are elided here.  */
42951 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42954 bool *no_add_attrs)
42956 if (TREE_CODE (*node) != FUNCTION_TYPE
42957 && TREE_CODE (*node) != METHOD_TYPE
42958 && TREE_CODE (*node) != FIELD_DECL
42959 && TREE_CODE (*node) != TYPE_DECL)
42961 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42963 *no_add_attrs = true;
/* The attribute is only meaningful for the 32-bit ABI.  */
42968 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42970 *no_add_attrs = true;
42973 if (is_attribute_p ("callee_pop_aggregate_return", name))
42977 cst = TREE_VALUE (args);
42978 if (TREE_CODE (cst) != INTEGER_CST)
42980 warning (OPT_Wattributes,
42981 "%qE attribute requires an integer constant argument",
42983 *no_add_attrs = true;
42985 else if (compare_tree_int (cst, 0) != 0
42986 && compare_tree_int (cst, 1) != 0)
42988 warning (OPT_Wattributes,
42989 "argument to %qE attribute is neither zero, nor one",
42991 *no_add_attrs = true;
43000 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
43001 struct attribute_spec.handler. */
/* Attribute handler for "ms_abi" / "sysv_abi".  Rejects non-function
   targets with a warning and errors when both ABI attributes appear on
   the same type.  NOTE(review): sampled extract — returns and closing
   braces are elided.  */
43003 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
43004 bool *no_add_attrs)
43006 if (TREE_CODE (*node) != FUNCTION_TYPE
43007 && TREE_CODE (*node) != METHOD_TYPE
43008 && TREE_CODE (*node) != FIELD_DECL
43009 && TREE_CODE (*node) != TYPE_DECL)
43011 warning (OPT_Wattributes, "%qE attribute only applies to functions",
43013 *no_add_attrs = true;
43017 /* Can combine regparm with all attributes but fastcall. */
43018 if (is_attribute_p ("ms_abi", name))
43020 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
43022 error ("ms_abi and sysv_abi attributes are not compatible");
43027 else if (is_attribute_p ("sysv_abi", name))
43029 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
43031 error ("ms_abi and sysv_abi attributes are not compatible");
43040 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
43041 struct attribute_spec.handler. */
/* Attribute handler for "ms_struct" / "gcc_struct".  Finds the type the
   attribute applies to, warns (and sets *NO_ADD_ATTRS) when the target is
   not a record/union or when the two attributes conflict on one type.
   NOTE(review): sampled extract — the `tree *type` declaration and the
   non-TYPE_DECL branch are elided.  */
43043 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
43044 bool *no_add_attrs)
43047 if (DECL_P (*node))
43049 if (TREE_CODE (*node) == TYPE_DECL)
43050 type = &TREE_TYPE (*node);
43055 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
43057 warning (OPT_Wattributes, "%qE attribute ignored",
43059 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on the same type.  */
43062 else if ((is_attribute_p ("ms_struct", name)
43063 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
43064 || ((is_attribute_p ("gcc_struct", name)
43065 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
43067 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
43069 *no_add_attrs = true;
/* Generic handler for attributes that may only decorate FUNCTION_DECLs;
   warns and sets *NO_ADD_ATTRS otherwise.  NOTE(review): sampled extract —
   the return statement(s) are elided.  */
43076 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
43077 bool *no_add_attrs)
43079 if (TREE_CODE (*node) != FUNCTION_DECL)
43081 warning (OPT_Wattributes, "%qE attribute only applies to functions",
43083 *no_add_attrs = true;
/* Return true when RECORD_TYPE should use MS bitfield layout: either the
   target default is MS layout and "gcc_struct" does not override it, or
   the type carries an explicit "ms_struct" attribute.  */
43089 ix86_ms_bitfield_layout_p (const_tree record_type)
43091 return ((TARGET_MS_BITFIELD_LAYOUT
43092 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
43093 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
43096 /* Returns an expression indicating where the this parameter is
43097 located on entry to the FUNCTION. */
/* Return an RTX (register or stack MEM) for the `this' parameter of
   FUNCTION on entry.  AGGR is set when the function returns an aggregate
   in memory, which shifts `this' to the next slot/register.
   NOTE(review): sampled extract — the 64-bit/regparm branch structure and
   several returns are elided; the visible fragments cover the 64-bit ABI
   register case, fastcall/thiscall register cases, and the default
   stack-slot cases.  */
43100 x86_this_parameter (tree function)
43102 tree type = TREE_TYPE (function);
43103 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
43108 const int *parm_regs;
/* 64-bit: `this' is in the first (or second, if AGGR) integer
   parameter register of the function's ABI.  */
43110 if (ix86_function_type_abi (type) == MS_ABI)
43111 parm_regs = x86_64_ms_abi_int_parameter_registers;
43113 parm_regs = x86_64_int_parameter_registers;
43114 return gen_rtx_REG (Pmode, parm_regs[aggr]);
43117 nregs = ix86_function_regparm (type, function);
43119 if (nregs > 0 && !stdarg_p (type))
43122 unsigned int ccvt = ix86_get_callcvt (type);
43124 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43125 regno = aggr ? DX_REG : CX_REG;
43126 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43130 return gen_rtx_MEM (SImode,
43131 plus_constant (Pmode, stack_pointer_rtx, 4));
43140 return gen_rtx_MEM (SImode,
43141 plus_constant (Pmode,
43142 stack_pointer_rtx, 4));
43145 return gen_rtx_REG (SImode, regno);
/* Default: `this' lives on the stack just above the return address.  */
43148 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
43152 /* Determine whether x86_output_mi_thunk can succeed. */
/* Predicate for TARGET_ASM_CAN_OUTPUT_MI_THUNK: determine whether
   x86_output_mi_thunk can emit a thunk for FUNCTION with the given
   vcall offset.  NOTE(review): sampled extract — the actual return
   statements between the commented checks are elided.  */
43155 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
43156 const_tree function)
43158 /* 64-bit can handle anything. */
43162 /* For 32-bit, everything's fine if we have one free register. */
43163 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
43166 /* Need a free register for vcall_offset. */
43170 /* Need a free register for GOT references. */
43171 if (flag_pic && !targetm.binds_local_p (function))
43174 /* Otherwise ok. */
43178 /* Output the assembler code for a thunk function. THUNK_DECL is the
43179 declaration for the thunk function itself, FUNCTION is the decl for
43180 the target function. DELTA is an immediate constant offset to be
43181 added to THIS. If VCALL_OFFSET is nonzero, the word at
43182 *(*this + vcall_offset) should be added to THIS. */
/* Emit the assembly for a "this"-adjusting thunk (TARGET_ASM_OUTPUT_MI_THUNK).
   Adds DELTA to `this', optionally adds *(*this + VCALL_OFFSET), then
   tail-calls FUNCTION.  Because no optimization runs on this code, the
   tail call is emitted as a bare CALL/JMP pattern rather than a normal
   sibcall.  NOTE(review): sampled extract — several lines (TARGET_64BIT
   tests, some braces/else arms) are elided; comments below describe only
   the visible code.  */
43185 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
43186 HOST_WIDE_INT vcall_offset, tree function)
43188 rtx this_param = x86_this_parameter (function);
43189 rtx this_reg, tmp, fnaddr;
43190 unsigned int tmp_regno;
/* Pick a scratch register that cannot clash with the registers used to
   pass `this': R10 for 64-bit, else AX/DX/CX depending on calling
   convention.  */
43194 tmp_regno = R10_REG;
43197 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
43198 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43199 tmp_regno = AX_REG;
43200 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43201 tmp_regno = DX_REG;
43203 tmp_regno = CX_REG;
43206 emit_note (NOTE_INSN_PROLOGUE_END);
43208 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
43209 pull it in now and let DELTA benefit. */
43210 if (REG_P (this_param))
43211 this_reg = this_param;
43212 else if (vcall_offset)
43214 /* Put the this parameter into %eax. */
43215 this_reg = gen_rtx_REG (Pmode, AX_REG);
43216 emit_move_insn (this_reg, this_param);
43219 this_reg = NULL_RTX;
43221 /* Adjust the this parameter by a fixed constant. */
43224 rtx delta_rtx = GEN_INT (delta);
43225 rtx delta_dst = this_reg ? this_reg : this_param;
/* DELTA may not fit an x86-64 immediate; materialize it in the
   scratch register first.  */
43229 if (!x86_64_general_operand (delta_rtx, Pmode))
43231 tmp = gen_rtx_REG (Pmode, tmp_regno);
43232 emit_move_insn (tmp, delta_rtx);
43237 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43240 /* Adjust the this parameter by a value stored in the vtable. */
43243 rtx vcall_addr, vcall_mem, this_mem;
43245 tmp = gen_rtx_REG (Pmode, tmp_regno);
/* Load the vtable pointer (*this); zero-extend when pointers are
   narrower than Pmode (x32).  */
43247 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43248 if (Pmode != ptr_mode)
43249 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43250 emit_move_insn (tmp, this_mem);
43252 /* Adjust the this parameter. */
43253 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43255 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43257 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43258 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43259 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43262 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43263 if (Pmode != ptr_mode)
43264 emit_insn (gen_addsi_1_zext (this_reg,
43265 gen_rtx_REG (ptr_mode,
43269 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43272 /* If necessary, drop THIS back to its stack slot. */
43273 if (this_reg && this_reg != this_param)
43274 emit_move_insn (this_param, this_reg);
43276 fnaddr = XEXP (DECL_RTL (function), 0);
/* Build the callee address: direct for local/binds-local symbols,
   otherwise via GOTPCREL (64-bit), Mach-O indirection, or a GOT load
   through a freshly set-up PIC register (32-bit PIC).  */
43279 if (!flag_pic || targetm.binds_local_p (function)
43284 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43285 tmp = gen_rtx_CONST (Pmode, tmp);
43286 fnaddr = gen_const_mem (Pmode, tmp);
43291 if (!flag_pic || targetm.binds_local_p (function))
43294 else if (TARGET_MACHO)
43296 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43297 fnaddr = XEXP (fnaddr, 0);
43299 #endif /* TARGET_MACHO */
43302 tmp = gen_rtx_REG (Pmode, CX_REG);
43303 output_set_got (tmp, NULL_RTX);
43305 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43306 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43307 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43308 fnaddr = gen_const_mem (Pmode, fnaddr);
43312 /* Our sibling call patterns do not allow memories, because we have no
43313 predicate that can distinguish between frame and non-frame memory.
43314 For our purposes here, we can get away with (ab)using a jump pattern,
43315 because we're going to do no optimization. */
43316 if (MEM_P (fnaddr))
43318 if (sibcall_insn_operand (fnaddr, word_mode))
43320 fnaddr = XEXP (DECL_RTL (function), 0);
43321 tmp = gen_rtx_MEM (QImode, fnaddr);
43322 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43323 tmp = emit_call_insn (tmp);
43324 SIBLING_CALL_P (tmp) = 1;
43327 emit_jump_insn (gen_indirect_jump (fnaddr));
43331 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43333 // CM_LARGE_PIC always uses pseudo PIC register which is
43334 // uninitialized. Since FUNCTION is local and calling it
43335 // doesn't go through PLT, we use scratch register %r11 as
43336 // PIC register and initialize it here.
43337 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43338 ix86_init_large_pic_reg (tmp_regno);
43339 fnaddr = legitimize_pic_address (fnaddr,
43340 gen_rtx_REG (Pmode, tmp_regno));
43343 if (!sibcall_insn_operand (fnaddr, word_mode))
43345 tmp = gen_rtx_REG (word_mode, tmp_regno);
43346 if (GET_MODE (fnaddr) != word_mode)
43347 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43348 emit_move_insn (tmp, fnaddr);
43352 tmp = gen_rtx_MEM (QImode, fnaddr);
43353 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43354 tmp = emit_call_insn (tmp);
43355 SIBLING_CALL_P (tmp) = 1;
43359 /* Emit just enough of rest_of_compilation to get the insns emitted.
43360 Note that use_thunk calls assemble_start_function et al. */
43361 insn = get_insns ();
43362 shorten_branches (insn);
43363 final_start_function (insn, file, 1);
43364 final (insn, file, 1);
43365 final_end_function ();
/* TARGET_ASM_FILE_START: emit file-leading directives — default prologue,
   .code16gcc (16-bit mode), Darwin setup, optional .version / __fltused /
   Intel-syntax directives.  NOTE(review): sampled extract — the guards
   around .code16gcc and darwin_file_start are elided.  */
43369 x86_file_start (void)
43371 default_file_start ();
43373 fputs ("\t.code16gcc\n", asm_out_file);
43375 darwin_file_start ();
43377 if (X86_FILE_START_VERSION_DIRECTIVE)
43378 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43379 if (X86_FILE_START_FLTUSED)
43380 fputs ("\t.global\t__fltused\n", asm_out_file);
43381 if (ix86_asm_dialect == ASM_INTEL)
43382 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Cap the alignment of FIELD at 32 bits for double/complex-double and
   integer-class modes, unless on 64-bit or -malign-double — matching the
   traditional i386 ABI.  COMPUTED is the alignment computed so far.
   NOTE(review): sampled extract — the early `return computed` paths are
   elided.  */
43386 x86_field_alignment (tree field, int computed)
43389 tree type = TREE_TYPE (field);
43391 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* strip_array_types so an array field is judged by its element mode.  */
43393 mode = TYPE_MODE (strip_array_types (type));
43394 if (mode == DFmode || mode == DCmode
43395 || GET_MODE_CLASS (mode) == MODE_INT
43396 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43397 return MIN (32, computed);
43401 /* Print call to TARGET to FILE. */
/* Print either a call to TARGET or, under -mnop-mcount, a 5-byte NOP of
   equal size, labelled `1:' so __mcount_loc can reference it.  */
43404 x86_print_call_or_nop (FILE *file, const char *target)
43406 if (flag_nop_mcount)
43407 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43409 fprintf (file, "1:\tcall\t%s\n", target);
43412 /* Output assembler code to FILE to increment profiler label # LABELNO
43413 for profiling a function entry. */
/* Emit the profiler (mcount) call for function entry, handling 64-bit
   PIC (call via GOTPCREL), 32-bit PIC (call via GOT), and non-PIC direct
   calls, plus the optional profile counter load and the -mrecord-mcount
   __mcount_loc section entry.  NOTE(review): sampled extract — the
   TARGET_64BIT/flag_pic branch skeleton and #endif lines are elided.  */
43415 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43417 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43421 #ifndef NO_PROFILE_COUNTERS
43422 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43425 if (!TARGET_PECOFF && flag_pic)
43426 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43428 x86_print_call_or_nop (file, mcount_name);
43432 #ifndef NO_PROFILE_COUNTERS
43433 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43436 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43440 #ifndef NO_PROFILE_COUNTERS
43441 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43444 x86_print_call_or_nop (file, mcount_name);
/* -mrecord-mcount: record the call site address in __mcount_loc.  */
43447 if (flag_record_mcount)
43449 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43450 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43451 fprintf (file, "\t.previous\n");
43455 /* We don't have exact information about the insn sizes, but we may assume
43456 quite safely that we are informed about all 1 byte insns and memory
43457 address sizes. This is enough to eliminate unnecessary padding in
/* Estimate a lower bound on the byte size of INSN, used by the K8 jump-
   mispredict padding pass.  Conservative: relies on get_attr_length being
   exact for 1-byte insns and address sizes.  NOTE(review): sampled
   extract — several returns and the JUMP_P branch body are elided.  */
43461 min_insn_size (rtx_insn *insn)
43465 if (!INSN_P (insn) || !active_insn_p (insn))
43468 /* Discard alignments we've emit and jump instructions. */
43469 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43470 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43473 /* Important case - calls are always 5 bytes.
43474 It is common to have many calls in the row. */
43476 && symbolic_reference_mentioned_p (PATTERN (insn))
43477 && !SIBLING_CALL_P (insn))
43479 len = get_attr_length (insn);
43483 /* For normal instructions we rely on get_attr_length being exact,
43484 with a few exceptions. */
43485 if (!JUMP_P (insn))
43487 enum attr_type type = get_attr_type (insn);
43492 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43493 || asm_noperands (PATTERN (insn)) >= 0)
43500 /* Otherwise trust get_attr_length. */
43504 l = get_attr_length_address (insn);
43505 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43514 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43516 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Avoid the AMD K8 penalty of >3 jumps in one 16-byte window: slide a
   window [START, INSN] over the insn stream counting jumps and estimated
   bytes (via min_insn_size), and emit a pad before the 4th jump when the
   window could fit in 16 bytes.  NOTE(review): sampled extract — loop
   braces and a few statements (e.g. the min_size declaration, some
   continue/brace lines) are elided.  */
43520 ix86_avoid_jump_mispredicts (void)
43522 rtx_insn *insn, *start = get_insns ();
43523 int nbytes = 0, njumps = 0;
43524 bool isjump = false;
43526 /* Look for all minimal intervals of instructions containing 4 jumps.
43527 The intervals are bounded by START and INSN. NBYTES is the total
43528 size of instructions in the interval including INSN and not including
43529 START. When the NBYTES is smaller than 16 bytes, it is possible
43530 that the end of START and INSN ends up in the same 16byte page.
43532 The smallest offset in the page INSN can start is the case where START
43533 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43534 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43536 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43537 have to, control transfer to label(s) can be performed through other
43538 means, and also we estimate minimum length of all asm stmts as 0. */
43539 for (insn = start; insn; insn = NEXT_INSN (insn))
43543 if (LABEL_P (insn))
43545 int align = label_to_alignment (insn);
43546 int max_skip = label_to_max_skip (insn);
43550 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43551 already in the current 16 byte page, because otherwise
43552 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43553 bytes to reach 16 byte boundary. */
43555 || (align <= 3 && max_skip != (1 << align) - 1))
43558 fprintf (dump_file, "Label %i with max_skip %i\n",
43559 INSN_UID (insn), max_skip);
/* An aligned label resets the window: pop insns off the front until
   the remaining bytes plus max_skip fit in one 16-byte page.  */
43562 while (nbytes + max_skip >= 16)
43564 start = NEXT_INSN (start);
43565 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43567 njumps--, isjump = true;
43570 nbytes -= min_insn_size (start);
43576 min_size = min_insn_size (insn);
43577 nbytes += min_size;
43579 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43580 INSN_UID (insn), min_size);
43581 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Shrink the window from the front while it holds more than 4 jumps'
   worth of insns.  */
43589 start = NEXT_INSN (start);
43590 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43592 njumps--, isjump = true;
43595 nbytes -= min_insn_size (start);
43597 gcc_assert (njumps >= 0);
43599 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43600 INSN_UID (start), INSN_UID (insn), nbytes);
43602 if (njumps == 3 && isjump && nbytes < 16)
43604 int padsize = 15 - nbytes + min_insn_size (insn);
43607 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43608 INSN_UID (insn), padsize);
43609 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43615 /* AMD Athlon works faster
43616 when RET is not destination of conditional jump or directly preceded
43617 by other jump instruction. We avoid the penalty by inserting NOP just
43618 before the RET instructions in such cases. */
/* AMD K8/Athlon: insert a NOP (or widen the return) before RET insns that
   are the target of a conditional jump or directly preceded by another
   jump, to avoid a branch-misprediction penalty.  Walks the predecessors
   of the exit block.  NOTE(review): sampled extract — the `replace = true`
   assignments and surrounding braces are elided.  */
43620 ix86_pad_returns (void)
43625 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43627 basic_block bb = e->src;
43628 rtx_insn *ret = BB_END (bb);
43630 bool replace = false;
43632 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43633 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
43635 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43636 if (active_insn_p (prev) || LABEL_P (prev))
43638 if (prev && LABEL_P (prev))
/* RET follows a label: risky only when some predecessor reaches it by
   an actual jump (non-fallthru edge) with nonzero frequency.  */
43643 FOR_EACH_EDGE (e, ei, bb->preds)
43644 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43645 && !(e->flags & EDGE_FALLTHRU))
43653 prev = prev_active_insn (ret);
43655 && ((JUMP_P (prev) && any_condjump_p (prev))
43658 /* Empty functions get branch mispredict even when
43659 the jump destination is not visible to us. */
43660 if (!prev && !optimize_function_for_size_p (cfun))
43665 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43671 /* Count the minimum number of instructions in BB. Return 4 if the
43672 number of instructions >= 4. */
/* Count real (non-debug, non-USE, non-CLOBBER) insns in BB, saturating
   at 4.  Return insns are ignored (exit blocks only).  NOTE(review):
   sampled extract — the final `return insn_count;` and some braces are
   elided.  */
43675 ix86_count_insn_bb (basic_block bb)
43678 int insn_count = 0;
43680 /* Count number of instructions in this block. Return 4 if the number
43681 of instructions >= 4. */
43682 FOR_BB_INSNS (bb, insn)
43684 /* Only happen in exit blocks. */
43686 && ANY_RETURN_P (PATTERN (insn)))
43689 if (NONDEBUG_INSN_P (insn)
43690 && GET_CODE (PATTERN (insn)) != USE
43691 && GET_CODE (PATTERN (insn)) != CLOBBER)
43694 if (insn_count >= 4)
43703 /* Count the minimum number of instructions in code path in BB.
43704 Return 4 if the number of instructions >= 4. */
/* Minimum insn count along entry→BB paths with at most two basic blocks,
   saturating at 4: take the cheapest predecessor on a path from the entry
   block, then add BB's own count.  */
43707 ix86_count_insn (basic_block bb)
43711 int min_prev_count;
43713 /* Only bother counting instructions along paths with no
43714 more than 2 basic blocks between entry and exit. Given
43715 that BB has an edge to exit, determine if a predecessor
43716 of BB has an edge from entry. If so, compute the number
43717 of instructions in the predecessor block. If there
43718 happen to be multiple such blocks, compute the minimum. */
43719 min_prev_count = 4;
43720 FOR_EACH_EDGE (e, ei, bb->preds)
43723 edge_iterator prev_ei;
43725 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
/* BB is directly reachable from entry: no predecessor cost.  */
43727 min_prev_count = 0;
43730 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43732 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43734 int count = ix86_count_insn_bb (e->src);
43735 if (count < min_prev_count)
43736 min_prev_count = count;
43742 if (min_prev_count < 4)
43743 min_prev_count += ix86_count_insn_bb (bb);
43745 return min_prev_count;
43748 /* Pad short function to 4 instructions. */
/* -mpad-short-function: for each return, if fewer than 4 insns execute on
   the path to it, emit enough NOPs just before the epilogue to reach the
   equivalent of 4 (two NOPs count as one insn).  NOTE(review): sampled
   extract — the epilogue-search loop condition is partially elided.  */
43751 ix86_pad_short_function (void)
43756 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43758 rtx_insn *ret = BB_END (e->src);
43759 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43761 int insn_count = ix86_count_insn (e->src);
43763 /* Pad short function. */
43764 if (insn_count < 4)
43766 rtx_insn *insn = ret;
43768 /* Find epilogue. */
43771 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43772 insn = PREV_INSN (insn);
43777 /* Two NOPs count as one instruction. */
43778 insn_count = 2 * (4 - insn_count);
43779 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43785 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43786 the epilogue, the Windows system unwinder will apply epilogue logic and
43787 produce incorrect offsets. This can be avoided by adding a nop between
43788 the last insn that can throw and the first insn of the epilogue. */
/* NOTE(review): elided excerpt — function header, braces, and the
   "insn == NULL -> continue" style handling are missing; code untouched. */
43791 ix86_seh_fixup_eh_fallthru (void)
/* Examine each block that reaches the exit block (i.e. each epilogue). */
43796 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43798 rtx_insn *insn, *next;
43800 /* Find the beginning of the epilogue. */
43801 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43802 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43807 /* We only care about preceding insns that can throw. */
43808 insn = prev_active_insn (insn);
43809 if (insn == NULL || !can_throw_internal (insn))
43812 /* Do not separate calls from their debug information. */
/* Skip over any var-location / call-arg-location notes attached to
   the throwing insn so the nop goes after them, not between. */
43813 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43815 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43816 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
/* One nop is enough to separate the EH region from the epilogue. */
43821 emit_insn_after (gen_nops (const1_rtx), insn);
43825 /* Implement machine specific optimizations. We implement padding of returns
43826 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function-definition line itself is elided from this
   excerpt; from context this is presumably the machine-dependent reorg
   pass body (ix86_reorg) — TODO confirm against the full source. */
43830 /* We are freeing block_for_insn in the toplev to keep compatibility
43831 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43832 compute_bb_for_insn ();
/* SEH targets: keep the Windows unwinder from misinterpreting
   EH fallthrough into the epilogue (see ix86_seh_fixup_eh_fallthru). */
43834 if (TARGET_SEH && current_function_has_exception_handlers ())
43835 ix86_seh_fixup_eh_fallthru ();
/* The padding transformations are speed optimizations only. */
43837 if (optimize && optimize_function_for_speed_p (cfun))
43839 if (TARGET_PAD_SHORT_FUNCTION)
43840 ix86_pad_short_function ();
43841 else if (TARGET_PAD_RETURNS)
43842 ix86_pad_returns ();
43843 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43844 if (TARGET_FOUR_JUMP_LIMIT)
43845 ix86_avoid_jump_mispredicts ();
43850 /* Return nonzero when QImode register that must be represented via REX prefix
/* NOTE(review): elided excerpt — the rest of the header comment, the
   return type, braces, `int i;` and the return statements are missing. */
43853 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
/* Use the cached recog data so repeated queries on INSN are cheap. */
43856 extract_insn_cached (insn);
/* Scan all operands for a general register whose number is not
   addressable as a legacy QImode register (a/b/c/d). */
43857 for (i = 0; i < recog_data.n_operands; i++)
43858 if (GENERAL_REG_P (recog_data.operand[i])
43859 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43864 /* Return true when INSN mentions register that must be encoded using REX
/* NOTE(review): elided excerpt — header-comment tail, return type,
   braces, the REG_P test and the return statements are missing. */
43867 x86_extended_reg_mentioned_p (rtx insn)
43869 subrtx_iterator::array_type array;
/* Walk every sub-rtx of the pattern (or of INSN itself when it is a
   bare rtx rather than an insn). */
43870 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43872 const_rtx x = *iter;
/* REX-only registers: r8-r15 and xmm8-xmm15. */
43874 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43880 /* If profitable, negate (without causing overflow) integer constant
43881 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): elided excerpt — the return type, braces, a switch over
   MODE, and the return statements are missing; code left untouched. */
43883 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
/* Only CONST_INTs can be negated in place here. */
43887 if (!CONST_INT_P (*loc))
43893 /* DImode x86_64 constants must fit in 32 bits. */
43894 gcc_assert (x86_64_immediate_operand (*loc, mode));
43905 gcc_unreachable ();
43908 /* Avoid overflows. */
/* The most negative value of MODE has no representable negation. */
43909 if (mode_signbit_p (mode, *loc))
43912 val = INTVAL (*loc);
43914 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43915 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43916 if ((val < 0 && val != -128)
43919 *loc = GEN_INT (-val);
43926 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43927 optabs would emit if we didn't have TFmode patterns. */
/* NOTE(review): elided excerpt — return type, braces, `out = operands[0]`
   and the operand flags of some expand calls are missing; code untouched. */
43930 x86_emit_floatuns (rtx operands[2])
43932 rtx_code_label *neglab, *donelab;
43933 rtx i0, i1, f0, in, out;
43934 machine_mode mode, inmode;
43936 inmode = GET_MODE (operands[1]);
43937 gcc_assert (inmode == SImode || inmode == DImode);
43940 in = force_reg (inmode, operands[1]);
43941 mode = GET_MODE (out);
43942 neglab = gen_label_rtx ();
43943 donelab = gen_label_rtx ();
43944 f0 = gen_reg_rtx (mode);
/* Nonnegative input: the signed conversion already gives the right
   answer, so take the fast path. */
43946 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43948 expand_float (out, in, 0);
43950 emit_jump_insn (gen_jump (donelab));
/* "Negative" (high-bit-set) input: halve it while preserving the low
   bit (in>>1 | in&1), convert, then double the FP result. */
43953 emit_label (neglab);
43955 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43957 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43959 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43961 expand_float (f0, i0, 0);
43963 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43965 emit_label (donelab);
43968 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43969 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43970 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43971 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43973 /* Get a vector mode of the same size as the original but with elements
43974 twice as wide. This is only guaranteed to apply to integral vectors. */
/* NOTE(review): the `return n;` line appears elided from this excerpt. */
43976 static inline machine_mode
43977 get_mode_wider_vector (machine_mode o)
43979 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43980 machine_mode n = GET_MODE_WIDER_MODE (o);
/* Sanity: same total size, half the element count. */
43981 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43982 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43986 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43987 fill target with val via vec_duplicate. */
/* NOTE(review): elided excerpt — return type, braces, local decls,
   start_sequence/end_sequence, and the `return ok;` are missing. */
43990 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43996 /* First attempt to recognize VAL as-is. */
43997 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43998 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43999 if (recog_memoized (insn) < 0)
44002 /* If that fails, force VAL into a register. */
/* Re-use the already-emitted insn: patch its VEC_DUPLICATE operand to
   the new register and insert the forcing sequence before it. */
44005 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
44006 seq = get_insns ();
44009 emit_insn_before (seq, insn);
/* Success iff some insn pattern now matches the (possibly patched) set. */
44011 ok = recog_memoized (insn) >= 0;
44017 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44018 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): elided excerpt — the switch over MODE with its `case`
   labels, braces and several returns are missing, so the mode each
   branch below handles cannot be stated with certainty; code untouched. */
44021 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
44022 rtx target, rtx val)
44046 return ix86_vector_duplicate_value (mode, target, val);
44051 if (TARGET_SSE || TARGET_3DNOW_A)
/* Broadcast via TRUNCATE of the SImode low part, duplicated. */
44055 val = gen_lowpart (SImode, val);
44056 x = gen_rtx_TRUNCATE (HImode, val);
44057 x = gen_rtx_VEC_DUPLICATE (mode, x);
44058 emit_insn (gen_rtx_SET (VOIDmode, target, x));
44070 return ix86_vector_duplicate_value (mode, target, val);
/* Broadcast via a one-operand permutation (shuffle) of VAL. */
44074 struct expand_vec_perm_d dperm;
44078 memset (&dperm, 0, sizeof (dperm));
44079 dperm.target = target;
44080 dperm.vmode = mode;
44081 dperm.nelt = GET_MODE_NUNITS (mode);
44082 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
44083 dperm.one_operand_p = true;
44085 /* Extend to SImode using a paradoxical SUBREG. */
44086 tmp1 = gen_reg_rtx (SImode);
44087 emit_move_insn (tmp1, gen_lowpart (SImode, val));
44089 /* Insert the SImode value as low element of a V4SImode vector. */
44090 tmp2 = gen_reg_rtx (V4SImode);
44091 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
44092 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
44094 ok = (expand_vec_perm_1 (&dperm)
44095 || expand_vec_perm_broadcast_1 (&dperm));
44103 return ix86_vector_duplicate_value (mode, target, val);
44110 /* Replicate the value once into the next wider mode and recurse. */
44112 machine_mode smode, wsmode, wvmode;
44115 smode = GET_MODE_INNER (mode);
44116 wvmode = get_mode_wider_vector (mode);
44117 wsmode = GET_MODE_INNER (wvmode);
/* Build a double-width scalar containing VAL in both halves:
   (val << bits) | val. */
44119 val = convert_modes (wsmode, smode, val, true);
44120 x = expand_simple_binop (wsmode, ASHIFT, val,
44121 GEN_INT (GET_MODE_BITSIZE (smode)),
44122 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44123 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
44125 x = gen_reg_rtx (wvmode);
44126 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
44128 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
44135 return ix86_vector_duplicate_value (mode, target, val);
/* Duplicate into a half-width vector, then concatenate it with itself. */
44138 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
44139 rtx x = gen_reg_rtx (hvmode);
44141 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44144 x = gen_rtx_VEC_CONCAT (mode, x, x);
44145 emit_insn (gen_rtx_SET (VOIDmode, target, x));
44151 if (TARGET_AVX512BW)
44152 return ix86_vector_duplicate_value (mode, target, val);
/* Without AVX-512BW, same half-and-concat trick for 512-bit modes. */
44155 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
44156 rtx x = gen_reg_rtx (hvmode);
44158 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44161 x = gen_rtx_VEC_CONCAT (mode, x, x);
44162 emit_insn (gen_rtx_SET (VOIDmode, target, x));
44171 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44172 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): elided excerpt — the comment tail, return type, the
   switch's `case` labels, braces and returns are missing; which vector
   mode each branch handles is therefore not stated below. */
44176 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
44177 rtx target, rtx var, int one_var)
44179 machine_mode vsimode;
44182 bool use_vector_set = false;
44187 /* For SSE4.1, we normally use vector set. But if the second
44188 element is zero and inter-unit moves are OK, we use movq
44190 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
44191 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
44197 use_vector_set = TARGET_SSE4_1;
44200 use_vector_set = TARGET_SSE2;
44203 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
44210 use_vector_set = TARGET_AVX;
44213 /* Use ix86_expand_vector_set in 64bit mode only. */
44214 use_vector_set = TARGET_AVX && TARGET_64BIT;
44220 if (use_vector_set)
/* Zero the whole vector, then insert VAR at element ONE_VAR. */
44222 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
44223 var = force_reg (GET_MODE_INNER (mode), var);
44224 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: concat VAR with a zero scalar. */
44240 var = force_reg (GET_MODE_INNER (mode), var);
44241 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44242 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Build (vec_merge (vec_duplicate var) 0 1) in a pseudo: VAR lands in
   element 0 with the rest zeroed; shuffle it into place afterwards. */
44247 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44248 new_target = gen_reg_rtx (mode);
44250 new_target = target;
44251 var = force_reg (GET_MODE_INNER (mode), var);
44252 x = gen_rtx_VEC_DUPLICATE (mode, var);
44253 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44254 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
44257 /* We need to shuffle the value to the correct position, so
44258 create a new pseudo to store the intermediate result. */
44260 /* With SSE2, we can use the integer shuffle insns. */
44261 if (mode != V4SFmode && TARGET_SSE2)
/* pshufd: element ONE_VAR selects lane 0 (where VAR is); the other
   lanes select lane 1 (a known-zero lane). */
44263 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44265 GEN_INT (one_var == 1 ? 0 : 1),
44266 GEN_INT (one_var == 2 ? 0 : 1),
44267 GEN_INT (one_var == 3 ? 0 : 1)));
44268 if (target != new_target)
44269 emit_move_insn (target, new_target);
44273 /* Otherwise convert the intermediate result to V4SFmode and
44274 use the SSE1 shuffle instructions. */
44275 if (mode != V4SFmode)
44277 tmp = gen_reg_rtx (V4SFmode);
44278 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps selectors: +4 picks from the second source operand. */
44283 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44285 GEN_INT (one_var == 1 ? 0 : 1),
44286 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44287 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44289 if (mode != V4SFmode)
44290 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44291 else if (tmp != target)
44292 emit_move_insn (target, tmp);
44294 else if (target != new_target)
44295 emit_move_insn (target, new_target);
44300 vsimode = V4SImode;
44306 vsimode = V2SImode;
44312 /* Zero extend the variable element to SImode and recurse. */
44313 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44315 x = gen_reg_rtx (vsimode);
44316 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44318 gcc_unreachable ();
44320 emit_move_insn (target, gen_lowpart (mode, x));
44328 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44329 consisting of the values in VALS. It is known that all elements
44330 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): elided excerpt — return type, braces, the switch's
   `case` labels and the surrounding QImode/HImode setup (wmode choice)
   are missing; code left untouched. */
44333 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44334 rtx target, rtx vals, int one_var)
44336 rtx var = XVECEXP (vals, 0, one_var);
44337 machine_mode wmode;
/* Build the constant vector with a zero in the variable slot; it can
   then be loaded from the pool and patched. */
44340 const_vec = copy_rtx (vals);
44341 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44342 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44350 /* For the two element vectors, it's just as easy to use
44351 the general case. */
44355 /* Use ix86_expand_vector_set in 64bit mode only. */
44378 /* There's no way to set one QImode entry easily. Combine
44379 the variable value with its adjacent constant value, and
44380 promote to an HImode set. */
/* one_var ^ 1 is the constant partner byte sharing the HImode slot. */
44381 x = XVECEXP (vals, 0, one_var ^ 1);
/* (elided `if`) branch on whether the variable byte is the high or
   low half of the HImode unit — TODO confirm against full source. */
44384 var = convert_modes (HImode, QImode, var, true);
44385 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44386 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44387 x = GEN_INT (INTVAL (x) & 0xff);
44391 var = convert_modes (HImode, QImode, var, true);
44392 x = gen_int_mode (INTVAL (x) << 8, HImode);
44394 if (x != const0_rtx)
44395 var = expand_simple_binop (HImode, IOR, var, x, var,
44396 1, OPTAB_LIB_WIDEN);
44398 x = gen_reg_rtx (wmode);
44399 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44400 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44402 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the pool constant, then overwrite one element. */
44409 emit_move_insn (target, const_vec);
44410 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44414 /* A subroutine of ix86_expand_vector_init_general. Use vector
44415 concatenate to handle the most general case: all values variable,
44416 and none identical. */
/* NOTE(review): elided excerpt — the switch over N with its `case`
   labels, the cmode/hmode/gmode selection tables and several braces are
   missing; code left untouched. */
44419 ix86_expand_vector_init_concat (machine_mode mode,
44420 rtx target, rtx *ops, int n)
44422 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44423 rtx first[16], second[8], third[4];
44475 gcc_unreachable ();
/* n == 2: a single VEC_CONCAT of the two (register) operands. */
44478 if (!register_operand (ops[1], cmode))
44479 ops[1] = force_reg (cmode, ops[1]);
44480 if (!register_operand (ops[0], cmode))
44481 ops[0] = force_reg (cmode, ops[0]);
44482 emit_insn (gen_rtx_SET (VOIDmode, target,
44483 gen_rtx_VEC_CONCAT (mode, ops[0],
44503 gcc_unreachable ();
44527 gcc_unreachable ();
44545 gcc_unreachable ();
44550 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pairwise-combine the N inputs into N/2 vectors of mode CMODE. */
44553 for (; i > 0; i -= 2, j--)
44555 first[j] = gen_reg_rtx (cmode);
44556 v = gen_rtvec (2, ops[i - 1], ops[i]);
44557 ix86_expand_vector_init (false, first[j],
44558 gen_rtx_PARALLEL (cmode, v));
/* Three-level recursion (cmode -> hmode -> gmode -> mode). */
44564 gcc_assert (hmode != VOIDmode);
44565 gcc_assert (gmode != VOIDmode);
44566 for (i = j = 0; i < n; i += 2, j++)
44568 second[j] = gen_reg_rtx (hmode);
44569 ix86_expand_vector_init_concat (hmode, second [j],
44573 for (i = j = 0; i < n; i += 2, j++)
44575 third[j] = gen_reg_rtx (gmode);
44576 ix86_expand_vector_init_concat (gmode, third[j],
44580 ix86_expand_vector_init_concat (mode, target, third, n);
/* Two-level recursion (cmode -> hmode -> mode). */
44584 gcc_assert (hmode != VOIDmode);
44585 for (i = j = 0; i < n; i += 2, j++)
44587 second[j] = gen_reg_rtx (hmode);
44588 ix86_expand_vector_init_concat (hmode, second [j],
44592 ix86_expand_vector_init_concat (mode, target, second, n);
/* One-level recursion (cmode -> mode). */
44595 ix86_expand_vector_init_concat (mode, target, first, n);
44599 gcc_unreachable ();
44603 /* A subroutine of ix86_expand_vector_init_general. Use vector
44604 interleave to handle the most general case: all values variable,
44605 and none identical. */
/* NOTE(review): elided excerpt — return type, braces, the switch's
   `case V8HImode:` / `case V16QImode:` labels and some loop bodies are
   missing; code left untouched. */
44608 ix86_expand_vector_init_interleave (machine_mode mode,
44609 rtx target, rtx *ops, int n)
44611 machine_mode first_imode, second_imode, third_imode, inner_mode;
/* Per-mode insn generators: element insert plus two levels of
   "interleave low" used to merge progressively wider lanes. */
44614 rtx (*gen_load_even) (rtx, rtx, rtx);
44615 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44616 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44621 gen_load_even = gen_vec_setv8hi;
44622 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44623 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44624 inner_mode = HImode;
44625 first_imode = V4SImode;
44626 second_imode = V2DImode;
44627 third_imode = VOIDmode;
44630 gen_load_even = gen_vec_setv16qi;
44631 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44632 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44633 inner_mode = QImode;
44634 first_imode = V8HImode;
44635 second_imode = V4SImode;
44636 third_imode = V2DImode;
44639 gcc_unreachable ();
/* Stage 1: pack each odd/even pair of scalars into one vector. */
44642 for (i = 0; i < n; i++)
44644 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
44645 op0 = gen_reg_rtx (SImode);
44646 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44648 /* Insert the SImode value as low element of V4SImode vector. */
44649 op1 = gen_reg_rtx (V4SImode);
44650 op0 = gen_rtx_VEC_MERGE (V4SImode,
44651 gen_rtx_VEC_DUPLICATE (V4SImode,
44653 CONST0_RTX (V4SImode),
44655 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44657 /* Cast the V4SImode vector back to a vector in orignal mode. */
44658 op0 = gen_reg_rtx (mode);
44659 emit_move_insn (op0, gen_lowpart (mode, op1));
44661 /* Load even elements into the second position. */
44662 emit_insn (gen_load_even (op0,
44663 force_reg (inner_mode,
44667 /* Cast vector to FIRST_IMODE vector. */
44668 ops[i] = gen_reg_rtx (first_imode);
44669 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44672 /* Interleave low FIRST_IMODE vectors. */
44673 for (i = j = 0; i < n; i += 2, j++)
44675 op0 = gen_reg_rtx (first_imode);
44676 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44678 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44679 ops[j] = gen_reg_rtx (second_imode);
44680 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44683 /* Interleave low SECOND_IMODE vectors. */
44684 switch (second_imode)
44687 for (i = j = 0; i < n / 2; i += 2, j++)
44689 op0 = gen_reg_rtx (second_imode);
44690 emit_insn (gen_interleave_second_low (op0, ops[i],
44693 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44695 ops[j] = gen_reg_rtx (third_imode);
44696 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Fall through to the V2DImode final merge with updated generator. */
44698 second_imode = V2DImode;
44699 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44703 op0 = gen_reg_rtx (second_imode);
44704 emit_insn (gen_interleave_second_low (op0, ops[0],
44707 /* Cast the SECOND_IMODE vector back to a vector on original
44709 emit_insn (gen_rtx_SET (VOIDmode, target,
44710 gen_lowpart (mode, op0)));
44714 gcc_unreachable ();
44718 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44719 all values variable, and none identical. */
/* NOTE(review): elided excerpt — the switch over MODE with its `case`
   labels, several braces and `break`s are missing, so the mode handled
   by each section below is inferred only from context; code untouched. */
44722 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44723 rtx target, rtx vals)
44725 rtx ops[64], op0, op1, op2, op3, op4, op5;
44726 machine_mode half_mode = VOIDmode;
44727 machine_mode quarter_mode = VOIDmode;
44734 if (!mmx_ok && !TARGET_SSE)
/* Simple case: build by recursive concatenation. */
44750 n = GET_MODE_NUNITS (mode);
44751 for (i = 0; i < n; i++)
44752 ops[i] = XVECEXP (vals, 0, i);
44753 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI vectors: interleave each 128-bit half, then concat. */
44757 half_mode = V16QImode;
44761 half_mode = V8HImode;
44765 n = GET_MODE_NUNITS (mode);
44766 for (i = 0; i < n; i++)
44767 ops[i] = XVECEXP (vals, 0, i);
44768 op0 = gen_reg_rtx (half_mode);
44769 op1 = gen_reg_rtx (half_mode);
44770 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44772 ix86_expand_vector_init_interleave (half_mode, op1,
44773 &ops [n >> 1], n >> 2);
44774 emit_insn (gen_rtx_SET (VOIDmode, target,
44775 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI vectors: four quarter interleaves, two half concats,
   one final concat. */
44779 quarter_mode = V16QImode;
44780 half_mode = V32QImode;
44784 quarter_mode = V8HImode;
44785 half_mode = V16HImode;
44789 n = GET_MODE_NUNITS (mode);
44790 for (i = 0; i < n; i++)
44791 ops[i] = XVECEXP (vals, 0, i);
44792 op0 = gen_reg_rtx (quarter_mode);
44793 op1 = gen_reg_rtx (quarter_mode);
44794 op2 = gen_reg_rtx (quarter_mode);
44795 op3 = gen_reg_rtx (quarter_mode);
44796 op4 = gen_reg_rtx (half_mode);
44797 op5 = gen_reg_rtx (half_mode);
44798 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44800 ix86_expand_vector_init_interleave (quarter_mode, op1,
44801 &ops [n >> 2], n >> 3);
44802 ix86_expand_vector_init_interleave (quarter_mode, op2,
44803 &ops [n >> 1], n >> 3);
44804 ix86_expand_vector_init_interleave (quarter_mode, op3,
44805 &ops [(n >> 1) | (n >> 2)], n >> 3);
44806 emit_insn (gen_rtx_SET (VOIDmode, op4,
44807 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44808 emit_insn (gen_rtx_SET (VOIDmode, op5,
44809 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44810 emit_insn (gen_rtx_SET (VOIDmode, target,
44811 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44815 if (!TARGET_SSE4_1)
44823 /* Don't use ix86_expand_vector_init_interleave if we can't
44824 move from GPR to SSE register directly. */
44825 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44828 n = GET_MODE_NUNITS (mode);
44829 for (i = 0; i < n; i++)
44830 ops[i] = XVECEXP (vals, 0, i);
44831 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44839 gcc_unreachable ();
/* Fallback: assemble the vector word by word through integer
   registers, shifting/IORing elements into each word. */
44843 int i, j, n_elts, n_words, n_elt_per_word;
44844 machine_mode inner_mode;
44845 rtx words[4], shift;
44847 inner_mode = GET_MODE_INNER (mode);
44848 n_elts = GET_MODE_NUNITS (mode);
44849 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44850 n_elt_per_word = n_elts / n_words;
44851 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44853 for (i = 0; i < n_words; ++i)
44855 rtx word = NULL_RTX;
/* Elements are folded in from the highest index downward so the
   first element ends up in the low bits. */
44857 for (j = 0; j < n_elt_per_word; ++j)
44859 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44860 elt = convert_modes (word_mode, inner_mode, elt, true);
44866 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44867 word, 1, OPTAB_LIB_WIDEN);
44868 word = expand_simple_binop (word_mode, IOR, word, elt,
44869 word, 1, OPTAB_LIB_WIDEN);
44877 emit_move_insn (target, gen_lowpart (mode, words[0]));
44878 else if (n_words == 2)
/* The clobber tells the RA the old value of TMP is dead before the
   two partial writes. */
44880 rtx tmp = gen_reg_rtx (mode);
44881 emit_clobber (tmp);
44882 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44883 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44884 emit_move_insn (target, tmp);
44886 else if (n_words == 4)
44888 rtx tmp = gen_reg_rtx (V4SImode);
44889 gcc_assert (word_mode == SImode);
44890 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44891 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44892 emit_move_insn (target, gen_lowpart (mode, tmp));
44895 gcc_unreachable ();
44899 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44900 instructions unless MMX_OK is true. */
/* NOTE(review): elided excerpt — return type, braces and some `if`
   heads (e.g. the n_var == 0 / all_same guards) are missing. */
44903 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44905 machine_mode mode = GET_MODE (target);
44906 machine_mode inner_mode = GET_MODE_INNER (mode);
44907 int n_elts = GET_MODE_NUNITS (mode);
44908 int n_var = 0, one_var = -1;
44909 bool all_same = true, all_const_zero = true;
/* Classify the elements: count the non-constant ones (remembering the
   last index), and track all-zero / all-identical properties. */
44913 for (i = 0; i < n_elts; ++i)
44915 x = XVECEXP (vals, 0, i);
44916 if (!(CONST_INT_P (x)
44917 || GET_CODE (x) == CONST_DOUBLE
44918 || GET_CODE (x) == CONST_FIXED))
44919 n_var++, one_var = i;
44920 else if (x != CONST0_RTX (inner_mode))
44921 all_const_zero = false;
44922 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44926 /* Constants are best loaded from the constant pool. */
44929 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44933 /* If all values are identical, broadcast the value. */
44935 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44936 XVECEXP (vals, 0, 0)))
44939 /* Values where only one field is non-constant are best loaded from
44940 the pool and overwritten via move later. */
44944 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44945 XVECEXP (vals, 0, one_var),
44949 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* General fallback: all strategies above declined. */
44953 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* NOTE(review): elided excerpt — the `void` return-type line, the big
   switch's `case` labels, braces, `break`s and several locals are
   missing; each section below is annotated only from visible context. */
44957 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44959 machine_mode mode = GET_MODE (target);
44960 machine_mode inner_mode = GET_MODE_INNER (mode);
44961 machine_mode half_mode;
44962 bool use_vec_merge = false;
/* Tables of lo/hi 128-bit-lane extract and insert generators, indexed
   by 256-bit mode kind and by which half ELT falls in. */
44964 static rtx (*gen_extract[6][2]) (rtx, rtx)
44966 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44967 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44968 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44969 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44970 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44971 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44973 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44975 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44976 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44977 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44978 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44979 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44980 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* When MMODE is set below, the AVX-512 masked-blend path is used. */
44983 machine_mode mmode = VOIDmode;
44984 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
/* Two-element case: extract the untouched element and re-concat with
   VAL in the right order. */
44992 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44993 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44995 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44997 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44998 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45004 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
45008 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
45009 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
45011 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
45013 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
45014 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45021 /* For the two element vectors, we implement a VEC_CONCAT with
45022 the extraction of the other element. */
45024 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
45025 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
45028 op0 = val, op1 = tmp;
45030 op0 = tmp, op1 = val;
45032 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
45033 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45038 use_vec_merge = TARGET_SSE4_1;
45045 use_vec_merge = true;
/* V4SF without SSE4.1: per-element shuffle recipes using
   interleave + shufps; A B C D denote the original lanes, X = VAL. */
45049 /* tmp = target = A B C D */
45050 tmp = copy_to_reg (target);
45051 /* target = A A B B */
45052 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
45053 /* target = X A B B */
45054 ix86_expand_vector_set (false, target, val, 0);
45055 /* target = A X C D */
45056 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45057 const1_rtx, const0_rtx,
45058 GEN_INT (2+4), GEN_INT (3+4)));
45062 /* tmp = target = A B C D */
45063 tmp = copy_to_reg (target);
45064 /* tmp = X B C D */
45065 ix86_expand_vector_set (false, tmp, val, 0);
45066 /* target = A B X D */
45067 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45068 const0_rtx, const1_rtx,
45069 GEN_INT (0+4), GEN_INT (3+4)));
45073 /* tmp = target = A B C D */
45074 tmp = copy_to_reg (target);
45075 /* tmp = X B C D */
45076 ix86_expand_vector_set (false, tmp, val, 0);
45077 /* target = A B X D */
45078 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45079 const0_rtx, const1_rtx,
45080 GEN_INT (2+4), GEN_INT (0+4)));
45084 gcc_unreachable ();
45089 use_vec_merge = TARGET_SSE4_1;
45093 /* Element 0 handled by vec_merge below. */
45096 use_vec_merge = true;
45102 /* With SSE2, use integer shuffles to swap element 0 and ELT,
45103 store into element 0, then shuffle them back. */
/* ORDER is the identity permutation with lanes 0 and ELT swapped, so
   applying it twice restores the original layout. */
45107 order[0] = GEN_INT (elt);
45108 order[1] = const1_rtx;
45109 order[2] = const2_rtx;
45110 order[3] = GEN_INT (3);
45111 order[elt] = const0_rtx;
45113 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45114 order[1], order[2], order[3]));
45116 ix86_expand_vector_set (false, target, val, 0);
45118 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45119 order[1], order[2], order[3]));
45123 /* For SSE1, we have to reuse the V4SF code. */
45124 rtx t = gen_reg_rtx (V4SFmode);
45125 emit_move_insn (t, gen_lowpart (V4SFmode, target));
45126 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
45127 emit_move_insn (target, gen_lowpart (mode, t));
45132 use_vec_merge = TARGET_SSE2;
45135 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45139 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: set HALF_MODE and recurse into one 128-bit lane. */
45146 half_mode = V16QImode;
45152 half_mode = V8HImode;
45158 half_mode = V4SImode;
45164 half_mode = V2DImode;
45170 half_mode = V4SFmode;
45176 half_mode = V2DFmode;
45182 /* Compute offset. */
/* (elided) i selects the lo/hi half, j the gen_extract/gen_insert row;
   elt is reduced to an index within the half. */
45186 gcc_assert (i <= 1);
45188 /* Extract the half. */
45189 tmp = gen_reg_rtx (half_mode);
45190 emit_insn (gen_extract[j][i] (tmp, target));
45192 /* Put val in tmp at elt. */
45193 ix86_expand_vector_set (false, tmp, val, elt);
45196 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: select the AVX-512 masked-blend generator. */
45200 if (TARGET_AVX512F)
45203 gen_blendm = gen_avx512f_blendmv8df;
45208 if (TARGET_AVX512F)
45211 gen_blendm = gen_avx512f_blendmv8di;
45216 if (TARGET_AVX512F)
45219 gen_blendm = gen_avx512f_blendmv16si;
45224 if (TARGET_AVX512F)
45227 gen_blendm = gen_avx512f_blendmv16si;
45232 if (TARGET_AVX512F && TARGET_AVX512BW)
45235 gen_blendm = gen_avx512bw_blendmv32hi;
45240 if (TARGET_AVX512F && TARGET_AVX512BW)
45243 gen_blendm = gen_avx512bw_blendmv64qi;
/* AVX-512 path: broadcast VAL, then blend it into lane ELT under a
   one-bit mask. */
45251 if (mmode != VOIDmode)
45253 tmp = gen_reg_rtx (mode);
45254 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45255 gen_rtx_VEC_DUPLICATE (mode, val)));
45256 /* The avx512*_blendm<mode> expanders have different operand order
45257 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
45258 elements where the mask is set and second input operand otherwise,
45259 in {sse,avx}*_*blend* the first input operand is used for elements
45260 where the mask is clear and second input operand otherwise. */
45261 emit_insn (gen_blendm (target, target, tmp,
45263 gen_int_mode (1 << elt, mmode))));
45265 else if (use_vec_merge)
45267 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45268 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45269 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: round-trip through a stack slot. */
45273 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45275 emit_move_insn (mem, target);
45277 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45278 emit_move_insn (tmp, val);
45280 emit_move_insn (target, mem);
45285 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45287 machine_mode mode = GET_MODE (vec);
45288 machine_mode inner_mode = GET_MODE_INNER (mode);
45289 bool use_vec_extr = false;
45302 use_vec_extr = true;
45306 use_vec_extr = TARGET_SSE4_1;
45318 tmp = gen_reg_rtx (mode);
45319 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45320 GEN_INT (elt), GEN_INT (elt),
45321 GEN_INT (elt+4), GEN_INT (elt+4)));
45325 tmp = gen_reg_rtx (mode);
45326 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45330 gcc_unreachable ();
45333 use_vec_extr = true;
45338 use_vec_extr = TARGET_SSE4_1;
45352 tmp = gen_reg_rtx (mode);
45353 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45354 GEN_INT (elt), GEN_INT (elt),
45355 GEN_INT (elt), GEN_INT (elt)));
45359 tmp = gen_reg_rtx (mode);
45360 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45364 gcc_unreachable ();
45367 use_vec_extr = true;
45372 /* For SSE1, we have to reuse the V4SF code. */
45373 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45374 gen_lowpart (V4SFmode, vec), elt);
45380 use_vec_extr = TARGET_SSE2;
45383 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45387 use_vec_extr = TARGET_SSE4_1;
45393 tmp = gen_reg_rtx (V4SFmode);
45395 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45397 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45398 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45406 tmp = gen_reg_rtx (V2DFmode);
45408 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45410 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45411 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45419 tmp = gen_reg_rtx (V16QImode);
45421 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45423 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45424 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45432 tmp = gen_reg_rtx (V8HImode);
45434 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45436 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45437 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45445 tmp = gen_reg_rtx (V4SImode);
45447 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45449 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45450 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45458 tmp = gen_reg_rtx (V2DImode);
45460 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45462 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45463 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45469 if (TARGET_AVX512BW)
45471 tmp = gen_reg_rtx (V16HImode);
45473 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45475 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45476 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45482 if (TARGET_AVX512BW)
45484 tmp = gen_reg_rtx (V32QImode);
45486 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45488 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45489 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45495 tmp = gen_reg_rtx (V8SFmode);
45497 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45499 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45500 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45504 tmp = gen_reg_rtx (V4DFmode);
45506 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45508 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45509 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45513 tmp = gen_reg_rtx (V8SImode);
45515 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45517 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45518 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45522 tmp = gen_reg_rtx (V4DImode);
45524 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45526 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45527 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45531 /* ??? Could extract the appropriate HImode element and shift. */
45538 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45539 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45541 /* Let the rtl optimizers know about the zero extension performed. */
45542 if (inner_mode == QImode || inner_mode == HImode)
45544 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45545 target = gen_lowpart (SImode, target);
45548 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45552 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45554 emit_move_insn (mem, vec);
45556 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45557 emit_move_insn (target, tmp);
45561 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45562 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45563 The upper bits of DEST are undefined, though they shouldn't cause
45564 exceptions (some bits from src or all zeros are ok). */
/* Dispatches on GET_MODE (src): SSE movhlps/shufps for V4SF, an
   interleave-high for V2DF, a V1TImode logical right shift for 16-byte
   integer vectors, AVX vperm2f128/shufps256/shufpd256 for 32-byte float
   vectors, AVX2 vperm2ti/lshrv2ti for 32-byte integer vectors, and
   AVX512F shuf_i32x4/pshufd for 64-byte vectors.  I is the width in bits
   of the span being halved (512/256/128/64...).
   NOTE(review): this chunk is a sampled extraction -- the case labels,
   braces, and some argument lines between the statements below are
   elided, so only comments were added here.  */
45567 emit_reduc_half (rtx dest, rtx src, int i)
45570 switch (GET_MODE (src))
45574 tem = gen_sse_movhlps (dest, src, src);
45576 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45577 GEN_INT (1 + 4), GEN_INT (1 + 4));
45580 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 16-byte integer vectors: shift the whole register right by i/2 bits
   through a V1TImode view.  */
45586 d = gen_reg_rtx (V1TImode);
45587 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45592 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45594 tem = gen_avx_shufps256 (dest, src, src,
45595 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45599 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45601 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* AVX2 integer path: permute the upper 128-bit lane down through a
   V4DImode view; D holds the intermediate when DEST has another mode.  */
45609 if (GET_MODE (dest) != V4DImode)
45610 d = gen_reg_rtx (V4DImode);
45611 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45612 gen_lowpart (V4DImode, src),
45617 d = gen_reg_rtx (V2TImode);
45618 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* AVX512F: shuffle 128-bit quadrants; the i == 512 test selects whether
   the upper half or the upper quarter is brought down.  */
45629 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45630 gen_lowpart (V16SImode, src),
45631 gen_lowpart (V16SImode, src),
45632 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45633 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45634 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45635 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45636 GEN_INT (0xC), GEN_INT (0xD),
45637 GEN_INT (0xE), GEN_INT (0xF),
45638 GEN_INT (0x10), GEN_INT (0x11),
45639 GEN_INT (0x12), GEN_INT (0x13),
45640 GEN_INT (0x14), GEN_INT (0x15),
45641 GEN_INT (0x16), GEN_INT (0x17));
45643 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45644 gen_lowpart (V16SImode, src),
45645 GEN_INT (i == 128 ? 0x2 : 0x1),
45649 GEN_INT (i == 128 ? 0x6 : 0x5),
45653 GEN_INT (i == 128 ? 0xA : 0x9),
45657 GEN_INT (i == 128 ? 0xE : 0xD),
45663 gcc_unreachable ();
/* For the paths that built the result in D rather than DEST, copy it
   over through a lowpart of DEST's mode.  */
45667 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45670 /* Expand a vector reduction. FN is the binary pattern to reduce;
45671 DEST is the destination; IN is the input vector. */
/* Strategy: repeatedly halve the vector with emit_reduc_half and combine
   the halves with FN, until only the innermost element width remains.
   NOTE(review): sampled extraction -- braces/returns between the lines
   below are elided.  */
45674 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45676 rtx half, dst, vec = in;
45677 machine_mode mode = GET_MODE (in);
45680 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45682 && mode == V8HImode
45683 && fn == gen_uminv8hi3)
45685 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Halving loop: i runs from the full vector bit-size down to the scalar
   element bit-size; on the last iteration (i == inner-size * 2) the
   combine presumably targets DEST directly -- elided here, confirm
   against upstream.  */
45689 for (i = GET_MODE_BITSIZE (mode);
45690 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45693 half = gen_reg_rtx (mode);
45694 emit_reduc_half (half, vec, i);
45695 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45698 dst = gen_reg_rtx (mode);
45699 emit_insn (fn (dst, half, vec));
45704 /* Target hook for scalar_mode_supported_p. */
/* Decimal float modes defer to the generic decimal-float query; TFmode
   is special-cased (its result line is elided here); everything else
   falls through to the default hook.  */
45706 ix86_scalar_mode_supported_p (machine_mode mode)
45708 if (DECIMAL_FLOAT_MODE_P (mode))
45709 return default_decimal_float_supported_p ();
45710 else if (mode == TFmode)
45713 return default_scalar_mode_supported_p (mode);
45716 /* Implements target hook vector_mode_supported_p. */
/* One test per ISA level, from SSE up through AVX512F, then the MMX
   family.  The `return true' line following each test is elided in this
   extraction; the final `return false' is likewise elided.  */
45718 ix86_vector_mode_supported_p (machine_mode mode)
45720 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45722 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45724 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45726 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45728 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45730 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45735 /* Implement target hook libgcc_floating_mode_supported_p. */
/* TFmode support in libgcc is configuration-dependent: suppressed when
   IX86_NO_LIBGCC_TFMODE is defined, conditional on 128-bit long double
   when IX86_MAYBE_NO_LIBGCC_TFMODE is defined.  The surrounding mode
   test and return statements are elided in this extraction.  */
45737 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45747 #ifdef IX86_NO_LIBGCC_TFMODE
45749 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45750 return TARGET_LONG_DOUBLE_128;
45760 /* Target hook for c_mode_for_suffix. */
/* Maps a constant-suffix character to a machine mode.  Body is elided in
   this extraction; in GCC 5 it presumably maps 'q' to TFmode and 'w' to
   XFmode when available -- confirm against upstream.  */
45761 static machine_mode
45762 ix86_c_mode_for_suffix (char suffix)
45772 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45774 We do this in the new i386 backend to maintain source compatibility
45775 with the old cc0-based compiler. */
/* Prepends implicit "flags" and "fpsr" clobbers to every asm statement's
   clobber list (the final return of the augmented list is elided).  */
45778 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45780 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45782 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45787 /* Implements target vector targetm.asm.encode_section_info. */
/* After the default encoding, decls placed in the large data section get
   SYMBOL_FLAG_FAR_ADDR so addressing code knows a 64-bit address is
   needed.  */
45789 static void ATTRIBUTE_UNUSED
45790 ix86_encode_section_info (tree decl, rtx rtl, int first)
45792 default_encode_section_info (decl, rtl, first);
45794 if (ix86_in_large_data_p (decl))
45795 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45798 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the unordered-aware reversal, since plain
   reverse_condition is wrong in the presence of NaNs.  */
45801 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45803 return (mode != CCFPmode && mode != CCFPUmode
45804 ? reverse_condition (code)
45805 : reverse_condition_maybe_unordered (code));
45808 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* ...to OPERANDS[0] (continuation elided).  Returns the asm template
   string.  Popping forms (fstp/ffreep) are chosen when a REG_DEAD note
   shows the source register dies here, so the x87 stack stays balanced.  */
45812 output_387_reg_move (rtx insn, rtx *operands)
45814 if (REG_P (operands[0]))
45816 if (REG_P (operands[1])
45817 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45819 if (REGNO (operands[0]) == FIRST_STACK_REG)
45820 return output_387_ffreep (operands, 0);
45821 return "fstp\t%y0";
/* Source survives: load it to the stack top instead of popping.  */
45823 if (STACK_TOP_P (operands[0]))
45824 return "fld%Z1\t%y1";
45827 else if (MEM_P (operands[0]))
45829 gcc_assert (REG_P (operands[1]));
45830 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45831 return "fstp%Z0\t%y0";
45834 /* There is no non-popping store to memory for XFmode.
45835 So if we need one, follow the store with a load. */
45836 if (GET_MODE (operands[0]) == XFmode)
45837 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45839 return "fst%Z0\t%y0";
45846 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45847 FP status register is set. */
/* Reads the FP status word with fnstsw; when SAHF is available (and
   profitable) it moves the flags into EFLAGS and branches on UNORDERED,
   otherwise it tests bit 0x04 (C2) directly and branches on NE.  The
   branch is predicted 10% taken.  */
45850 ix86_emit_fp_unordered_jump (rtx label)
45852 rtx reg = gen_reg_rtx (HImode);
45855 emit_insn (gen_x86_fnstsw_1 (reg));
45857 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45859 emit_insn (gen_x86_sahf_1 (reg));
45861 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45862 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45866 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45868 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45869 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45872 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45873 gen_rtx_LABEL_REF (VOIDmode, label),
45875 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45877 emit_jump_insn (temp);
45878 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45881 /* Output code to perform a log1p XFmode calculation. */
/* Two-path expansion: fyl2xp1 is only accurate for |op1| below
   1 - sqrt(2)/2 (~0.2928932...), so larger inputs branch to label1 and
   compute log2(1 + op1) via an explicit add followed by fyl2x.  Both
   paths scale by ln(2) (the fldln2 constant) to yield a natural log.  */
45883 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45885 rtx_code_label *label1 = gen_label_rtx ();
45886 rtx_code_label *label2 = gen_label_rtx ();
45888 rtx tmp = gen_reg_rtx (XFmode);
45889 rtx tmp2 = gen_reg_rtx (XFmode);
45892 emit_insn (gen_absxf2 (tmp, op1));
45893 test = gen_rtx_GE (VOIDmode, tmp,
45894 CONST_DOUBLE_FROM_REAL_VALUE (
45895 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45897 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Small-magnitude path: op0 = fyl2xp1 (op1) * ln2.  */
45899 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45900 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45901 emit_jump (label2);
/* Large-magnitude path: op0 = fyl2x (1 + op1) * ln2.  */
45903 emit_label (label1);
45904 emit_move_insn (tmp, CONST1_RTX (XFmode));
45905 emit_insn (gen_addxf3 (tmp, op1, tmp));
45906 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45907 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45909 emit_label (label2);
45912 /* Emit code for round calculation. */
/* Expands round(op1) into op0 as sgn(op1) * floor(fabs(op1) + 0.5):
   take the absolute value, add 0.5, floor (via frndint or lfloor
   depending on OUTMODE), then conditionally negate when the fxam status
   word says the input was negative.  INMODE is SF/DF/XF; OUTMODE may be
   a float or integer mode.  NOTE(review): sampled extraction -- case
   labels and braces between the statements below are elided.  */
45913 void ix86_emit_i387_round (rtx op0, rtx op1)
45915 machine_mode inmode = GET_MODE (op1);
45916 machine_mode outmode = GET_MODE (op0);
45917 rtx e1, e2, res, tmp, tmp1, half;
45918 rtx scratch = gen_reg_rtx (HImode);
45919 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45920 rtx_code_label *jump_label = gen_label_rtx ();
45922 rtx (*gen_abs) (rtx, rtx);
45923 rtx (*gen_neg) (rtx, rtx);
/* Pick abs by input mode (SF/DF/XF; default unreachable).  */
45928 gen_abs = gen_abssf2;
45931 gen_abs = gen_absdf2;
45934 gen_abs = gen_absxf2;
45937 gcc_unreachable ();
/* Pick negation by output mode (float SF/DF/XF or integer HI/SI/DI).  */
45943 gen_neg = gen_negsf2;
45946 gen_neg = gen_negdf2;
45949 gen_neg = gen_negxf2;
45952 gen_neg = gen_neghi2;
45955 gen_neg = gen_negsi2;
45958 gen_neg = gen_negdi2;
45961 gcc_unreachable ();
45964 e1 = gen_reg_rtx (inmode);
45965 e2 = gen_reg_rtx (inmode);
45966 res = gen_reg_rtx (outmode);
45968 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45970 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45972 /* scratch = fxam(op1) */
45973 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45974 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45976 /* e1 = fabs(op1) */
45977 emit_insn (gen_abs (e1, op1));
45979 /* e2 = e1 + 0.5 */
45980 half = force_reg (inmode, half);
45981 emit_insn (gen_rtx_SET (VOIDmode, e2,
45982 gen_rtx_PLUS (inmode, e1, half)));
45984 /* res = floor(e2) */
/* Widen to XFmode first if needed, since the x87 floor patterns work on
   XFmode.  */
45985 if (inmode != XFmode)
45987 tmp1 = gen_reg_rtx (XFmode);
45989 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45990 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* Float output narrower than XFmode: round in XFmode, then truncate via
   a no-op-truncation unspec.  */
46000 rtx tmp0 = gen_reg_rtx (XFmode);
46002 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
46004 emit_insn (gen_rtx_SET (VOIDmode, res,
46005 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
46006 UNSPEC_TRUNC_NOOP)));
46010 emit_insn (gen_frndintxf2_floor (res, tmp1));
/* Integer outputs use the lfloor patterns directly.  */
46013 emit_insn (gen_lfloorxfhi2 (res, tmp1));
46016 emit_insn (gen_lfloorxfsi2 (res, tmp1));
46019 emit_insn (gen_lfloorxfdi2 (res, tmp1));
46022 gcc_unreachable ();
46025 /* flags = signbit(a) */
46026 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
46028 /* if (flags) then res = -res */
46029 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
46030 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
46031 gen_rtx_LABEL_REF (VOIDmode, jump_label),
46033 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46034 predict_jump (REG_BR_PROB_BASE * 50 / 100);
46035 JUMP_LABEL (insn) = jump_label;
46037 emit_insn (gen_neg (res, res));
46039 emit_label (jump_label);
46040 LABEL_NUSES (jump_label) = 1;
46042 emit_move_insn (op0, res);
46045 /* Output code to perform a Newton-Rhapson approximation of a single precision
46046 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson refinement of the hardware reciprocal estimate:
   res = a * (2*rcp(b) - b*rcp(b)^2).  512-bit modes use the AVX-512
   rcp14 unspec; smaller modes use the SSE/AVX rcp unspec (the unspec
   constant lines are elided in this extraction).  */
46048 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
46050 rtx x0, x1, e0, e1;
46052 x0 = gen_reg_rtx (mode);
46053 e0 = gen_reg_rtx (mode);
46054 e1 = gen_reg_rtx (mode);
46055 x1 = gen_reg_rtx (mode);
46057 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
46059 b = force_reg (mode, b);
46061 /* x0 = rcp(b) estimate */
46062 if (mode == V16SFmode || mode == V8DFmode)
46063 emit_insn (gen_rtx_SET (VOIDmode, x0,
46064 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46067 emit_insn (gen_rtx_SET (VOIDmode, x0,
46068 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * rcp(b)^2 (two multiplies).  */
46072 emit_insn (gen_rtx_SET (VOIDmode, e0,
46073 gen_rtx_MULT (mode, x0, b)));
46076 emit_insn (gen_rtx_SET (VOIDmode, e0,
46077 gen_rtx_MULT (mode, x0, e0)));
/* e1 = 2 * rcp(b).  */
46080 emit_insn (gen_rtx_SET (VOIDmode, e1,
46081 gen_rtx_PLUS (mode, x0, x0)));
/* x1 = refined reciprocal; res = a * x1.  */
46084 emit_insn (gen_rtx_SET (VOIDmode, x1,
46085 gen_rtx_MINUS (mode, e1, e0)));
46088 emit_insn (gen_rtx_SET (VOIDmode, res,
46089 gen_rtx_MULT (mode, a, x1)));
46092 /* Output code to perform a Newton-Rhapson approximation of a
46093 single precision floating point [reciprocal] square root. */
/* One Newton-Raphson step on the hardware rsqrt estimate, producing
   either sqrt or rsqrt (the selecting parameter's declaration line is
   elided).  Formula (see comment below):
     -0.5 * [a *] rsqrt(a) * (a * rsqrt(a)^2 - 3).
   For a == 0 the estimate is masked to zero so sqrt(0.0) does not
   produce NaN from 0 * inf.  */
46095 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
46098 rtx x0, e0, e1, e2, e3, mthree, mhalf;
46102 x0 = gen_reg_rtx (mode);
46103 e0 = gen_reg_rtx (mode);
46104 e1 = gen_reg_rtx (mode);
46105 e2 = gen_reg_rtx (mode);
46106 e3 = gen_reg_rtx (mode);
/* Constants -3.0 and -0.5, broadcast to vectors below when needed.  */
46108 real_from_integer (&r, VOIDmode, -3, SIGNED);
46109 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46111 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
46112 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46113 unspec = UNSPEC_RSQRT;
46115 if (VECTOR_MODE_P (mode))
46117 mthree = ix86_build_const_vector (mode, true, mthree);
46118 mhalf = ix86_build_const_vector (mode, true, mhalf);
46119 /* There is no 512-bit rsqrt. There is however rsqrt14. */
46120 if (GET_MODE_SIZE (mode) == 64)
46121 unspec = UNSPEC_RSQRT14;
46124 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46125 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
46127 a = force_reg (mode, a);
46129 /* x0 = rsqrt(a) estimate */
46130 emit_insn (gen_rtx_SET (VOIDmode, x0,
46131 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
46134 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
46139 zero = gen_reg_rtx (mode);
46140 mask = gen_reg_rtx (mode);
46142 zero = force_reg (mode, CONST0_RTX(mode));
46144 /* Handle masked compare. */
46145 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
46147 mask = gen_reg_rtx (HImode);
46148 /* Imm value 0x4 corresponds to not-equal comparison. */
46149 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
46150 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
/* Non-AVX512 path: build an all-ones/all-zeros NE mask and AND it into
   the estimate.  */
46154 emit_insn (gen_rtx_SET (VOIDmode, mask,
46155 gen_rtx_NE (mode, zero, a)));
46157 emit_insn (gen_rtx_SET (VOIDmode, x0,
46158 gen_rtx_AND (mode, x0, mask)));
/* e1 = a * x0^2; e2 = e1 - 3.  */
46163 emit_insn (gen_rtx_SET (VOIDmode, e0,
46164 gen_rtx_MULT (mode, x0, a)));
46166 emit_insn (gen_rtx_SET (VOIDmode, e1,
46167 gen_rtx_MULT (mode, e0, x0)));
46170 mthree = force_reg (mode, mthree);
46171 emit_insn (gen_rtx_SET (VOIDmode, e2,
46172 gen_rtx_PLUS (mode, e1, mthree)));
46174 mhalf = force_reg (mode, mhalf);
46176 /* e3 = -.5 * x0 */
46177 emit_insn (gen_rtx_SET (VOIDmode, e3,
46178 gen_rtx_MULT (mode, x0, mhalf)));
46180 /* e3 = -.5 * e0 */
46181 emit_insn (gen_rtx_SET (VOIDmode, e3,
46182 gen_rtx_MULT (mode, e0, mhalf)));
46183 /* ret = e2 * e3 */
46184 emit_insn (gen_rtx_SET (VOIDmode, res,
46185 gen_rtx_MULT (mode, e2, e3)));
46188 #ifdef TARGET_SOLARIS
46189 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Special-cases .eh_frame (needs the @unwind marker on every occurrence
   for Binutils 2.15) and COMDAT-linkonce sections; everything else uses
   the default ELF section directive.  */
46192 i386_solaris_elf_named_section (const char *name, unsigned int flags,
46195 /* With Binutils 2.15, the "@unwind" marker must be specified on
46196 every occurrence of the ".eh_frame" section, not just the first
46199 && strcmp (name, ".eh_frame") == 0)
46201 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
46202 flags & SECTION_WRITE ? "aw" : "a");
46207 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
46209 solaris_elf_asm_comdat_section (name, flags, decl);
46214 default_elf_asm_named_section (name, flags, decl);
46216 #endif /* TARGET_SOLARIS */
46218 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Only void/bool/integer/real main variants are considered; the switch
   on TYPE_MODE returns "g" for TFmode (__float128) and "e" for XFmode
   (long double / __float80) -- the return lines themselves are elided
   in this extraction.  */
46220 static const char *
46221 ix86_mangle_type (const_tree type)
46223 type = TYPE_MAIN_VARIANT (type);
46225 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
46226 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
46229 switch (TYPE_MODE (type))
46232 /* __float128 is "g". */
46235 /* "long double" or __float80 is "e". */
46242 /* For 32-bit code we can save PIC register setup by using
46243 __stack_chk_fail_local hidden function instead of calling
46244 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
46245 register, so it is better to call __stack_chk_fail directly. */
46247 static tree ATTRIBUTE_UNUSED
46248 ix86_stack_protect_fail (void)
46250 return TARGET_64BIT
46251 ? default_external_stack_protect_fail ()
46252 : default_hidden_stack_protect_fail ();
46255 /* Select a format to encode pointers in exception handling data. CODE
46256 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46257 true if the symbol may be affected by dynamic relocations.
46259 ??? All x86 object file formats are capable of representing this.
46260 After all, the relocation needed is the same as for the call insn.
46261 Whether or not a particular assembler allows us to enter such, I
46262 guess we'll have to see. */
/* PIC path (the guarding `if (flag_pic)' line is elided): pc-relative
   sdata, narrowed to sdata4 for small/medium code models, with indirect
   for global symbols.  Non-PIC: udata4 when the value fits the small or
   medium-code-model constraints, absolute pointer otherwise.  */
46264 asm_preferred_eh_data_format (int code, int global)
46268 int type = DW_EH_PE_sdata8;
46270 || ix86_cmodel == CM_SMALL_PIC
46271 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46272 type = DW_EH_PE_sdata4;
46273 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46275 if (ix86_cmodel == CM_SMALL
46276 || (ix86_cmodel == CM_MEDIUM && code))
46277 return DW_EH_PE_udata4;
46278 return DW_EH_PE_absptr;
46281 /* Expand copysign from SIGN to the positive value ABS_VALUE
46282 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* ...the sign bit (continuation elided).  RESULT = ABS_VALUE OR
   (SIGN AND NOT sign-mask): since ABS_VALUE is known positive, OR-ing
   in the extracted sign bit implements copysign.  */
46285 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46287 machine_mode mode = GET_MODE (sign);
46288 rtx sgn = gen_reg_rtx (mode);
46289 if (mask == NULL_RTX)
46291 machine_mode vmode;
/* Pick the vector mode matching the scalar (the V4SF/V2DF assignment
   lines are elided).  */
46293 if (mode == SFmode)
46295 else if (mode == DFmode)
46300 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46301 if (!VECTOR_MODE_P (mode))
46303 /* We need to generate a scalar mode mask in this case. */
46304 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46305 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46306 mask = gen_reg_rtx (mode);
46307 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied MASK has the sign bit cleared, hence the NOT before
   extracting the sign from SIGN.  */
46311 mask = gen_rtx_NOT (mode, mask);
46312 emit_insn (gen_rtx_SET (VOIDmode, sgn,
46313 gen_rtx_AND (mode, mask, sign)));
46314 emit_insn (gen_rtx_SET (VOIDmode, result,
46315 gen_rtx_IOR (mode, abs_value, sgn)));
46318 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46319 mask for masking out the sign-bit is stored in *SMASK, if that is
/* ...non-null (continuation elided).  fabs is implemented as
   OP0 AND ~signbit, using ix86_build_signbit_mask with invert = true;
   scalar modes extract element 0 of the vector mask first.  */
46322 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46324 machine_mode vmode, mode = GET_MODE (op0);
46327 xa = gen_reg_rtx (mode);
46328 if (mode == SFmode)
46330 else if (mode == DFmode)
46334 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46335 if (!VECTOR_MODE_P (mode))
46337 /* We need to generate a scalar mode mask in this case. */
46338 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46339 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46340 mask = gen_reg_rtx (mode);
46341 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46343 emit_insn (gen_rtx_SET (VOIDmode, xa,
46344 gen_rtx_AND (mode, op0, mask)));
46352 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46353 swapping the operands if SWAP_OPERANDS is true. The expanded
46354 code is a forward jump to a newly created label in case the
46355 comparison is true. The generated label rtx is returned. */
/* Emits an FP COMPARE into the flags register, then a conditional jump
   on CODE; the caller is responsible for emitting the label.  */
46356 static rtx_code_label *
46357 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46358 bool swap_operands)
46360 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46361 rtx_code_label *label;
46365 std::swap (op0, op1);
46367 label = gen_label_rtx ();
46368 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46369 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46370 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46371 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46372 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46373 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46374 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46375 JUMP_LABEL (tmp) = label;
46380 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46381 using comparison code CODE. Operands are swapped for the comparison if
46382 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Uses the scalar setcc-style SSE patterns (cmpsd/cmpss), which produce
   an all-ones/all-zeros mask in an FP register.  */
46384 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46385 bool swap_operands)
46387 rtx (*insn)(rtx, rtx, rtx, rtx);
46388 machine_mode mode = GET_MODE (op0);
46389 rtx mask = gen_reg_rtx (mode);
46392 std::swap (op0, op1);
46394 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46396 emit_insn (insn (mask, op0, op1,
46397 gen_rtx_fmt_ee (code, mode, op0, op1)));
46401 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46402 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2**52 for DFmode, 2**23 for SFmode: adding and subtracting this value
   is the classic trick for rounding to integer in FP (used by the
   expanders below).  The constant is forced into a register.  */
46404 ix86_gen_TWO52 (machine_mode mode)
46406 REAL_VALUE_TYPE TWO52r;
46409 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46410 TWO52 = const_double_from_real_value (TWO52r, mode);
46411 TWO52 = force_reg (mode, TWO52);
46416 /* Expand SSE sequence for computing lround from OP1 storing
/* ...into OP0 (continuation elided).  Adds copysign(nextafter(0.5, 0),
   op1) before the fix conversion; the predecessor of 0.5 is used so
   that values exactly halfway are not double-rounded upward by the
   addition itself.  */
46419 ix86_expand_lround (rtx op0, rtx op1)
46421 /* C code for the stuff we're doing below:
46422 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46425 machine_mode mode = GET_MODE (op1);
46426 const struct real_format *fmt;
46427 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46430 /* load nextafter (0.5, 0.0) */
46431 fmt = REAL_MODE_FORMAT (mode);
46432 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46433 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46435 /* adj = copysign (0.5, op1) */
46436 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46437 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46439 /* adj = op1 + adj */
46440 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46442 /* op0 = (imode)adj */
46443 expand_fix (op0, adj, 0);
46446 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* ...into OPERAND0; despite the comment this expands lfloor/lceil
   (DO_FLOOR selects which).  Truncate to integer, convert back, and
   correct by +/-1 when the round-trip moved in the wrong direction.  */
46449 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46451 /* C code for the stuff we're doing below (for do_floor):
46453 xi -= (double)xi > op1 ? 1 : 0;
46456 machine_mode fmode = GET_MODE (op1);
46457 machine_mode imode = GET_MODE (op0);
46458 rtx ireg, freg, tmp;
46459 rtx_code_label *label;
46461 /* reg = (long)op1 */
46462 ireg = gen_reg_rtx (imode);
46463 expand_fix (ireg, op1, 0);
46465 /* freg = (double)reg */
46466 freg = gen_reg_rtx (fmode);
46467 expand_float (freg, ireg, 0);
46469 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Jump over the adjustment when no correction is needed; operand swap
   plus UNLE implements > (floor) or < (ceil).  */
46470 label = ix86_expand_sse_compare_and_jump (UNLE,
46471 freg, op1, !do_floor);
46472 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46473 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46474 emit_move_insn (ireg, tmp);
46476 emit_label (label);
46477 LABEL_NUSES (label) = 1;
46479 emit_move_insn (op0, ireg);
46482 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46483 result in OPERAND0. */
/* Classic TWO52 trick: for |x| < 2**mantissa-bits, x + TWO52 - TWO52
   rounds to integer in the current rounding mode; larger magnitudes are
   already integral and skip the adjustment via the forward jump.  The
   sign is restored with copysign so -0.0 survives.  */
46485 ix86_expand_rint (rtx operand0, rtx operand1)
46487 /* C code for the stuff we're doing below:
46488 xa = fabs (operand1);
46489 if (!isless (xa, 2**52))
46491 xa = xa + 2**52 - 2**52;
46492 return copysign (xa, operand1);
46494 machine_mode mode = GET_MODE (operand0);
46495 rtx res, xa, TWO52, mask;
46496 rtx_code_label *label;
46498 res = gen_reg_rtx (mode);
46499 emit_move_insn (res, operand1);
46501 /* xa = abs (operand1) */
46502 xa = ix86_expand_sse_fabs (res, &mask);
46504 /* if (!isless (xa, TWO52)) goto label; */
46505 TWO52 = ix86_gen_TWO52 (mode);
46506 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46508 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46509 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46511 ix86_sse_copysign_to_positive (res, xa, res, mask);
46513 emit_label (label);
46514 LABEL_NUSES (label) = 1;
46516 emit_move_insn (operand0, res);
46519 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* ...into OPERAND0 (continuation elided).  32-bit-safe variant: rounds
   with the TWO52 add/sub trick instead of a DImode fix conversion, then
   compensates by +/-1.0 when the round-to-nearest landed on the wrong
   side.  DO_FLOOR selects floor vs ceil.  */
46522 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46524 /* C code for the stuff we expand below.
46525 double xa = fabs (x), x2;
46526 if (!isless (xa, TWO52))
46528 xa = xa + TWO52 - TWO52;
46529 x2 = copysign (xa, x);
46538 machine_mode mode = GET_MODE (operand0);
46539 rtx xa, TWO52, tmp, one, res, mask;
46540 rtx_code_label *label;
46542 TWO52 = ix86_gen_TWO52 (mode);
46544 /* Temporary for holding the result, initialized to the input
46545 operand to ease control flow. */
46546 res = gen_reg_rtx (mode);
46547 emit_move_insn (res, operand1);
46549 /* xa = abs (operand1) */
46550 xa = ix86_expand_sse_fabs (res, &mask);
46552 /* if (!isless (xa, TWO52)) goto label; */
46553 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46555 /* xa = xa + TWO52 - TWO52; */
46556 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46557 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46559 /* xa = copysign (xa, operand1) */
46560 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46562 /* generate 1.0 or -1.0 */
46563 one = force_reg (mode,
46564 const_double_from_real_value (do_floor
46565 ? dconst1 : dconstm1, mode));
46567 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46568 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46569 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46570 gen_rtx_AND (mode, one, tmp)));
46571 /* We always need to subtract here to preserve signed zero. */
46572 tmp = expand_simple_binop (mode, MINUS,
46573 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT)
46574 emit_move_insn (res, tmp);
46576 emit_label (label);
46577 LABEL_NUSES (label) = 1;
46579 emit_move_insn (operand0, res);
46582 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* ...into OPERAND0 (continuation elided).  64-bit variant: truncates
   via a fix conversion (DImode for DFmode input), converts back, and
   compensates by +/-1 when the truncation went the wrong way for
   floor/ceil.  Signed zero is restored with copysign when required.  */
46585 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46587 /* C code for the stuff we expand below.
46588 double xa = fabs (x), x2;
46589 if (!isless (xa, TWO52))
46591 x2 = (double)(long)x;
46598 if (HONOR_SIGNED_ZEROS (mode))
46599 return copysign (x2, x);
46602 machine_mode mode = GET_MODE (operand0);
46603 rtx xa, xi, TWO52, tmp, one, res, mask;
46604 rtx_code_label *label;
46606 TWO52 = ix86_gen_TWO52 (mode);
46608 /* Temporary for holding the result, initialized to the input
46609 operand to ease control flow. */
46610 res = gen_reg_rtx (mode);
46611 emit_move_insn (res, operand1);
46613 /* xa = abs (operand1) */
46614 xa = ix86_expand_sse_fabs (res, &mask);
46616 /* if (!isless (xa, TWO52)) goto label; */
46617 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46619 /* xa = (double)(long)x */
46620 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46621 expand_fix (xi, res, 0);
46622 expand_float (xa, xi, 0);
46625 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46627 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46628 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46629 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46630 gen_rtx_AND (mode, one, tmp)));
46631 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46632 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46633 emit_move_insn (res, tmp);
46635 if (HONOR_SIGNED_ZEROS (mode))
46636 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46638 emit_label (label);
46639 LABEL_NUSES (label) = 1;
46641 emit_move_insn (operand0, res);
46644 /* Expand SSE sequence for computing round from OPERAND1 storing
46645 into OPERAND0. Sequence that works without relying on DImode truncation
46646 via cvttsd2siq that is only available on 64bit targets. */
/* Round-half-away-from-zero without integer conversion: round |x| to
   nearest with the TWO52 trick, measure the rounding delta dxa, then
   correct by -1 when dxa > 0.5 and by +1 when dxa <= -0.5, and finally
   restore the sign.  */
46648 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46650 /* C code for the stuff we expand below.
46651 double xa = fabs (x), xa2, x2;
46652 if (!isless (xa, TWO52))
46654 Using the absolute value and copying back sign makes
46655 -0.0 -> -0.0 correct.
46656 xa2 = xa + TWO52 - TWO52;
46661 else if (dxa > 0.5)
46663 x2 = copysign (xa2, x);
46666 machine_mode mode = GET_MODE (operand0);
46667 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46668 rtx_code_label *label;
46670 TWO52 = ix86_gen_TWO52 (mode);
46672 /* Temporary for holding the result, initialized to the input
46673 operand to ease control flow. */
46674 res = gen_reg_rtx (mode);
46675 emit_move_insn (res, operand1);
46677 /* xa = abs (operand1) */
46678 xa = ix86_expand_sse_fabs (res, &mask);
46680 /* if (!isless (xa, TWO52)) goto label; */
46681 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46683 /* xa2 = xa + TWO52 - TWO52; */
46684 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46685 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46687 /* dxa = xa2 - xa; */
46688 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46690 /* generate 0.5, 1.0 and -0.5 */
46691 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46692 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46693 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46697 tmp = gen_reg_rtx (mode);
46698 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46699 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46700 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46701 gen_rtx_AND (mode, one, tmp)));
46702 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46703 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46704 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46705 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46706 gen_rtx_AND (mode, one, tmp)));
46707 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46709 /* res = copysign (xa2, operand1) */
46710 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46712 emit_label (label);
46713 LABEL_NUSES (label) = 1;
46715 emit_move_insn (operand0, res);
46718 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* ...into OPERAND0 (continuation elided).  Truncation via fix/float
   round-trip; values with |x| >= 2**mantissa-bits are already integral
   and skip it.  Signed zero is restored with copysign when the mode
   honors it.  */
46721 ix86_expand_trunc (rtx operand0, rtx operand1)
46723 /* C code for SSE variant we expand below.
46724 double xa = fabs (x), x2;
46725 if (!isless (xa, TWO52))
46727 x2 = (double)(long)x;
46728 if (HONOR_SIGNED_ZEROS (mode))
46729 return copysign (x2, x);
46732 machine_mode mode = GET_MODE (operand0);
46733 rtx xa, xi, TWO52, res, mask;
46734 rtx_code_label *label;
46736 TWO52 = ix86_gen_TWO52 (mode);
46738 /* Temporary for holding the result, initialized to the input
46739 operand to ease control flow. */
46740 res = gen_reg_rtx (mode);
46741 emit_move_insn (res, operand1);
46743 /* xa = abs (operand1) */
46744 xa = ix86_expand_sse_fabs (res, &mask);
46746 /* if (!isless (xa, TWO52)) goto label; */
46747 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46749 /* x = (double)(long)x */
46750 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46751 expand_fix (xi, res, 0);
46752 expand_float (res, xi, 0);
46754 if (HONOR_SIGNED_ZEROS (mode))
46755 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46757 emit_label (label);
46758 LABEL_NUSES (label) = 1;
46760 emit_move_insn (operand0, res);
46763 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* NOTE(review): extraction elides the rest of this header comment, the
   return-type line, and braces; only comments were added below.  */
46766 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46768 machine_mode mode = GET_MODE (operand0);
46769 rtx xa, mask, TWO52, one, res, smask, tmp;
46770 rtx_code_label *label;
46772 /* C code for SSE variant we expand below.
46773 double xa = fabs (x), x2;
46774 if (!isless (xa, TWO52))
46776 xa2 = xa + TWO52 - TWO52;
46780 x2 = copysign (xa2, x);
46784 TWO52 = ix86_gen_TWO52 (mode);
46786 /* Temporary for holding the result, initialized to the input
46787 operand to ease control flow. */
46788 res = gen_reg_rtx (mode);
46789 emit_move_insn (res, operand1);
46791 /* xa = abs (operand1) */
46792 xa = ix86_expand_sse_fabs (res, &smask);
46794 /* if (!isless (xa, TWO52)) goto label; */
46795 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting TWO52 rounds xa to an integer in the current
   FP rounding mode; the compensation below corrects round-up cases so
   the net effect is truncation.  */
46797 /* res = xa + TWO52 - TWO52; */
46798 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46799 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46800 emit_move_insn (res, tmp);
46803 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46805 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46806 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46807 emit_insn (gen_rtx_SET (VOIDmode, mask,
46808 gen_rtx_AND (mode, mask, one)));
46809 tmp = expand_simple_binop (mode, MINUS,
46810 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46811 emit_move_insn (res, tmp);
46813 /* res = copysign (res, operand1) */
46814 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46816 emit_label (label);
46817 LABEL_NUSES (label) = 1;
46819 emit_move_insn (operand0, res);
46822 /* Expand SSE sequence for computing round from OPERAND1 storing
/* NOTE(review): extraction elides the remainder of this header comment,
   the return-type line, and braces; only comments were added.  */
46825 ix86_expand_round (rtx operand0, rtx operand1)
46827 /* C code for the stuff we're doing below:
46828 double xa = fabs (x);
46829 if (!isless (xa, TWO52))
46831 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46832 return copysign (xa, x);
46834 machine_mode mode = GET_MODE (operand0);
46835 rtx res, TWO52, xa, xi, half, mask;
46836 rtx_code_label *label;
46837 const struct real_format *fmt;
46838 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46840 /* Temporary for holding the result, initialized to the input
46841 operand to ease control flow. */
46842 res = gen_reg_rtx (mode);
46843 emit_move_insn (res, operand1);
46845 TWO52 = ix86_gen_TWO52 (mode);
46846 xa = ix86_expand_sse_fabs (res, &mask);
46847 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* pred_half = 0.5 - 2^(-p-1), i.e. the largest representable value
   strictly below 0.5; using it instead of 0.5 avoids rounding
   x + 0.5 up to the next integer for values just under .5.  */
46849 /* load nextafter (0.5, 0.0) */
46850 fmt = REAL_MODE_FORMAT (mode);
46851 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46852 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46854 /* xa = xa + 0.5 */
46855 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46856 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46858 /* xa = (double)(int64_t)xa */
46859 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46860 expand_fix (xi, xa, 0);
46861 expand_float (xa, xi, 0);
46863 /* res = copysign (xa, operand1) */
46864 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46866 emit_label (label);
46867 LABEL_NUSES (label) = 1;
46869 emit_move_insn (operand0, res);
46872 /* Expand SSE sequence for computing round
46873 from OP1 storing into OP0 using sse4 round insn. */
/* NOTE(review): extraction elides the return-type line, braces, and the
   switch-on-mode scaffolding (the SFmode/DFmode case labels) around the
   gen_copysign/gen_round assignments below; only comments were added.  */
46875 ix86_expand_round_sse4 (rtx op0, rtx op1)
46877 machine_mode mode = GET_MODE (op0);
46878 rtx e1, e2, res, half;
46879 const struct real_format *fmt;
46880 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46881 rtx (*gen_copysign) (rtx, rtx, rtx);
46882 rtx (*gen_round) (rtx, rtx, rtx);
/* Single-precision generators.  */
46887 gen_copysign = gen_copysignsf3;
46888 gen_round = gen_sse4_1_roundsf2;
/* Double-precision generators.  */
46891 gen_copysign = gen_copysigndf3;
46892 gen_round = gen_sse4_1_rounddf2;
46895 gcc_unreachable ();
46898 /* round (a) = trunc (a + copysign (0.5, a)) */
/* Use the predecessor of 0.5 so halfway-adjacent values don't get
   bumped to the wrong integer (see ix86_expand_round).  */
46900 /* load nextafter (0.5, 0.0) */
46901 fmt = REAL_MODE_FORMAT (mode);
46902 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46903 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46904 half = const_double_from_real_value (pred_half, mode);
46906 /* e1 = copysign (0.5, op1) */
46907 e1 = gen_reg_rtx (mode);
46908 emit_insn (gen_copysign (e1, half, op1));
46910 /* e2 = op1 + e1 */
46911 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46913 /* res = trunc (e2) */
46914 res = gen_reg_rtx (mode);
46915 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46917 emit_move_insn (op0, res);
46921 /* Table of valid machine attributes. */
/* NOTE(review): extraction elides the trailing `affects_type_identity`
   field and closing brace on several entries, plus some #endif lines;
   only comments were added, visible tokens are unchanged.  */
46922 static const struct attribute_spec ix86_attribute_table[] =
46924 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46925 affects_type_identity } */
46926 /* Stdcall attribute says callee is responsible for popping arguments
46927 if they are not variable. */
46928 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46930 /* Fastcall attribute says callee is responsible for popping arguments
46931 if they are not variable. */
46932 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46934 /* Thiscall attribute says callee is responsible for popping arguments
46935 if they are not variable. */
46936 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46938 /* Cdecl attribute says the callee is a normal C declaration */
46939 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46941 /* Regparm attribute specifies how many integer arguments are to be
46942 passed in registers. */
46943 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46945 /* Sseregparm attribute says we are using x86_64 calling conventions
46946 for FP arguments. */
46947 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46949 /* The transactional memory builtins are implicitly regparm or fastcall
46950 depending on the ABI. Override the generic do-nothing attribute that
46951 these builtins were declared with. */
46952 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46954 /* force_align_arg_pointer says this function realigns the stack at entry. */
46955 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46956 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
46957 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46958 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46959 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46960 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46963 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46965 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46967 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46968 SUBTARGET_ATTRIBUTE_TABLE,
46970 /* ms_abi and sysv_abi calling convention function attributes. */
46971 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46972 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46973 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46975 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46976 ix86_handle_callee_pop_aggregate_return, true },
/* End-of-table sentinel.  */
46978 { NULL, 0, 0, false, false, false, NULL, false }
46981 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* NOTE(review): extraction elides the return type, parameter list tail,
   braces, and several `case` labels of this switch (only the return
   statements survive for the early cases); only comments were added.  */
46983 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* Dispatch on the kind of vectorizer cost being queried and return the
   per-statement cost from the active tuning table (ix86_cost).  */
46988 switch (type_of_cost)
46991 return ix86_cost->scalar_stmt_cost;
46994 return ix86_cost->scalar_load_cost;
46997 return ix86_cost->scalar_store_cost;
47000 return ix86_cost->vec_stmt_cost;
47003 return ix86_cost->vec_align_load_cost;
47006 return ix86_cost->vec_store_cost;
47008 case vec_to_scalar:
47009 return ix86_cost->vec_to_scalar_cost;
47011 case scalar_to_vec:
47012 return ix86_cost->scalar_to_vec_cost;
/* Unaligned loads and stores share one cost entry here.  */
47014 case unaligned_load:
47015 case unaligned_store:
47016 return ix86_cost->vec_unalign_load_cost;
47018 case cond_branch_taken:
47019 return ix86_cost->cond_taken_branch_cost;
47021 case cond_branch_not_taken:
47022 return ix86_cost->cond_not_taken_branch_cost;
47025 case vec_promote_demote:
47026 return ix86_cost->vec_stmt_cost;
/* vec_construct cost scales with the number of vector elements.  */
47028 case vec_construct:
47029 elements = TYPE_VECTOR_SUBPARTS (vectype);
47030 return elements / 2 + 1;
47033 gcc_unreachable ();
47037 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
47038 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
47039 insn every time. */
47041 static GTY(()) rtx_insn *vselect_insn;
47043 /* Initialize vselect_insn. */
/* NOTE(review): extraction elides the return-type line, braces, local
   declarations, and the sequence start/end around emit_insn; the modes
   used here are placeholders that callers overwrite.  */
47046 init_vselect_insn (void)
47051 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
/* Fill the parallel with dummy selectors; real ones are patched in by
   expand_vselect.  */
47052 for (i = 0; i < MAX_VECT_LEN; ++i)
47053 XVECEXP (x, 0, i) = const0_rtx;
47054 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
47056 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
47058 vselect_insn = emit_insn (x);
47062 /* Construct (set target (vec_select op0 (parallel perm))) and
47063 return true if that's a valid instruction in the active ISA. */
/* NOTE(review): extraction elides the return-type line, braces, local
   declarations, and the final return; only comments were added.  */
47066 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
47067 unsigned nelt, bool testing_p)
47070 rtx x, save_vconcat;
47073 if (vselect_insn == NULL_RTX)
47074 init_vselect_insn ();
/* Patch the cached insn in place: selector constants, source operand,
   modes, and destination.  */
47076 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
47077 PUT_NUM_ELEM (XVEC (x, 0), nelt);
47078 for (i = 0; i < nelt; ++i)
47079 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
47080 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47081 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
47082 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
47083 SET_DEST (PATTERN (vselect_insn)) = target;
/* Ask the recognizer whether the patched pattern is a real insn.  */
47084 icode = recog_memoized (vselect_insn);
47086 if (icode >= 0 && !testing_p)
47087 emit_insn (copy_rtx (PATTERN (vselect_insn)));
/* Restore the cached insn to its neutral state for the next caller.  */
47089 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
47090 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
47091 INSN_CODE (vselect_insn) = -1;
47096 /* Similar, but generate a vec_concat from op0 and op1 as well. */
/* NOTE(review): extraction elides the return-type line, braces, the
   lines installing op0/op1 into the vconcat, and the final return.  */
47099 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
47100 const unsigned char *perm, unsigned nelt,
47103 machine_mode v2mode;
47107 if (vselect_insn == NULL_RTX)
47108 init_vselect_insn ();
/* The concatenation of two nelt-element vectors has the 2x-wider mode.  */
47110 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
47111 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47112 PUT_MODE (x, v2mode);
47115 ok = expand_vselect (target, x, perm, nelt, testing_p);
/* Reset the cached vconcat operands so no caller-owned RTL leaks into
   the shared insn.  */
47116 XEXP (x, 0) = const0_rtx;
47117 XEXP (x, 1) = const0_rtx;
47121 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47122 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
/* NOTE(review): extraction elides many lines of this function (returns,
   braces, `switch (vmode)` case labels, the mask initialization, and the
   mmode selection block before line 47330); only comments were added and
   the visible tokens are unchanged.  */
47125 expand_vec_perm_blend (struct expand_vec_perm_d *d)
47127 machine_mode mmode, vmode = d->vmode;
47128 unsigned i, mask, nelt = d->nelt;
47129 rtx target, op0, op1, maskop, x;
47130 rtx rperm[32], vperm;
/* Blends need two operands; bail out early for one-operand permutes
   and for ISA/mode combinations with no blend instruction.  */
47132 if (d->one_operand_p)
47134 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
47135 && (TARGET_AVX512BW
47136 || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
47138 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47140 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47142 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47147 /* This is a blend, not a permute. Elements must stay in their
47148 respective lanes. */
47149 for (i = 0; i < nelt; ++i)
47151 unsigned e = d->perm[i];
47152 if (!(e == i || e == i + nelt))
47159 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
47160 decision should be extracted elsewhere, so that we only try that
47161 sequence once all budget==3 options have been tried. */
47162 target = d->target;
/* Build the immediate blend mask: bit i set means element i comes from
   the second operand.  */
47181 for (i = 0; i < nelt; ++i)
47182 mask |= (d->perm[i] >= nelt) << i;
47186 for (i = 0; i < 2; ++i)
47187 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
47192 for (i = 0; i < 4; ++i)
47193 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47198 /* See if bytes move in pairs so we can use pblendw with
47199 an immediate argument, rather than pblendvb with a vector
47201 for (i = 0; i < 16; i += 2)
47202 if (d->perm[i] + 1 != d->perm[i + 1])
47205 for (i = 0; i < nelt; ++i)
47206 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
/* Variable blend: build a constant selector vector (0 = first operand,
   -1 = second) and emit pblendvb.  */
47209 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
47210 vperm = force_reg (vmode, vperm);
47212 if (GET_MODE_SIZE (vmode) == 16)
47213 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
47215 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
47216 if (target != d->target)
47217 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47221 for (i = 0; i < 8; ++i)
47222 mask |= (d->perm[i * 2] >= 16) << i;
47227 target = gen_reg_rtx (vmode);
47228 op0 = gen_lowpart (vmode, op0);
47229 op1 = gen_lowpart (vmode, op1);
/* 32-byte byte blends: probe progressively coarser granularities so an
   immediate-operand blend can replace the variable vpblendvb.  */
47233 /* See if bytes move in pairs. If not, vpblendvb must be used. */
47234 for (i = 0; i < 32; i += 2)
47235 if (d->perm[i] + 1 != d->perm[i + 1])
47237 /* See if bytes move in quadruplets. If yes, vpblendd
47238 with immediate can be used. */
47239 for (i = 0; i < 32; i += 4)
47240 if (d->perm[i] + 2 != d->perm[i + 2])
47244 /* See if bytes move the same in both lanes. If yes,
47245 vpblendw with immediate can be used. */
47246 for (i = 0; i < 16; i += 2)
47247 if (d->perm[i] + 16 != d->perm[i + 16])
47250 /* Use vpblendw. */
47251 for (i = 0; i < 16; ++i)
47252 mask |= (d->perm[i * 2] >= 32) << i;
47257 /* Use vpblendd. */
47258 for (i = 0; i < 8; ++i)
47259 mask |= (d->perm[i * 4] >= 32) << i;
47264 /* See if words move in pairs. If yes, vpblendd can be used. */
47265 for (i = 0; i < 16; i += 2)
47266 if (d->perm[i] + 1 != d->perm[i + 1])
47270 /* See if words move the same in both lanes. If not,
47271 vpblendvb must be used. */
47272 for (i = 0; i < 8; i++)
47273 if (d->perm[i] + 8 != d->perm[i + 8])
47275 /* Use vpblendvb. */
47276 for (i = 0; i < 32; ++i)
47277 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47281 target = gen_reg_rtx (vmode);
47282 op0 = gen_lowpart (vmode, op0);
47283 op1 = gen_lowpart (vmode, op1);
47284 goto finish_pblendvb;
47287 /* Use vpblendw. */
47288 for (i = 0; i < 16; ++i)
47289 mask |= (d->perm[i] >= 16) << i;
47293 /* Use vpblendd. */
47294 for (i = 0; i < 8; ++i)
47295 mask |= (d->perm[i * 2] >= 16) << i;
47300 /* Use vpblendd. */
47301 for (i = 0; i < 4; ++i)
47302 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47307 gcc_unreachable ();
/* AVX512 masked blends take the mask in a mask register (mmode);
   everything else takes an immediate.  */
47330 if (mmode != VOIDmode)
47331 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47333 maskop = GEN_INT (mask);
47335 /* This matches five different patterns with the different modes. */
47336 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47337 x = gen_rtx_SET (VOIDmode, target, x);
47339 if (target != d->target)
47340 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47345 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47346 in terms of the variable form of vpermilps.
47348 Note that we will have already failed the immediate input vpermilps,
47349 which requires that the high and low part shuffle be identical; the
47350 variable form doesn't require that. */
/* NOTE(review): extraction elides the return-type line, braces, early
   returns, and part of the selector-normalization in the second loop;
   only comments were added.  */
47353 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47355 rtx rperm[8], vperm;
47358 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47361 /* We can only permute within the 128-bit lane. */
47362 for (i = 0; i < 8; ++i)
47364 unsigned e = d->perm[i];
47365 if (i < 4 ? e >= 4 : e < 4)
47372 for (i = 0; i < 8; ++i)
47374 unsigned e = d->perm[i];
47376 /* Within each 128-bit lane, the elements of op0 are numbered
47377 from 0 and the elements of op1 are numbered from 4. */
47383 rperm[i] = GEN_INT (e);
/* Materialize the per-element selectors as a V8SI constant and emit the
   variable vpermilps.  */
47386 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47387 vperm = force_reg (V8SImode, vperm);
47388 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47393 /* Return true if permutation D can be performed as VMODE permutation
/* NOTE(review): extraction elides the tail of this comment, the return
   type, braces, and the return statements; only comments were added.  */
47397 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47399 unsigned int i, j, chunk;
/* Both modes must be integer vectors of identical total size.  */
47401 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47402 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47403 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47406 if (GET_MODE_NUNITS (vmode) >= d->nelt)
/* The permutation is valid in the wider mode iff each chunk of
   d->nelt/nunits elements starts chunk-aligned and is consecutive.  */
47409 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47410 for (i = 0; i < d->nelt; i += chunk)
47411 if (d->perm[i] & (chunk - 1))
47414 for (j = 1; j < chunk; ++j)
47415 if (d->perm[i] + j != d->perm[i + j])
47421 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47422 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
/* NOTE(review): extraction elides many lines of this function (return
   type, braces, returns, several ISA checks, and vmode assignments after
   the valid_perm tests); only comments were added, tokens unchanged.  */
47425 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47427 unsigned i, nelt, eltsz, mask;
47428 unsigned char perm[64];
47429 machine_mode vmode = V16QImode;
47430 rtx rperm[64], vperm, target, op0, op1;
/* Two-operand case: only XOP vpperm (16 bytes) or AVX2 vperm2i128.  */
47434 if (!d->one_operand_p)
47436 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47439 && valid_perm_using_mode_p (V2TImode, d))
47444 /* Use vperm2i128 insn. The pattern uses
47445 V4DImode instead of V2TImode. */
47446 target = d->target;
47447 if (d->vmode != V4DImode)
47448 target = gen_reg_rtx (V4DImode);
47449 op0 = gen_lowpart (V4DImode, d->op0);
47450 op1 = gen_lowpart (V4DImode, d->op1);
47452 = GEN_INT ((d->perm[0] / (nelt / 2))
47453 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47454 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47455 if (target != d->target)
47456 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
/* One-operand case: pick the widest granularity instruction that can
   realize the permutation, by descending vector size.  */
47464 if (GET_MODE_SIZE (d->vmode) == 16)
47469 else if (GET_MODE_SIZE (d->vmode) == 32)
47474 /* V4DImode should be already handled through
47475 expand_vselect by vpermq instruction. */
47476 gcc_assert (d->vmode != V4DImode);
47479 if (d->vmode == V8SImode
47480 || d->vmode == V16HImode
47481 || d->vmode == V32QImode)
47483 /* First see if vpermq can be used for
47484 V8SImode/V16HImode/V32QImode. */
47485 if (valid_perm_using_mode_p (V4DImode, d))
47487 for (i = 0; i < 4; i++)
47488 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47491 target = gen_reg_rtx (V4DImode);
47492 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47495 emit_move_insn (d->target,
47496 gen_lowpart (d->vmode, target));
47502 /* Next see if vpermd can be used. */
47503 if (valid_perm_using_mode_p (V8SImode, d))
47506 /* Or if vpermps can be used. */
47507 else if (d->vmode == V8SFmode)
47510 if (vmode == V32QImode)
47512 /* vpshufb only works intra lanes, it is not
47513 possible to shuffle bytes in between the lanes. */
47514 for (i = 0; i < nelt; ++i)
47515 if ((d->perm[i] ^ i) & (nelt / 2))
47519 else if (GET_MODE_SIZE (d->vmode) == 64)
47521 if (!TARGET_AVX512BW)
47524 /* If vpermq didn't work, vpshufb won't work either. */
47525 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47529 if (d->vmode == V16SImode
47530 || d->vmode == V32HImode
47531 || d->vmode == V64QImode)
47533 /* First see if vpermq can be used for
47534 V16SImode/V32HImode/V64QImode. */
47535 if (valid_perm_using_mode_p (V8DImode, d))
47537 for (i = 0; i < 8; i++)
47538 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47541 target = gen_reg_rtx (V8DImode);
47542 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47545 emit_move_insn (d->target,
47546 gen_lowpart (d->vmode, target));
47552 /* Next see if vpermd can be used. */
47553 if (valid_perm_using_mode_p (V16SImode, d))
47556 /* Or if vpermps can be used. */
47557 else if (d->vmode == V16SFmode)
47559 if (vmode == V64QImode)
47561 /* vpshufb only works intra lanes, it is not
47562 possible to shuffle bytes in between the lanes. */
47563 for (i = 0; i < nelt; ++i)
47564 if ((d->perm[i] ^ i) & (nelt / 4))
/* Build the selector constant: dword-granular for vpermd/vpermps,
   byte-granular (eltsz bytes per element) otherwise.  */
47575 if (vmode == V8SImode)
47576 for (i = 0; i < 8; ++i)
47577 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47578 else if (vmode == V16SImode)
47579 for (i = 0; i < 16; ++i)
47580 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47583 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47584 if (!d->one_operand_p)
47585 mask = 2 * nelt - 1;
47586 else if (vmode == V16QImode)
47588 else if (vmode == V64QImode)
47589 mask = nelt / 4 - 1;
47591 mask = nelt / 2 - 1;
47593 for (i = 0; i < nelt; ++i)
47595 unsigned j, e = d->perm[i] & mask;
47596 for (j = 0; j < eltsz; ++j)
47597 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47601 vperm = gen_rtx_CONST_VECTOR (vmode,
47602 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47603 vperm = force_reg (vmode, vperm);
47605 target = d->target;
47606 if (d->vmode != vmode)
47607 target = gen_reg_rtx (vmode);
47608 op0 = gen_lowpart (vmode, d->op0);
/* Emit the actual shuffle instruction matching the chosen mode.  */
47609 if (d->one_operand_p)
47611 if (vmode == V16QImode)
47612 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47613 else if (vmode == V32QImode)
47614 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47615 else if (vmode == V64QImode)
47616 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47617 else if (vmode == V8SFmode)
47618 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47619 else if (vmode == V8SImode)
47620 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47621 else if (vmode == V16SFmode)
47622 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47623 else if (vmode == V16SImode)
47624 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47626 gcc_unreachable ();
47630 op1 = gen_lowpart (vmode, d->op1);
47631 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47633 if (target != d->target)
47634 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47639 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47640 in a single instruction. */
/* NOTE(review): extraction elides the return type, braces, returns, and
   the `switch (d->vmode)` case labels for the broadcast generators; only
   comments were added, tokens unchanged.  */
47643 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47645 unsigned i, nelt = d->nelt;
47646 unsigned char perm2[MAX_VECT_LEN];
47648 /* Check plain VEC_SELECT first, because AVX has instructions that could
47649 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47650 input where SEL+CONCAT may not. */
47651 if (d->one_operand_p)
47653 int mask = nelt - 1;
47654 bool identity_perm = true;
47655 bool broadcast_perm = true;
/* Classify the permutation in one pass: identity and broadcast get
   dedicated fast paths below.  */
47657 for (i = 0; i < nelt; i++)
47659 perm2[i] = d->perm[i] & mask;
47661 identity_perm = false;
47663 broadcast_perm = false;
47669 emit_move_insn (d->target, d->op0);
47672 else if (broadcast_perm && TARGET_AVX2)
47674 /* Use vpbroadcast{b,w,d}. */
47675 rtx (*gen) (rtx, rtx) = NULL;
47679 if (TARGET_AVX512BW)
47680 gen = gen_avx512bw_vec_dupv64qi_1;
47683 gen = gen_avx2_pbroadcastv32qi_1;
47686 if (TARGET_AVX512BW)
47687 gen = gen_avx512bw_vec_dupv32hi_1;
47690 gen = gen_avx2_pbroadcastv16hi_1;
47693 if (TARGET_AVX512F)
47694 gen = gen_avx512f_vec_dupv16si_1;
47697 gen = gen_avx2_pbroadcastv8si_1;
47700 gen = gen_avx2_pbroadcastv16qi;
47703 gen = gen_avx2_pbroadcastv8hi;
47706 if (TARGET_AVX512F)
47707 gen = gen_avx512f_vec_dupv16sf_1;
47710 gen = gen_avx2_vec_dupv8sf_1;
47713 if (TARGET_AVX512F)
47714 gen = gen_avx512f_vec_dupv8df_1;
47717 if (TARGET_AVX512F)
47718 gen = gen_avx512f_vec_dupv8di_1;
47720 /* For other modes prefer other shuffles this function creates. */
47726 emit_insn (gen (d->target, d->op0));
47731 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47734 /* There are plenty of patterns in sse.md that are written for
47735 SEL+CONCAT and are not replicated for a single op. Perhaps
47736 that should be changed, to avoid the nastiness here. */
47738 /* Recognize interleave style patterns, which means incrementing
47739 every other permutation operand. */
47740 for (i = 0; i < nelt; i += 2)
47742 perm2[i] = d->perm[i] & mask;
47743 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47745 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47749 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47752 for (i = 0; i < nelt; i += 4)
47754 perm2[i + 0] = d->perm[i + 0] & mask;
47755 perm2[i + 1] = d->perm[i + 1] & mask;
47756 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47757 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47760 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47766 /* Finally, try the fully general two operand permute. */
47767 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47771 /* Recognize interleave style patterns with reversed operands. */
47772 if (!d->one_operand_p)
47774 for (i = 0; i < nelt; ++i)
47776 unsigned e = d->perm[i];
47784 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
/* Cascade through the remaining single-instruction strategies in cost
   order; each helper returns false when it cannot match D.  */
47789 /* Try the SSE4.1 blend variable merge instructions. */
47790 if (expand_vec_perm_blend (d))
47793 /* Try one of the AVX vpermil variable permutations. */
47794 if (expand_vec_perm_vpermil (d))
47797 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47798 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47799 if (expand_vec_perm_pshufb (d))
47802 /* Try the AVX2 vpalignr instruction. */
47803 if (expand_vec_perm_palignr (d, true))
47806 /* Try the AVX512F vpermi2 instructions. */
47807 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47813 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47814 in terms of a pair of pshuflw + pshufhw instructions. */
/* NOTE(review): extraction elides the return type, braces, returns, and
   the identity-selector assignments inside the two emit loops; only
   comments were added.  */
47817 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47819 unsigned char perm2[MAX_VECT_LEN];
47823 if (d->vmode != V8HImode || !d->one_operand_p)
47826 /* The two permutations only operate in 64-bit lanes. */
47827 for (i = 0; i < 4; ++i)
47828 if (d->perm[i] >= 4)
47830 for (i = 4; i < 8; ++i)
47831 if (d->perm[i] < 4)
/* First shuffle the low four words, passing the high half through,
   then shuffle the high four words of that result.  */
47837 /* Emit the pshuflw. */
47838 memcpy (perm2, d->perm, 4);
47839 for (i = 4; i < 8; ++i)
47841 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47844 /* Emit the pshufhw. */
47845 memcpy (perm2 + 4, d->perm + 4, 4);
47846 for (i = 0; i < 4; ++i)
47848 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47854 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47855 the permutation using the SSSE3 palignr instruction. This succeeds
47856 when all of the elements in PERM fit within one vector and we merely
47857 need to shift them down so that a single vector permutation has a
47858 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47859 the vpalignr instruction itself can perform the requested permutation. */
/* NOTE(review): extraction elides the return type, braces, returns, and
   several statements inside the min/max scan and the swap handling;
   only comments were added, tokens unchanged.  */
47862 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47864 unsigned i, nelt = d->nelt;
47865 unsigned min, max, minswap, maxswap;
47866 bool in_order, ok, swap = false;
47868 struct expand_vec_perm_d dcopy;
47870 /* Even with AVX, palignr only operates on 128-bit vectors,
47871 in AVX2 palignr operates on both 128-bit lanes. */
47872 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47873 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
/* Scan the permutation for the smallest/largest referenced element,
   both as given and with the operands swapped, to find a window that
   palignr's byte shift can reach.  */
47878 minswap = 2 * nelt;
47880 for (i = 0; i < nelt; ++i)
47882 unsigned e = d->perm[i];
47883 unsigned eswap = d->perm[i] ^ nelt;
47884 if (GET_MODE_SIZE (d->vmode) == 32)
47886 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47887 eswap = e ^ (nelt / 2);
47893 if (eswap < minswap)
47895 if (eswap > maxswap)
47899 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47901 if (d->one_operand_p
47903 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47904 ? nelt / 2 : nelt))
47911 /* Given that we have SSSE3, we know we'll be able to implement the
47912 single operand permutation after the palignr with pshufb for
47913 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47915 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47921 dcopy.op0 = d->op1;
47922 dcopy.op1 = d->op0;
47923 for (i = 0; i < nelt; ++i)
47924 dcopy.perm[i] ^= nelt;
/* Rebase the permutation onto the shifted vector and note whether the
   shift alone already produces the requested order.  */
47928 for (i = 0; i < nelt; ++i)
47930 unsigned e = dcopy.perm[i];
47931 if (GET_MODE_SIZE (d->vmode) == 32
47933 && (e & (nelt / 2 - 1)) < min)
47934 e = e - min - (nelt / 2);
47941 dcopy.one_operand_p = true;
47943 if (single_insn_only_p && !in_order)
47946 /* For AVX2, test whether we can permute the result in one instruction. */
47951 dcopy.op1 = dcopy.op0;
47952 return expand_vec_perm_1 (&dcopy);
47955 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47956 if (GET_MODE_SIZE (d->vmode) == 16)
47958 target = gen_reg_rtx (TImode);
47959 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47960 gen_lowpart (TImode, dcopy.op0), shift));
47964 target = gen_reg_rtx (V2TImode);
47965 emit_insn (gen_avx2_palignrv2ti (target,
47966 gen_lowpart (V2TImode, dcopy.op1),
47967 gen_lowpart (V2TImode, dcopy.op0),
47971 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47973 /* Test for the degenerate case where the alignment by itself
47974 produces the desired permutation. */
47977 emit_move_insn (d->target, dcopy.op0);
47981 ok = expand_vec_perm_1 (&dcopy);
47982 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47987 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47988 the permutation using the SSE4_1 pblendv instruction. Potentially
47989 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
/* NOTE(review): extraction elides the return type, braces, returns, and
   some dcopy/dcopy1 setup statements; only comments were added.  */
47992 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47994 unsigned i, which, nelt = d->nelt;
47995 struct expand_vec_perm_d dcopy, dcopy1;
47996 machine_mode vmode = d->vmode;
47999 /* Use the same checks as in expand_vec_perm_blend. */
48000 if (d->one_operand_p)
48002 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48004 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48006 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48011 /* Figure out where permutation elements stay not in their
48012 respective lanes. */
48013 for (i = 0, which = 0; i < nelt; ++i)
48015 unsigned e = d->perm[i];
48017 which |= (e < nelt ? 1 : 2);
48019 /* We can pblend the part where elements stay not in their
48020 respective lanes only when these elements are all in one
48021 half of a permutation.
48022 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
48023 lanes, but both 8 and 9 >= 8
48024 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
48025 respective lanes and 8 >= 8, but 2 not. */
48026 if (which != 1 && which != 2)
48028 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
48031 /* First we apply one operand permutation to the part where
48032 elements stay not in their respective lanes. */
48035 dcopy.op0 = dcopy.op1 = d->op1;
48037 dcopy.op0 = dcopy.op1 = d->op0;
48039 dcopy.target = gen_reg_rtx (vmode);
48040 dcopy.one_operand_p = true;
48042 for (i = 0; i < nelt; ++i)
48043 dcopy.perm[i] = d->perm[i] & (nelt - 1);
48045 ok = expand_vec_perm_1 (&dcopy);
48046 if (GET_MODE_SIZE (vmode) != 16 && !ok)
48053 /* Next we put permuted elements into their positions. */
48056 dcopy1.op1 = dcopy.target;
48058 dcopy1.op0 = dcopy.target;
/* The second permutation is a pure blend: element i comes either from
   its own lane of the untouched operand or from the permuted result.  */
48060 for (i = 0; i < nelt; ++i)
48061 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
48063 ok = expand_vec_perm_blend (&dcopy1);
48069 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
48071 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48072 a two vector permutation into a single vector permutation by using
48073 an interleave operation to merge the vectors. */
48076 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
48078 struct expand_vec_perm_d dremap, dfinal;
48079 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
48080 unsigned HOST_WIDE_INT contents;
48081 unsigned char remap[2 * MAX_VECT_LEN];
48083 bool ok, same_halves = false;
48085 if (GET_MODE_SIZE (d->vmode) == 16)
48087 if (d->one_operand_p)
48090 else if (GET_MODE_SIZE (d->vmode) == 32)
48094 /* For 32-byte modes allow even d->one_operand_p.
48095 The lack of cross-lane shuffling in some instructions
48096 might prevent a single insn shuffle. */
48098 dfinal.testing_p = true;
48099 /* If expand_vec_perm_interleave3 can expand this into
48100 a 3 insn sequence, give up and let it be expanded as
48101 3 insn sequence. While that is one insn longer,
48102 it doesn't need a memory operand and in the common
48103 case that both interleave low and high permutations
48104 with the same operands are adjacent needs 4 insns
48105 for both after CSE. */
48106 if (expand_vec_perm_interleave3 (&dfinal))
48112 /* Examine from whence the elements come. */
48114 for (i = 0; i < nelt; ++i)
48115 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
48117 memset (remap, 0xff, sizeof (remap));
48120 if (GET_MODE_SIZE (d->vmode) == 16)
48122 unsigned HOST_WIDE_INT h1, h2, h3, h4;
48124 /* Split the two input vectors into 4 halves. */
48125 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
48130 /* If the elements from the low halves use interleave low, and similarly
48131 for interleave high. If the elements are from mis-matched halves, we
48132 can use shufps for V4SF/V4SI or do a DImode shuffle. */
48133 if ((contents & (h1 | h3)) == contents)
48136 for (i = 0; i < nelt2; ++i)
48139 remap[i + nelt] = i * 2 + 1;
48140 dremap.perm[i * 2] = i;
48141 dremap.perm[i * 2 + 1] = i + nelt;
48143 if (!TARGET_SSE2 && d->vmode == V4SImode)
48144 dremap.vmode = V4SFmode;
48146 else if ((contents & (h2 | h4)) == contents)
48149 for (i = 0; i < nelt2; ++i)
48151 remap[i + nelt2] = i * 2;
48152 remap[i + nelt + nelt2] = i * 2 + 1;
48153 dremap.perm[i * 2] = i + nelt2;
48154 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
48156 if (!TARGET_SSE2 && d->vmode == V4SImode)
48157 dremap.vmode = V4SFmode;
48159 else if ((contents & (h1 | h4)) == contents)
48162 for (i = 0; i < nelt2; ++i)
48165 remap[i + nelt + nelt2] = i + nelt2;
48166 dremap.perm[i] = i;
48167 dremap.perm[i + nelt2] = i + nelt + nelt2;
48172 dremap.vmode = V2DImode;
48174 dremap.perm[0] = 0;
48175 dremap.perm[1] = 3;
48178 else if ((contents & (h2 | h3)) == contents)
48181 for (i = 0; i < nelt2; ++i)
48183 remap[i + nelt2] = i;
48184 remap[i + nelt] = i + nelt2;
48185 dremap.perm[i] = i + nelt2;
48186 dremap.perm[i + nelt2] = i + nelt;
48191 dremap.vmode = V2DImode;
48193 dremap.perm[0] = 1;
48194 dremap.perm[1] = 2;
48202 unsigned int nelt4 = nelt / 4, nzcnt = 0;
48203 unsigned HOST_WIDE_INT q[8];
48204 unsigned int nonzero_halves[4];
48206 /* Split the two input vectors into 8 quarters. */
48207 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
48208 for (i = 1; i < 8; ++i)
48209 q[i] = q[0] << (nelt4 * i);
48210 for (i = 0; i < 4; ++i)
48211 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
48213 nonzero_halves[nzcnt] = i;
48219 gcc_assert (d->one_operand_p);
48220 nonzero_halves[1] = nonzero_halves[0];
48221 same_halves = true;
48223 else if (d->one_operand_p)
48225 gcc_assert (nonzero_halves[0] == 0);
48226 gcc_assert (nonzero_halves[1] == 1);
48231 if (d->perm[0] / nelt2 == nonzero_halves[1])
48233 /* Attempt to increase the likelihood that dfinal
48234 shuffle will be intra-lane. */
48235 char tmph = nonzero_halves[0];
48236 nonzero_halves[0] = nonzero_halves[1];
48237 nonzero_halves[1] = tmph;
48240 /* vperm2f128 or vperm2i128. */
48241 for (i = 0; i < nelt2; ++i)
48243 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
48244 remap[i + nonzero_halves[0] * nelt2] = i;
48245 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48246 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48249 if (d->vmode != V8SFmode
48250 && d->vmode != V4DFmode
48251 && d->vmode != V8SImode)
48253 dremap.vmode = V8SImode;
48255 for (i = 0; i < 4; ++i)
48257 dremap.perm[i] = i + nonzero_halves[0] * 4;
48258 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48262 else if (d->one_operand_p)
48264 else if (TARGET_AVX2
48265 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48268 for (i = 0; i < nelt4; ++i)
48271 remap[i + nelt] = i * 2 + 1;
48272 remap[i + nelt2] = i * 2 + nelt2;
48273 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48274 dremap.perm[i * 2] = i;
48275 dremap.perm[i * 2 + 1] = i + nelt;
48276 dremap.perm[i * 2 + nelt2] = i + nelt2;
48277 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48280 else if (TARGET_AVX2
48281 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48284 for (i = 0; i < nelt4; ++i)
48286 remap[i + nelt4] = i * 2;
48287 remap[i + nelt + nelt4] = i * 2 + 1;
48288 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48289 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48290 dremap.perm[i * 2] = i + nelt4;
48291 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48292 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48293 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48300 /* Use the remapping array set up above to move the elements from their
48301 swizzled locations into their final destinations. */
48303 for (i = 0; i < nelt; ++i)
48305 unsigned e = remap[d->perm[i]];
48306 gcc_assert (e < nelt);
48307 /* If same_halves is true, both halves of the remapped vector are the
48308 same. Avoid cross-lane accesses if possible. */
48309 if (same_halves && i >= nelt2)
48311 gcc_assert (e < nelt2);
48312 dfinal.perm[i] = e + nelt2;
48315 dfinal.perm[i] = e;
48319 dremap.target = gen_reg_rtx (dremap.vmode);
48320 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48322 dfinal.op1 = dfinal.op0;
48323 dfinal.one_operand_p = true;
48325 /* Test if the final remap can be done with a single insn. For V4SFmode or
48326 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48328 ok = expand_vec_perm_1 (&dfinal);
48329 seq = get_insns ();
48338 if (dremap.vmode != dfinal.vmode)
48340 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48341 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48344 ok = expand_vec_perm_1 (&dremap);
48351 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48352 a single vector cross-lane permutation into vpermq followed
48353 by any of the single insn permutations.
   NOTE(review): elided extract — some lines (return statements, the
   mode/target guard's first clause) are missing from view.  */
48356 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48358 struct expand_vec_perm_d dremap, dfinal;
48359 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
/* contents[k] is a 4-bit mask: which quarters of the input feed half k
   of the output.  */
48360 unsigned contents[2];
48364 && (d->vmode == V32QImode || d->vmode == V16HImode)
48365 && d->one_operand_p))
48370 for (i = 0; i < nelt2; ++i)
48372 contents[0] |= 1u << (d->perm[i] / nelt4);
48373 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Each output half may draw from at most 2 source quarters, since vpermq
   can place only 2 qwords per result lane.  */
48376 for (i = 0; i < 2; ++i)
48378 unsigned int cnt = 0;
48379 for (j = 0; j < 4; ++j)
48380 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* DREMAP: vpermq gathering the needed qwords into each lane.  */
48388 dremap.vmode = V4DImode;
48390 dremap.target = gen_reg_rtx (V4DImode);
48391 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48392 dremap.op1 = dremap.op0;
48393 dremap.one_operand_p = true;
48394 for (i = 0; i < 2; ++i)
48396 unsigned int cnt = 0;
48397 for (j = 0; j < 4; ++j)
48398 if ((contents[i] & (1u << j)) != 0)
48399 dremap.perm[2 * i + cnt++] = j;
/* Pad unused slots with quarter 0 — harmless filler.  */
48400 for (; cnt < 2; ++cnt)
48401 dremap.perm[2 * i + cnt] = 0;
/* DFINAL: intra-lane shuffle of the vpermq result.  */
48405 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48406 dfinal.op1 = dfinal.op0;
48407 dfinal.one_operand_p = true;
48408 for (i = 0, j = 0; i < nelt; ++i)
48412 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48413 if ((d->perm[i] / nelt4) == dremap.perm[j])
48415 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48416 dfinal.perm[i] |= nelt4;
48418 gcc_unreachable ();
48421 ok = expand_vec_perm_1 (&dremap);
48424 ok = expand_vec_perm_1 (&dfinal);
48430 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48431 a vector permutation using two instructions, vperm2f128 resp.
48432 vperm2i128 followed by any single in-lane permutation.
   NOTE(review): elided extract — loop braces and return paths are
   missing from view; comments reflect only the visible statements.  */
48435 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48437 struct expand_vec_perm_d dfirst, dsecond;
48438 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48442 || GET_MODE_SIZE (d->vmode) != 32
48443 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48447 dsecond.one_operand_p = false;
48448 dsecond.testing_p = true;
48450 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48451 immediate. For perm < 16 the second permutation uses
48452 d->op0 as first operand, for perm >= 16 it uses d->op1
48453 as first operand. The second operand is the result of
   vperm2[fi]128 (continuation elided in this extract).  */
/* Exhaustively try all 32 candidate lane selections.  */
48455 for (perm = 0; perm < 32; perm++)
48457 /* Ignore permutations which do not move anything cross-lane. */
48460 /* The second shuffle for e.g. V4DFmode has
48461 0123 and ABCD operands.
48462 Ignore AB23, as 23 is already in the second lane
48463 of the first operand. */
48464 if ((perm & 0xc) == (1 << 2)) continue;
48465 /* And 01CD, as 01 is in the first lane of the first
   operand (comment continuation elided).  */
48467 if ((perm & 3) == 0) continue;
48468 /* And 4567, as then the vperm2[fi]128 doesn't change
48469 anything on the original 4567 second operand. */
48470 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48474 /* The second shuffle for e.g. V4DFmode has
48475 4567 and ABCD operands.
48476 Ignore AB67, as 67 is already in the second lane
48477 of the first operand. */
48478 if ((perm & 0xc) == (3 << 2)) continue;
48479 /* And 45CD, as 45 is in the first lane of the first
   operand (comment continuation elided).  */
48481 if ((perm & 3) == 2) continue;
48482 /* And 0123, as then the vperm2[fi]128 doesn't change
48483 anything on the original 0123 first operand. */
48484 if ((perm & 0xf) == (1 << 2)) continue;
/* Build the second (in-lane) shuffle selector for this candidate.  */
48487 for (i = 0; i < nelt; i++)
48489 j = d->perm[i] / nelt2;
48490 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48491 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48492 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48493 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48501 ok = expand_vec_perm_1 (&dsecond);
48512 /* Found a usable second shuffle. dfirst will be
48513 vperm2f128 on d->op0 and d->op1. */
48514 dsecond.testing_p = false;
48516 dfirst.target = gen_reg_rtx (d->vmode);
48517 for (i = 0; i < nelt; i++)
48518 dfirst.perm[i] = (i & (nelt2 - 1))
48519 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48521 canonicalize_perm (&dfirst);
48522 ok = expand_vec_perm_1 (&dfirst);
48525 /* And dsecond is some single insn shuffle, taking
48526 d->op0 and result of vperm2f128 (if perm < 16) or
48527 d->op1 and result of vperm2f128 (otherwise). */
48529 dsecond.op0 = dsecond.op1;
48530 dsecond.op1 = dfirst.target;
48532 ok = expand_vec_perm_1 (&dsecond);
48538 /* For one operand, the only useful vperm2f128 permutation is 0x01
   (swap the two lanes — continuation elided in this extract).  */
48540 if (d->one_operand_p)
48547 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48548 a two vector permutation using 2 intra-lane interleave insns
48549 and cross-lane shuffle for 32-byte vectors.
   NOTE(review): elided extract — the perm[0] dispatch conditions that
   select high vs. low interleave are missing between the gen = lines.  */
48552 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48555 rtx (*gen) (rtx, rtx, rtx);
48557 if (d->one_operand_p)
48559 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48561 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
/* The pattern must be a pure interleave: perm[0] selects low or high
   half, then pairs alternate between op0 and op1.  */
48567 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48569 for (i = 0; i < nelt; i += 2)
48570 if (d->perm[i] != d->perm[0] + i / 2
48571 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Select the interleave generator for the element mode.  */
48581 gen = gen_vec_interleave_highv32qi;
48583 gen = gen_vec_interleave_lowv32qi;
48587 gen = gen_vec_interleave_highv16hi;
48589 gen = gen_vec_interleave_lowv16hi;
48593 gen = gen_vec_interleave_highv8si;
48595 gen = gen_vec_interleave_lowv8si;
48599 gen = gen_vec_interleave_highv4di;
48601 gen = gen_vec_interleave_lowv4di;
48605 gen = gen_vec_interleave_highv8sf;
48607 gen = gen_vec_interleave_lowv8sf;
48611 gen = gen_vec_interleave_highv4df;
48613 gen = gen_vec_interleave_lowv4df;
48616 gcc_unreachable ();
48619 emit_insn (gen (d->target, d->op0, d->op1));
48623 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48624 a single vector permutation using a single intra-lane vector
48625 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48626 the non-swapped and swapped vectors together.
   NOTE(review): elided extract — the TARGET_AVX guard's first clause and
   some msk-accumulation lines are missing from view.  */
48629 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48631 struct expand_vec_perm_d dfirst, dsecond;
48632 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48635 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48639 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48640 || !d->one_operand_p)
/* 0xff marks "slot unassigned" in dfirst.perm below.  */
48644 for (i = 0; i < nelt; i++)
48645 dfirst.perm[i] = 0xff;
48646 for (i = 0, msk = 0; i < nelt; i++)
/* Place each requested element in the lane it can be blended from;
   bail out if two outputs demand conflicting values in one slot.  */
48648 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48649 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48651 dfirst.perm[j] = d->perm[i];
48655 for (i = 0; i < nelt; i++)
48656 if (dfirst.perm[i] == 0xff)
48657 dfirst.perm[i] = i;
48660 dfirst.target = gen_reg_rtx (dfirst.vmode);
48663 ok = expand_vec_perm_1 (&dfirst);
48664 seq = get_insns ();
/* dsecond: swap the two 128-bit lanes of dfirst's result.  */
48676 dsecond.op0 = dfirst.target;
48677 dsecond.op1 = dfirst.target;
48678 dsecond.one_operand_p = true;
48679 dsecond.target = gen_reg_rtx (dsecond.vmode);
48680 for (i = 0; i < nelt; i++)
48681 dsecond.perm[i] = i ^ nelt2;
48683 ok = expand_vec_perm_1 (&dsecond);
48686 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48687 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48691 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48692 permutation using two vperm2f128, followed by a vshufpd insn blending
48693 the two vectors together.
   NOTE(review): elided extract — dfirst/dsecond/dthird initialization
   (memcpy from *d, vmode setup) is missing between the guard and the
   perm assignments below.  */
48696 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48698 struct expand_vec_perm_d dfirst, dsecond, dthird;
48701 if (!TARGET_AVX || (d->vmode != V4DFmode))
/* dfirst gathers the even-position source pairs, dsecond the odd-position
   pairs; dthird selects within each pair via vshufpd.  */
48711 dfirst.perm[0] = (d->perm[0] & ~1);
48712 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48713 dfirst.perm[2] = (d->perm[2] & ~1);
48714 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48715 dsecond.perm[0] = (d->perm[1] & ~1);
48716 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48717 dsecond.perm[2] = (d->perm[3] & ~1);
48718 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48719 dthird.perm[0] = (d->perm[0] % 2);
48720 dthird.perm[1] = (d->perm[1] % 2) + 4;
48721 dthird.perm[2] = (d->perm[2] % 2) + 2;
48722 dthird.perm[3] = (d->perm[3] % 2) + 6;
48724 dfirst.target = gen_reg_rtx (dfirst.vmode);
48725 dsecond.target = gen_reg_rtx (dsecond.vmode);
48726 dthird.op0 = dfirst.target;
48727 dthird.op1 = dsecond.target;
48728 dthird.one_operand_p = false;
48730 canonicalize_perm (&dfirst);
48731 canonicalize_perm (&dsecond);
/* All three sub-shuffles must each expand to a single insn.  */
48733 ok = expand_vec_perm_1 (&dfirst)
48734 && expand_vec_perm_1 (&dsecond)
48735 && expand_vec_perm_1 (&dthird);
48742 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48743 permutation with two pshufb insns and an ior. We should have already
48744 failed all two instruction sequences.
   NOTE(review): elided extract — the early-return lines and nelt
   initialization are missing from view.  */
48747 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48749 rtx rperm[2][16], vperm, l, h, op, m128;
48750 unsigned int i, nelt, eltsz;
48752 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48754 gcc_assert (!d->one_operand_p);
48760 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48762 /* Generate two permutation masks. If the required element is within
48763 the given vector it is shuffled into the proper lane. If the required
48764 element is in the other vector, force a zero into the lane by setting
48765 bit 7 in the permutation mask. */
48766 m128 = GEN_INT (-128);
48767 for (i = 0; i < nelt; ++i)
48769 unsigned j, e = d->perm[i];
48770 unsigned which = (e >= nelt);
48774 for (j = 0; j < eltsz; ++j)
48776 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48777 rperm[1-which][i*eltsz + j] = m128;
48781 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48782 vperm = force_reg (V16QImode, vperm);
/* l = pshufb of op0 with the first mask (zeros where op1 supplies).  */
48784 l = gen_reg_rtx (V16QImode);
48785 op = gen_lowpart (V16QImode, d->op0);
48786 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48788 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48789 vperm = force_reg (V16QImode, vperm);
/* h = pshufb of op1 with the complementary mask.  */
48791 h = gen_reg_rtx (V16QImode);
48792 op = gen_lowpart (V16QImode, d->op1);
48793 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* Combine: l | h gives the final permutation.  */
48796 if (d->vmode != V16QImode)
48797 op = gen_reg_rtx (V16QImode);
48798 emit_insn (gen_iorv16qi3 (op, l, h));
48799 if (op != d->target)
48800 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48805 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
48806 with two vpshufb insns, vpermq and vpor. We should have already failed
48807 all two or three instruction sequences.
   NOTE(review): elided extract — the TARGET_AVX2 clause of the guard and
   nelt initialization are missing from view.  */
48810 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48812 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48813 unsigned int i, nelt, eltsz;
48816 || !d->one_operand_p
48817 || (d->vmode != V32QImode && d->vmode != V16HImode))
48824 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48826 /* Generate two permutation masks. If the required element is within
48827 the same lane, it is shuffled in. If the required element from the
48828 other lane, force a zero by setting bit 7 in the permutation mask.
48829 In the other mask the mask has non-negative elements if element
48830 is requested from the other lane, but also moved to the other lane,
48831 so that the result of vpshufb can have the two V2TImode halves
   swapped afterwards (continuation elided in this extract).  */
48833 m128 = GEN_INT (-128);
48834 for (i = 0; i < nelt; ++i)
48836 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
/* which is nonzero (in byte units) when the element crosses lanes.  */
48837 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48839 for (j = 0; j < eltsz; ++j)
48841 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48842 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48846 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48847 vperm = force_reg (V32QImode, vperm);
/* h = vpshufb gathering the cross-lane elements (pre-swap positions).  */
48849 h = gen_reg_rtx (V32QImode);
48850 op = gen_lowpart (V32QImode, d->op0);
48851 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48853 /* Swap the 128-byte lanes of h into hp. */
48854 hp = gen_reg_rtx (V4DImode);
48855 op = gen_lowpart (V4DImode, h);
48856 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
   const1_rtx));
48859 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48860 vperm = force_reg (V32QImode, vperm);
/* l = vpshufb gathering the same-lane elements.  */
48862 l = gen_reg_rtx (V32QImode);
48863 op = gen_lowpart (V32QImode, d->op0);
48864 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48867 if (d->vmode != V32QImode)
48868 op = gen_reg_rtx (V32QImode);
48869 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48870 if (op != d->target)
48871 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48876 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48877 and extract-odd permutations of two V32QImode and V16QImode operand
48878 with two vpshufb insns, vpor and vpermq. We should have already
48879 failed all two or three instruction sequences.
   NOTE(review): elided extract — the TARGET_AVX2 clause of the guard and
   nelt initialization are missing from view.  */
48882 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48884 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48885 unsigned int i, nelt, eltsz;
48888 || d->one_operand_p
48889 || (d->vmode != V32QImode && d->vmode != V16HImode))
/* Verify the permutation really is extract-even/odd (modulo lane).  */
48892 for (i = 0; i < d->nelt; ++i)
48893 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48900 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48902 /* Generate two permutation masks. In the first permutation mask
48903 the first quarter will contain indexes for the first half
48904 of the op0, the second quarter will contain bit 7 set, third quarter
48905 will contain indexes for the second half of the op0 and the
48906 last quarter bit 7 set. In the second permutation mask
48907 the first quarter will contain bit 7 set, the second quarter
48908 indexes for the first half of the op1, the third quarter bit 7 set
48909 and last quarter indexes for the second half of the op1.
48910 I.e. the first mask e.g. for V32QImode extract even will be:
48911 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48912 (all values masked with 0xf except for -128) and second mask
48913 for extract even will be
48914 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48915 m128 = GEN_INT (-128);
48916 for (i = 0; i < nelt; ++i)
48918 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48919 unsigned which = d->perm[i] >= nelt;
48920 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48922 for (j = 0; j < eltsz; ++j)
48924 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48925 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48929 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48930 vperm = force_reg (V32QImode, vperm);
48932 l = gen_reg_rtx (V32QImode);
48933 op = gen_lowpart (V32QImode, d->op0);
48934 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48936 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48937 vperm = force_reg (V32QImode, vperm);
48939 h = gen_reg_rtx (V32QImode);
48940 op = gen_lowpart (V32QImode, d->op1);
48941 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48943 ior = gen_reg_rtx (V32QImode);
48944 emit_insn (gen_iorv32qi3 (ior, l, h));
48946 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48947 op = gen_reg_rtx (V4DImode);
48948 ior = gen_lowpart (V4DImode, ior);
48949 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48950 const1_rtx, GEN_INT (3)));
48951 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48956 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48957 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48958 with two "and" and "pack" or two "shift" and "pack" insns. We should
48959 have already failed all two instruction sequences.
   NOTE(review): elided extract — the mode switch labels, the odd/even
   detection (odd = d->perm[0]) and the c/s shift-count setup are missing
   from view.  */
48962 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48964 rtx op, dop0, dop1, t, rperm[16];
48965 unsigned i, odd, c, s, nelt = d->nelt;
/* end_perm: 256-bit packs interleave lanes, so the result needs a final
   vpermq to put the quarters back in order.  */
48966 bool end_perm = false;
48967 machine_mode half_mode;
48968 rtx (*gen_and) (rtx, rtx, rtx);
48969 rtx (*gen_pack) (rtx, rtx, rtx);
48970 rtx (*gen_shift) (rtx, rtx, rtx);
48972 if (d->one_operand_p)
48978 /* Required for "pack". */
48979 if (!TARGET_SSE4_1)
48983 half_mode = V4SImode;
48984 gen_and = gen_andv4si3;
48985 gen_pack = gen_sse4_1_packusdw;
48986 gen_shift = gen_lshrv4si3;
48989 /* No check as all instructions are SSE2. */
48992 half_mode = V8HImode;
48993 gen_and = gen_andv8hi3;
48994 gen_pack = gen_sse2_packuswb;
48995 gen_shift = gen_lshrv8hi3;
49002 half_mode = V8SImode;
49003 gen_and = gen_andv8si3;
49004 gen_pack = gen_avx2_packusdw;
49005 gen_shift = gen_lshrv8si3;
49013 half_mode = V16HImode;
49014 gen_and = gen_andv16hi3;
49015 gen_pack = gen_avx2_packuswb;
49016 gen_shift = gen_lshrv16hi3;
49020 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
49021 general shuffles. */
49025 /* Check that permutation is even or odd. */
49030 for (i = 1; i < nelt; ++i)
49031 if (d->perm[i] != 2 * i + odd)
49037 dop0 = gen_reg_rtx (half_mode);
49038 dop1 = gen_reg_rtx (half_mode);
/* Even extraction: mask off the high half of each wide element, then
   pack with unsigned saturation (never saturates after the mask).  */
49041 for (i = 0; i < nelt / 2; i++)
49042 rperm[i] = GEN_INT (c);
49043 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
49044 t = force_reg (half_mode, t);
49045 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
49046 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
/* Odd extraction: shift the high half down instead of masking.  */
49050 emit_insn (gen_shift (dop0,
49051 gen_lowpart (half_mode, d->op0),
49053 emit_insn (gen_shift (dop1,
49054 gen_lowpart (half_mode, d->op1),
49057 /* In AVX2 for 256 bit case we need to permute pack result. */
49058 if (TARGET_AVX2 && end_perm)
49060 op = gen_reg_rtx (d->vmode);
49061 t = gen_reg_rtx (V4DImode);
49062 emit_insn (gen_pack (op, dop0, dop1));
49063 emit_insn (gen_avx2_permv4di_1 (t,
49064 gen_lowpart (V4DImode, op),
   const0_rtx, const2_rtx, const1_rtx, GEN_INT (3)));
49069 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
49072 emit_insn (gen_pack (d->target, dop0, dop1));
49077 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
49078 and extract-odd permutations of two V64QI operands
49079 with two "shifts", two "truncs" and one "concat" insns for "odd"
49080 and two "truncs" and one concat insn for "even."
49081 Have already failed all two instruction sequences.
   NOTE(review): elided extract — odd detection (odd = d->perm[0]) and
   the if (odd) branch structure are missing from view.  */
49084 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
49086 rtx t1, t2, t3, t4;
49087 unsigned i, odd, nelt = d->nelt;
49089 if (!TARGET_AVX512BW
49090 || d->one_operand_p
49091 || d->vmode != V64QImode)
49094 /* Check that permutation is even or odd. */
49099 for (i = 1; i < nelt; ++i)
49100 if (d->perm[i] != 2 * i + odd)
/* Odd: shift each 16-bit element right by 8 so vpmovwb keeps the odd
   bytes.  */
49109 t1 = gen_reg_rtx (V32HImode);
49110 t2 = gen_reg_rtx (V32HImode);
49111 emit_insn (gen_lshrv32hi3 (t1,
49112 gen_lowpart (V32HImode, d->op0),
   GEN_INT (8)));
49114 emit_insn (gen_lshrv32hi3 (t2,
49115 gen_lowpart (V32HImode, d->op1),
   GEN_INT (8)));
/* Even: vpmovwb already keeps the low (even) byte of each word.  */
49120 t1 = gen_lowpart (V32HImode, d->op0);
49121 t2 = gen_lowpart (V32HImode, d->op1);
49124 t3 = gen_reg_rtx (V32QImode);
49125 t4 = gen_reg_rtx (V32QImode);
49126 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
49127 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
49128 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
49133 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
49134 and extract-odd permutations.
   NOTE(review): elided extract — this function is a large switch on
   d->vmode; the case labels and several guard/return lines are missing
   from view, so comments mark only the per-mode strategies visible.  */
49137 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
49139 rtx t1, t2, t3, t4, t5;
/* --- V4DFmode (AVX): two vperm2f128 + one unpck[lh]pd.  */
49146 t1 = gen_reg_rtx (V4DFmode);
49147 t2 = gen_reg_rtx (V4DFmode);
49149 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49150 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
49151 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
49153 /* Now an unpck[lh]pd will produce the result required. */
49155 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
49157 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* --- V8SFmode (AVX): shufps/vperm2f128 sequence.  */
49163 int mask = odd ? 0xdd : 0x88;
49167 t1 = gen_reg_rtx (V8SFmode);
49168 t2 = gen_reg_rtx (V8SFmode);
49169 t3 = gen_reg_rtx (V8SFmode);
49171 /* Shuffle within the 128-bit lanes to produce:
49172 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
49173 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
   GEN_INT (mask)));
49176 /* Shuffle the lanes around to produce:
49177 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
49178 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
   GEN_INT (0x01)));
49181 /* Shuffle within the 128-bit lanes to produce:
49182 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
49183 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
49185 /* Shuffle within the 128-bit lanes to produce:
49186 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
49187 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
49189 /* Shuffle the lanes around to produce:
49190 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
49191 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
   GEN_INT (0x20)));
49200 /* These are always directly implementable by expand_vec_perm_1. */
49201 gcc_unreachable ();
/* --- V8HImode and smaller: try pack, then pshufb, then interleave.  */
49205 return expand_vec_perm_even_odd_pack (d);
49206 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
49207 return expand_vec_perm_pshufb2 (d);
49212 /* We need 2*log2(N)-1 operations to achieve odd/even
49213 with interleave. */
49214 t1 = gen_reg_rtx (V8HImode);
49215 t2 = gen_reg_rtx (V8HImode);
49216 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
49217 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
49218 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
49219 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
49221 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
49223 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
49229 return expand_vec_perm_even_odd_pack (d);
49233 return expand_vec_perm_even_odd_pack (d);
/* --- V64QImode: AVX-512BW truncate-based path.  */
49236 return expand_vec_perm_even_odd_trunc (d);
/* --- V4DImode: reuse the V4DF code via lowpart punning.  */
49241 struct expand_vec_perm_d d_copy = *d;
49242 d_copy.vmode = V4DFmode;
49244 d_copy.target = gen_lowpart (V4DFmode, d->target);
49246 d_copy.target = gen_reg_rtx (V4DFmode);
49247 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
49248 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
49249 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49252 emit_move_insn (d->target,
49253 gen_lowpart (V4DImode, d_copy.target));
/* --- V4DImode with AVX2: vperm2i128 + vpunpck[lh]qdq.  */
49262 t1 = gen_reg_rtx (V4DImode);
49263 t2 = gen_reg_rtx (V4DImode);
49265 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49266 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
49267 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
49269 /* Now an vpunpck[lh]qdq will produce the result required. */
49271 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
49273 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* --- V8SImode: reuse the V8SF code via lowpart punning.  */
49280 struct expand_vec_perm_d d_copy = *d;
49281 d_copy.vmode = V8SFmode;
49283 d_copy.target = gen_lowpart (V8SFmode, d->target);
49285 d_copy.target = gen_reg_rtx (V8SFmode);
49286 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
49287 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
49288 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49291 emit_move_insn (d->target,
49292 gen_lowpart (V8SImode, d_copy.target));
/* --- V8SImode with AVX2: vperm2ti + vpshufd + vpunpck[lh]qdq.  */
49301 t1 = gen_reg_rtx (V8SImode);
49302 t2 = gen_reg_rtx (V8SImode);
49303 t3 = gen_reg_rtx (V4DImode);
49304 t4 = gen_reg_rtx (V4DImode);
49305 t5 = gen_reg_rtx (V4DImode);
49307 /* Shuffle the lanes around into
49308 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49309 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49310 gen_lowpart (V4DImode, d->op1),
   GEN_INT (0x20)));
49312 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49313 gen_lowpart (V4DImode, d->op1),
   GEN_INT (0x31)));
49316 /* Swap the 2nd and 3rd position in each lane into
49317 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49318 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49319 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49320 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49321 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49323 /* Now an vpunpck[lh]qdq will produce
49324 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
49326 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49327 gen_lowpart (V4DImode, t2));
49329 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49330 gen_lowpart (V4DImode, t2));
49332 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49336 gcc_unreachable ();
49342 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49343 extract-even and extract-odd permutations.
   Recognizes perm[i] == 2*i + odd for odd in {0,1} and dispatches to
   expand_vec_perm_even_odd_1.
   NOTE(review): elided extract — the odd = d->perm[0] initialization
   and the mismatch return are missing from view.  */
49346 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49348 unsigned i, odd, nelt = d->nelt;
49351 if (odd != 0 && odd != 1)
49354 for (i = 1; i < nelt; ++i)
49355 if (d->perm[i] != 2 * i + odd)
49358 return expand_vec_perm_even_odd_1 (d, odd);
49361 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49362 permutations. We assume that expand_vec_perm_1 has already failed.
   NOTE(review): elided extract — the switch on vmode and several case
   labels/returns are missing from view.  */
49365 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49367 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49368 machine_mode vmode = d->vmode;
49369 unsigned char perm2[4];
49370 rtx op0 = d->op0, dest;
49377 /* These are special-cased in sse.md so that we can optionally
49378 use the vbroadcast instruction. They expand to two insns
49379 if the input happens to be in a register. */
49380 gcc_unreachable ();
49386 /* These are always implementable using standard shuffle patterns. */
49387 gcc_unreachable ();
49391 /* These can be implemented via interleave. We save one insn by
49392 stopping once we have promoted to V4SImode and then use pshufd. */
49398 rtx (*gen) (rtx, rtx, rtx)
49399 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49400 : gen_vec_interleave_lowv8hi;
49404 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49405 : gen_vec_interleave_highv8hi;
/* Repeated self-interleave doubles the broadcast element's width each
   step until the vector is V4SImode.  */
49410 dest = gen_reg_rtx (vmode);
49411 emit_insn (gen (dest, op0, op0));
49412 vmode = get_mode_wider_vector (vmode);
49413 op0 = gen_lowpart (vmode, dest);
49415 while (vmode != V4SImode);
/* Finish with a pshufd splatting the surviving dword.  */
49417 memset (perm2, elt, 4);
49418 dest = gen_reg_rtx (V4SImode);
49419 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49422 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49430 /* For AVX2 broadcasts of the first element vpbroadcast* or
49431 vpermq should be used by expand_vec_perm_1. */
49432 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49436 gcc_unreachable ();
49440 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49441 broadcast permutations.
   Recognizes perm[i] == elt for all i (single-operand only) and
   dispatches to expand_vec_perm_broadcast_1.
   NOTE(review): elided extract — the elt = d->perm[0] initialization
   and the mismatch return are missing from view.  */
49444 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49446 unsigned i, elt, nelt = d->nelt;
49448 if (!d->one_operand_p)
49452 for (i = 1; i < nelt; ++i)
49453 if (d->perm[i] != elt)
49456 return expand_vec_perm_broadcast_1 (d);
49459 /* Implement arbitrary permutations of two V64QImode operands
49460 with 2 vpermi2w, 2 vpshufb and one vpor instruction.  */
/* NOTE(review): elided extraction -- testing_p early-exit, nelt/vmode
   initialisation and several statement bodies are not visible.  */
49462 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
/* Only AVX512BW V64QImode is handled here.  */
49464 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49470 struct expand_vec_perm_d ds[2];
49471 rtx rperm[128], vperm, target0, target1;
49472 unsigned int i, nelt;
49473 machine_mode vmode;
/* Build two word-level (V32HImode) sub-permutations over the same
   operands.  */
49478 for (i = 0; i < 2; i++)
49481 ds[i].vmode = V32HImode;
49483 ds[i].target = gen_reg_rtx (V32HImode);
49484 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49485 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49488 /* Prepare permutations such that the first one takes care of
49489 putting the even bytes into the right positions or one higher
49490 positions (ds[0]) and the second one takes care of
49491 putting the odd bytes into the right positions or one below
/* rperm[0..63] masks the first vpshufb, rperm[64..127] the second;
   constm1_rtx produces a zeroing (-1) byte selector.  */
49494 for (i = 0; i < nelt; i++)
49496 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49499 rperm[i] = constm1_rtx;
49500 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49504 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49505 rperm[i + 64] = constm1_rtx;
/* Expand both word permutations (expected to become vpermi2w).  */
49509 bool ok = expand_vec_perm_1 (&ds[0]);
49511 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49513 ok = expand_vec_perm_1 (&ds[1]);
49515 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
/* Byte-select from each intermediate result, then OR the halves.  */
49517 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49518 vperm = force_reg (vmode, vperm);
49519 target0 = gen_reg_rtx (V64QImode);
49520 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49522 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49523 vperm = force_reg (vmode, vperm);
49524 target1 = gen_reg_rtx (V64QImode);
49525 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49527 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49531 /* Implement arbitrary permutation of two V32QImode and V16HImode operands
49532 with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
49533 all the shorter instruction sequences.  */
/* NOTE(review): elided extraction -- TARGET_AVX2/testing_p guards, nelt
   initialisation, the `used[4]` declaration and several loop bodies are
   not visible between the numbered lines.  */
49536 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49538 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49539 unsigned int i, nelt, eltsz;
49543 || d->one_operand_p
49544 || (d->vmode != V32QImode && d->vmode != V16HImode))
49551 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49553 /* Generate 4 permutation masks.  If the required element is within
49554 the same lane, it is shuffled in.  If the required element from the
49555 other lane, force a zero by setting bit 7 in the permutation mask.
49556 In the other mask the mask has non-negative elements if element
49557 is requested from the other lane, but also moved to the other lane,
49558 so that the result of vpshufb can have the two V2TImode halves
/* -128 (bit 7 set) is the vpshufb "zero this byte" selector.  */
49560 m128 = GEN_INT (-128);
49561 for (i = 0; i < 32; ++i)
49563 rperm[0][i] = m128;
49564 rperm[1][i] = m128;
49565 rperm[2][i] = m128;
49566 rperm[3][i] = m128;
/* Classify each element: which source operand (bit 1 of WHICH) and
   whether it must cross the 128-bit lane (bit 0 of WHICH).  */
49572 for (i = 0; i < nelt; ++i)
49574 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49575 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49576 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49578 for (j = 0; j < eltsz; ++j)
49579 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49580 used[which] = true;
/* H[i]: cross-lane contributions from operand i, if any.  */
49583 for (i = 0; i < 2; ++i)
49585 if (!used[2 * i + 1])
49590 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49591 gen_rtvec_v (32, rperm[2 * i + 1]));
49592 vperm = force_reg (V32QImode, vperm);
49593 h[i] = gen_reg_rtx (V32QImode);
49594 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49595 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
/* NOTE(review): comment says "128-byte" but vpermq swaps 128-BIT lanes.  */
49598 /* Swap the 128-byte lanes of h[X].  */
49599 for (i = 0; i < 2; ++i)
49601 if (h[i] == NULL_RTX)
49603 op = gen_reg_rtx (V4DImode);
49604 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49605 const2_rtx, GEN_INT (3), const0_rtx,
49607 h[i] = gen_lowpart (V32QImode, op);
/* L[i]: same-lane contributions from operand i.  */
49610 for (i = 0; i < 2; ++i)
49617 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49618 vperm = force_reg (V32QImode, vperm);
49619 l[i] = gen_reg_rtx (V32QImode);
49620 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49621 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* Merge lane/cross-lane halves per operand, then the two operands.  */
49624 for (i = 0; i < 2; ++i)
49628 op = gen_reg_rtx (V32QImode);
49629 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49636 gcc_assert (l[0] && l[1]);
49638 if (d->vmode != V32QImode)
49639 op = gen_reg_rtx (V32QImode);
49640 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49641 if (op != d->target)
49642 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49646 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49647 With all of the interface bits taken care of, perform the expansion
49648 in D and return true on success.  */
/* Strategy: try expansions in order of increasing instruction count and
   return as soon as one succeeds (the `return true` lines between the
   attempts are elided from this extraction).  */
49651 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49653 /* Try a single instruction expansion.  */
49654 if (expand_vec_perm_1 (d))
49657 /* Try sequences of two instructions.  */
49659 if (expand_vec_perm_pshuflw_pshufhw (d))
49662 if (expand_vec_perm_palignr (d, false))
49665 if (expand_vec_perm_interleave2 (d))
49668 if (expand_vec_perm_broadcast (d))
49671 if (expand_vec_perm_vpermq_perm_1 (d))
49674 if (expand_vec_perm_vperm2f128 (d))
49677 if (expand_vec_perm_pblendv (d))
49680 /* Try sequences of three instructions.  */
49682 if (expand_vec_perm_even_odd_pack (d))
49685 if (expand_vec_perm_2vperm2f128_vshuf (d))
49688 if (expand_vec_perm_pshufb2 (d))
49691 if (expand_vec_perm_interleave3 (d))
49694 if (expand_vec_perm_vperm2f128_vblend (d))
49697 /* Try sequences of four instructions.  */
49699 if (expand_vec_perm_even_odd_trunc (d))
49701 if (expand_vec_perm_vpshufb2_vpermq (d))
49704 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49707 if (expand_vec_perm_vpermi2_vpshub2 (d))
49710 /* ??? Look for narrow permutations whose element orderings would
49711 allow the promotion to a wider mode.  */
49713 /* ??? Look for sequences of interleave or a wider permute that place
49714 the data into the correct lanes for a half-vector shuffle like
49715 pshuf[lh]w or vpermilps.  */
49717 /* ??? Look for sequences of interleave that produce the desired results.
49718 The combinatorics of punpck[lh] get pretty ugly... */
49720 if (expand_vec_perm_even_odd (d))
49723 /* Even longer sequences.  */
49724 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49730 /* If a permutation only uses one operand, make it clear.  Returns true
49731 if the permutation references both operands.  */
/* NOTE(review): elided extraction -- the branches deciding between the
   which==1 / which==2 / which==3 cases are not visible here.  */
49734 canonicalize_perm (struct expand_vec_perm_d *d)
49736 int i, which, nelt = d->nelt;
/* WHICH accumulates: bit 0 = some index selects op0, bit 1 = op1.  */
49738 for (i = which = 0; i < nelt; ++i)
49739 which |= (d->perm[i] < nelt ? 1 : 2);
49741 d->one_operand_p = true;
49748 if (!rtx_equal_p (d->op0, d->op1))
49750 d->one_operand_p = false;
49753 /* The elements of PERM do not suggest that only the first operand
49754 is used, but both operands are identical.  Allow easier matching
49755 of the permutation by folding the permutation into the single
/* Fold second-operand indices onto the first operand.  */
49760 for (i = 0; i < nelt; ++i)
49761 d->perm[i] &= nelt - 1;
49770 return (which == 3);
/* Expand a constant vector permutation: operands[0] = target,
   [1]/[2] = source vectors, [3] = CONST_VECTOR selector.  Returns
   whether the expansion succeeded (return type elided here).  */
49774 ix86_expand_vec_perm_const (rtx operands[4])
49776 struct expand_vec_perm_d d;
49777 unsigned char perm[MAX_VECT_LEN];
49782 d.target = operands[0];
49783 d.op0 = operands[1];
49784 d.op1 = operands[2];
49787 d.vmode = GET_MODE (d.target);
49788 gcc_assert (VECTOR_MODE_P (d.vmode));
49789 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49790 d.testing_p = false;
49792 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49793 gcc_assert (XVECLEN (sel, 0) == nelt);
49794 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Copy the selector, masking each index into [0, 2*nelt); PERM keeps an
   unfolded backup for the retry below.  */
49796 for (i = 0; i < nelt; ++i)
49798 rtx e = XVECEXP (sel, 0, i);
49799 int ei = INTVAL (e) & (2 * nelt - 1);
49804 two_args = canonicalize_perm (&d);
49806 if (ix86_expand_vec_perm_const_1 (&d))
49809 /* If the selector says both arguments are needed, but the operands are the
49810 same, the above tried to expand with one_operand_p and flattened selector.
49811 If that didn't work, retry without one_operand_p; we succeeded with that
49813 if (two_args && d.one_operand_p)
49815 d.one_operand_p = false;
49816 memcpy (d.perm, perm, sizeof (perm));
49817 return ix86_expand_vec_perm_const_1 (&d);
49823 /* Implement targetm.vectorize.vec_perm_const_ok.  */
/* NOTE(review): elided extraction -- the switch over VMODE whose cases the
   ISA comments below annotate, and the `return true` bodies, are missing.
   Overall shape: fast-path "true" for modes fully covered by the available
   ISA, otherwise do a trial (testing_p) expansion.  */
49826 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49827 const unsigned char *sel)
49829 struct expand_vec_perm_d d;
49830 unsigned int i, nelt, which;
49834 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49835 d.testing_p = true;
49837 /* Given sufficient ISA support we can just return true here
49838 for selected vector modes.  */
49845 if (TARGET_AVX512F)
49846 /* All implementable with a single vpermi2 insn.  */
49850 if (TARGET_AVX512BW)
49851 /* All implementable with a single vpermi2 insn.  */
49855 if (TARGET_AVX512BW)
49856 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn.  */
49863 if (TARGET_AVX512VL)
49864 /* All implementable with a single vpermi2 insn.  */
49869 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49874 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49881 /* All implementable with a single vpperm insn.  */
49884 /* All implementable with 2 pshufb + 1 ior.  */
49890 /* All implementable with shufpd or unpck[lh]pd.  */
49896 /* Extract the values from the vector CST into the permutation
49898 memcpy (d.perm, sel, nelt);
49899 for (i = which = 0; i < nelt; ++i)
49901 unsigned char e = d.perm[i];
49902 gcc_assert (e < 2 * nelt);
49903 which |= (e < nelt ? 1 : 2);
49906 /* For all elements from second vector, fold the elements to first.  */
49908 for (i = 0; i < nelt; ++i)
49911 /* Check whether the mask can be applied to the vector type.  */
49912 d.one_operand_p = (which != 3);
49914 /* Implementable with shufps or pshufd.  */
49915 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49918 /* Otherwise we have to go through the motions and see if we can
49919 figure out how to generate the requested permutation.  */
/* Raw virtual registers: legal because testing_p suppresses emission.  */
49920 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49921 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49922 if (!d.one_operand_p)
49923 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49926 ret = ix86_expand_vec_perm_const_1 (&d);
/* Emit code extracting the even (ODD==0) or odd (ODD==1) elements of
   OP0/OP1 into TARG.  Assignment of d.target/d.op0/d.op1 is elided
   from this extraction.  */
49933 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49935 struct expand_vec_perm_d d;
49941 d.vmode = GET_MODE (targ);
49942 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49943 d.one_operand_p = false;
49944 d.testing_p = false;
49946 for (i = 0; i < nelt; ++i)
49947 d.perm[i] = i * 2 + odd;
49949 /* We'll either be able to implement the permutation directly...  */
49950 if (expand_vec_perm_1 (&d))
49953 /* ... or we use the special-case patterns.  */
49954 expand_vec_perm_even_odd_1 (&d, odd);
/* Emit code interleaving the low (HIGH_P==false) or high halves of
   OP0 and OP1 into TARG.  */
49958 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49960 struct expand_vec_perm_d d;
49961 unsigned i, nelt, base;
49967 d.vmode = GET_MODE (targ);
49968 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49969 d.one_operand_p = false;
49970 d.testing_p = false;
/* Alternate elements: op0[base+i], op1[base+i] (op1 indices offset by
   NELT in the combined selector space).  */
49972 base = high_p ? nelt / 2 : 0;
49973 for (i = 0; i < nelt / 2; ++i)
49975 d.perm[i * 2] = i + base;
49976 d.perm[i * 2 + 1] = i + base + nelt;
49979 /* Note that for AVX this isn't one instruction.  */
49980 ok = ix86_expand_vec_perm_const_1 (&d);
49985 /* Expand a vector operation CODE for a V*QImode in terms of the
49986 same operation on V*HImode.  */
/* NOTE(review): elided extraction -- the switch over QIMODE (case labels,
   himode = V8HImode assignment), the shift/multiply dispatch and several
   loop bodies are not visible between the numbered lines.  */
49989 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49991 machine_mode qimode = GET_MODE (dest);
49992 machine_mode himode;
49993 rtx (*gen_il) (rtx, rtx, rtx);
49994 rtx (*gen_ih) (rtx, rtx, rtx);
49995 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49996 struct expand_vec_perm_d d;
49997 bool ok, full_interleave;
49998 bool uns_p = false;
/* Select low/high interleave generators per vector width.  */
50005 gen_il = gen_vec_interleave_lowv16qi;
50006 gen_ih = gen_vec_interleave_highv16qi;
50009 himode = V16HImode;
50010 gen_il = gen_avx2_interleave_lowv32qi;
50011 gen_ih = gen_avx2_interleave_highv32qi;
50014 himode = V32HImode;
50015 gen_il = gen_avx512bw_interleave_lowv64qi;
50016 gen_ih = gen_avx512bw_interleave_highv64qi;
50019 gcc_unreachable ();
50022 op2_l = op2_h = op2;
50026 /* Unpack data such that we've got a source byte in each low byte of
50027 each word.  We don't care what goes into the high byte of each word.
50028 Rather than trying to get zero in there, most convenient is to let
50029 it be a copy of the low byte.  */
50030 op2_l = gen_reg_rtx (qimode);
50031 op2_h = gen_reg_rtx (qimode);
50032 emit_insn (gen_il (op2_l, op2, op2));
50033 emit_insn (gen_ih (op2_h, op2, op2));
50036 op1_l = gen_reg_rtx (qimode);
50037 op1_h = gen_reg_rtx (qimode);
50038 emit_insn (gen_il (op1_l, op1, op1));
50039 emit_insn (gen_ih (op1_h, op1, op1));
50040 full_interleave = qimode == V16QImode;
/* Alternative path (elided case): sign/zero-extend unpack.  */
50048 op1_l = gen_reg_rtx (himode);
50049 op1_h = gen_reg_rtx (himode);
50050 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
50051 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
50052 full_interleave = true;
50055 gcc_unreachable ();
50058 /* Perform the operation.  */
50059 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
50061 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
50063 gcc_assert (res_l && res_h);
50065 /* Merge the data back into the right place.  */
50067 d.op0 = gen_lowpart (qimode, res_l);
50068 d.op1 = gen_lowpart (qimode, res_h);
50070 d.nelt = GET_MODE_NUNITS (qimode);
50071 d.one_operand_p = false;
50072 d.testing_p = false;
50074 if (full_interleave)
50076 /* For SSE2, we used an full interleave, so the desired
50077 results are in the even elements.  */
50078 for (i = 0; i < d.nelt; ++i)
50083 /* For AVX, the interleave used above was not cross-lane.  So the
50084 extraction is evens but with the second and third quarter swapped.
50085 Happily, that is even one insn shorter than even extraction.
50086 For AVX512BW we have 4 lanes.  We extract evens from within a lane,
50087 always first from the first and then from the second source operand,
50088 the index bits above the low 4 bits remains the same.
50089 Thus, for d.nelt == 32 we want permutation
50090 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
50091 and for d.nelt == 64 we want permutation
50092 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
50093 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126.  */
50094 for (i = 0; i < d.nelt; ++i)
50095 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
50098 ok = ix86_expand_vec_perm_const_1 (&d);
/* Record the whole-operation equivalence for later CSE/combine.  */
50101 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50102 gen_rtx_fmt_ee (code, qimode, op1, op2));
50105 /* Helper function of ix86_expand_mul_widen_evenodd.  Return true
50106 if op is CONST_VECTOR with all odd elements equal to their
50107 preceding element.  */
/* Used to skip the pre-shift of an operand whose odd lanes already
   duplicate the even lanes.  */
50110 const_vector_equal_evenodd_p (rtx op)
50112 machine_mode mode = GET_MODE (op);
50113 int i, nunits = GET_MODE_NUNITS (mode);
50114 if (GET_CODE (op) != CONST_VECTOR
50115 || nunits != CONST_VECTOR_NUNITS (op))
50117 for (i = 0; i < nunits; i += 2)
50118 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (ODD_P==false) or odd lanes of
   OP1/OP2 into DEST.  UNS_P selects unsigned vs. signed.
   NOTE(review): elided extraction -- some braces, `return`s and an
   `if (odd_p)` guard around the shift block are not visible.  */
50124 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
50125 bool uns_p, bool odd_p)
50127 machine_mode mode = GET_MODE (op1);
50128 machine_mode wmode = GET_MODE (dest);
50130 rtx orig_op1 = op1, orig_op2 = op2;
50132 if (!nonimmediate_operand (op1, mode))
50133 op1 = force_reg (mode, op1);
50134 if (!nonimmediate_operand (op2, mode))
50135 op2 = force_reg (mode, op2);
50137 /* We only play even/odd games with vectors of SImode.  */
50138 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
50140 /* If we're looking for the odd results, shift those members down to
50141 the even slots.  For some cpus this is faster than a PSHUFD.  */
50144 /* For XOP use vpmacsdqh, but only for smult, as it is only
50146 if (TARGET_XOP && mode == V4SImode && !uns_p)
50148 x = force_reg (wmode, CONST0_RTX (wmode));
50149 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Logical right shift by one element's bits moves odd lanes to even;
   skipped when a constant operand already has equal even/odd pairs.  */
50153 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
50154 if (!const_vector_equal_evenodd_p (orig_op1))
50155 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
50156 x, NULL, 1, OPTAB_DIRECT);
50157 if (!const_vector_equal_evenodd_p (orig_op2))
50158 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
50159 x, NULL, 1, OPTAB_DIRECT);
50160 op1 = gen_lowpart (mode, op1);
50161 op2 = gen_lowpart (mode, op2);
/* Dispatch on width/signedness to the matching widen-mult pattern.  */
50164 if (mode == V16SImode)
50167 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
50169 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
50171 else if (mode == V8SImode)
50174 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
50176 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
50179 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
50180 else if (TARGET_SSE4_1)
50181 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
50184 rtx s1, s2, t0, t1, t2;
50186 /* The easiest way to implement this without PMULDQ is to go through
50187 the motions as if we are performing a full 64-bit multiply.  With
50188 the exception that we need to do less shuffling of the elements.  */
50190 /* Compute the sign-extension, aka highparts, of the two operands.  */
50191 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50192 op1, pc_rtx, pc_rtx);
50193 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50194 op2, pc_rtx, pc_rtx);
50196 /* Multiply LO(A) * HI(B), and vice-versa.  */
50197 t1 = gen_reg_rtx (wmode);
50198 t2 = gen_reg_rtx (wmode);
50199 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
50200 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
50202 /* Multiply LO(A) * LO(B).  */
50203 t0 = gen_reg_rtx (wmode);
50204 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
50206 /* Combine and shift the highparts into place.  */
50207 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
50208 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
50211 /* Combine high and low parts.  */
50212 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (HIGH_P==false) or high halves
   of OP1/OP2 into DEST.  NOTE(review): elided extraction -- the outer
   switch over MODE and several case labels are not visible; the branches
   below correspond to V4SI, V8SI/AVX2, integer highpart, and
   sign/zero-extend strategies.  */
50219 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
50220 bool uns_p, bool high_p)
50222 machine_mode wmode = GET_MODE (dest);
50223 machine_mode mode = GET_MODE (op1);
50224 rtx t1, t2, t3, t4, mask;
50229 t1 = gen_reg_rtx (mode);
50230 t2 = gen_reg_rtx (mode);
50231 if (TARGET_XOP && !uns_p)
50233 /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
50234 shuffle the elements once so that all elements are in the right
50235 place for immediate use: { A C B D }.  */
50236 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
50237 const1_rtx, GEN_INT (3)));
50238 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
50239 const1_rtx, GEN_INT (3)));
50243 /* Put the elements into place for the multiply.  */
50244 ix86_expand_vec_interleave (t1, op1, op1, high_p);
50245 ix86_expand_vec_interleave (t2, op2, op2, high_p);
50248 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
50252 /* Shuffle the elements between the lanes.  After this we
50253 have { A B E F | C D G H } for each operand.  */
50254 t1 = gen_reg_rtx (V4DImode);
50255 t2 = gen_reg_rtx (V4DImode);
50256 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
50257 const0_rtx, const2_rtx,
50258 const1_rtx, GEN_INT (3)));
50259 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
50260 const0_rtx, const2_rtx,
50261 const1_rtx, GEN_INT (3)));
50263 /* Shuffle the elements within the lanes.  After this we
50264 have { A A B B | C C D D } or { E E F F | G G H H }.  */
50265 t3 = gen_reg_rtx (V8SImode);
50266 t4 = gen_reg_rtx (V8SImode);
/* Immediate encodes a per-lane pshufd selector for high vs. low pairs.  */
50267 mask = GEN_INT (high_p
50268 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50269 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
50270 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
50271 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
50273 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* Generic path: full product + highpart product, then interleave.  */
50278 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
50279 uns_p, OPTAB_DIRECT);
50280 t2 = expand_binop (mode,
50281 uns_p ? umul_highpart_optab : smul_highpart_optab,
50282 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
50283 gcc_assert (t1 && t2);
50285 t3 = gen_reg_rtx (mode);
50286 ix86_expand_vec_interleave (t3, t1, t2, high_p);
50287 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Extend-then-multiply path for the remaining modes.  */
50295 t1 = gen_reg_rtx (wmode);
50296 t2 = gen_reg_rtx (wmode);
50297 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
50298 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
50300 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
50304 gcc_unreachable ();
/* Expand a V4SImode multiply OP0 = OP1 * OP2 using only SSE2: two
   even/odd widening multiplies, two pshufd compactions, and an
   interleave to merge.  */
50309 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
50311 rtx res_1, res_2, res_3, res_4;
50313 res_1 = gen_reg_rtx (V4SImode);
50314 res_2 = gen_reg_rtx (V4SImode);
50315 res_3 = gen_reg_rtx (V2DImode);
50316 res_4 = gen_reg_rtx (V2DImode);
50317 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50318 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50320 /* Move the results in element 2 down to element 1; we don't care
50321 what goes in elements 2 and 3.  Then we can merge the parts
50322 back together with an interleave.
50324 Note that two other sequences were tried:
50325 (1) Use interleaves at the start instead of psrldq, which allows
50326 us to use a single shufps to merge things back at the end.
50327 (2) Use shufps here to combine the two vectors, then pshufd to
50328 put the elements in the correct order.
50329 In both cases the cost of the reformatting stall was too high
50330 and the overall sequence slower.  */
50332 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50333 const0_rtx, const2_rtx,
50334 const0_rtx, const0_rtx));
50335 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50336 const0_rtx, const2_rtx,
50337 const0_rtx, const0_rtx));
50338 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* REG_EQUAL lets later passes treat the sequence as a plain MULT.  */
50340 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand a V*DImode multiply OP0 = OP1 * OP2.  Uses a native vpmullq
   when AVX512DQ(+VL) allows, an XOP sequence for V2DI, else the generic
   schoolbook lo*lo + ((hi*lo + lo*hi) << 32) decomposition.
   NOTE(review): elided extraction -- some braces, nmode assignments and
   the t6 shift-amount setup are not visible.  */
50344 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50346 machine_mode mode = GET_MODE (op0);
50347 rtx t1, t2, t3, t4, t5, t6;
50349 if (TARGET_AVX512DQ && mode == V8DImode)
50350 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50351 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50352 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50353 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50354 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50355 else if (TARGET_XOP && mode == V2DImode)
50357 /* op1: A,B,C,D, op2: E,F,G,H */
50358 op1 = gen_lowpart (V4SImode, op1);
50359 op2 = gen_lowpart (V4SImode, op2);
50361 t1 = gen_reg_rtx (V4SImode);
50362 t2 = gen_reg_rtx (V4SImode);
50363 t3 = gen_reg_rtx (V2DImode);
50364 t4 = gen_reg_rtx (V2DImode);
/* t1: swap 32-bit halves within each 64-bit element (selector elided).  */
50367 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50373 /* t2: (B*E),(A*F),(D*G),(C*H) */
50374 emit_insn (gen_mulv4si3 (t2, t1, op2));
50376 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50377 emit_insn (gen_xop_phadddq (t3, t2));
50379 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50380 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50382 /* Multiply lower parts and add all */
50383 t5 = gen_reg_rtx (V2DImode);
50384 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50385 gen_lowpart (V4SImode, op1),
50386 gen_lowpart (V4SImode, op2)));
50387 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic path: pick the even-widening multiply for this width.  */
50392 machine_mode nmode;
50393 rtx (*umul) (rtx, rtx, rtx);
50395 if (mode == V2DImode)
50397 umul = gen_vec_widen_umult_even_v4si;
50400 else if (mode == V4DImode)
50402 umul = gen_vec_widen_umult_even_v8si;
50405 else if (mode == V8DImode)
50407 umul = gen_vec_widen_umult_even_v16si;
50411 gcc_unreachable ();
50414 /* Multiply low parts.  */
50415 t1 = gen_reg_rtx (mode);
50416 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50418 /* Shift input vectors right 32 bits so we can multiply high parts.  */
50420 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50421 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50423 /* Multiply high parts by low parts.  */
50424 t4 = gen_reg_rtx (mode);
50425 t5 = gen_reg_rtx (mode);
50426 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50427 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50429 /* Combine and shift the highparts back.  */
50430 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50431 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50433 /* Combine high and low parts.  */
50434 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50437 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50438 gen_rtx_MULT (mode, op1, op2));
50441 /* Return 1 if control tansfer instruction INSN
50442 should be encoded with bnd prefix.
50443 If insn is NULL then return 1 when control
50444 transfer instructions should be prefixed with
50445 bnd by default for current function.  */
/* Part of MPX (bounds-checking) support; relies on the chkp machinery.  */
50448 ix86_bnd_prefixed_insn_p (rtx insn)
50450 /* For call insns check special flag.  */
50451 if (insn && CALL_P (insn))
50453 rtx call = get_call_rtx_from (insn);
50455 return CALL_EXPR_WITH_BOUNDS_P (call);
50458 /* All other insns are prefixed only if function is instrumented.  */
50459 return chkp_function_instrumented_p (current_function_decl);
50462 /* Calculate integer abs() using only SSE2 instructions.  */
/* NOTE(review): elided extraction -- the switch over MODE and its case
   labels (V4SI / V8HI / V16QI, per the comments) are not visible.  */
50465 ix86_expand_sse2_abs (rtx target, rtx input)
50467 machine_mode mode = GET_MODE (target);
50472 /* For 32-bit signed integer X, the best way to calculate the absolute
50473 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
50475 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50476 GEN_INT (GET_MODE_BITSIZE
50477 (GET_MODE_INNER (mode)) - 1),
50478 NULL, 0, OPTAB_DIRECT);
50479 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50480 NULL, 0, OPTAB_DIRECT);
50481 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50482 target, 0, OPTAB_DIRECT);
50485 /* For 16-bit signed integer X, the best way to calculate the absolute
50486 value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
50488 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50490 x = expand_simple_binop (mode, SMAX, tmp0, input,
50491 target, 0, OPTAB_DIRECT);
50494 /* For 8-bit signed integer X, the best way to calculate the absolute
50495 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50496 as SSE2 provides the PMINUB insn.  */
50498 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50500 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50501 target, 0, OPTAB_DIRECT);
50505 gcc_unreachable ();
/* Copy the result if the binop did not land directly in TARGET.  */
50509 emit_move_insn (target, x);
50512 /* Expand an insert into a vector register through pinsr insn.
50513 Return true if successful.  */
/* operands[0]=dst, [1]=size in bits, [2]=bit position, [3]=src.
   NOTE(review): elided extraction -- the outer switch over GET_MODE (dst)
   and the inner switch over SRCMODE (whose cases the pinsrb/w/d/q
   assignments below belong to) are not visible.  */
50516 ix86_expand_pinsr (rtx *operands)
50518 rtx dst = operands[0];
50519 rtx src = operands[3];
50521 unsigned int size = INTVAL (operands[1]);
50522 unsigned int pos = INTVAL (operands[2]);
50524 if (GET_CODE (src) == SUBREG)
50526 /* Reject non-lowpart subregs.  */
50527 if (SUBREG_BYTE (src) != 0)
50529 src = SUBREG_REG (src);
/* Fold a dst subreg into an adjusted bit position.  */
50532 if (GET_CODE (dst) == SUBREG)
50534 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50535 dst = SUBREG_REG (dst);
50538 switch (GET_MODE (dst))
50545 machine_mode srcmode, dstmode;
50546 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50548 srcmode = mode_for_size (size, MODE_INT, 0);
/* 8-bit insert needs SSE4.1 (pinsrb).  */
50553 if (!TARGET_SSE4_1)
50555 dstmode = V16QImode;
50556 pinsr = gen_sse4_1_pinsrb;
/* 16-bit insert: SSE2 pinsrw.  */
50562 dstmode = V8HImode;
50563 pinsr = gen_sse2_pinsrw;
/* 32-bit insert needs SSE4.1 (pinsrd).  */
50567 if (!TARGET_SSE4_1)
50569 dstmode = V4SImode;
50570 pinsr = gen_sse4_1_pinsrd;
/* 64-bit insert: pinsrq, 64-bit targets only.  */
50574 gcc_assert (TARGET_64BIT);
50575 if (!TARGET_SSE4_1)
50577 dstmode = V2DImode;
50578 pinsr = gen_sse4_1_pinsrq;
50585 /* Reject insertions to misaligned positions.  */
50586 if (pos & (size-1))
50590 if (GET_MODE (dst) != dstmode)
50591 d = gen_reg_rtx (dstmode);
50592 src = gen_lowpart (srcmode, src);
/* POS is converted to an element index for the pinsr immediate
   (conversion not visible here) -- TODO confirm.  */
50596 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50597 GEN_INT (1 << pos)));
50599 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50608 /* This function returns the calling abi specific va_list type node.
50609 It returns the FNDECL specific va_list type.  */
/* Non-64-bit targets have a single va_list (guard condition elided).  */
50612 ix86_fn_abi_va_list (tree fndecl)
50615 return va_list_type_node;
50616 gcc_assert (fndecl != NULL_TREE);
50618 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50619 return ms_va_list_type_node;
50621 return sysv_va_list_type_node;
50624 /* Returns the canonical va_list type specified by TYPE.  If there
50625 is no valid TYPE provided, it return NULL_TREE.  */
/* Tries, in order: the default va_list (64-bit only), the SysV va_list,
   then the MS va_list; falls back to std_canonical_va_list_type.
   HTYPE presumably starts as TYPE (assignment elided) -- TODO confirm.  */
50628 ix86_canonical_va_list_type (tree type)
50632 /* Resolve references and pointers to va_list type.  */
50633 if (TREE_CODE (type) == MEM_REF)
50634 type = TREE_TYPE (type);
50635 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50636 type = TREE_TYPE (type);
50637 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50638 type = TREE_TYPE (type);
50640 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50642 wtype = va_list_type_node;
50643 gcc_assert (wtype != NULL_TREE);
50645 if (TREE_CODE (wtype) == ARRAY_TYPE)
50647 /* If va_list is an array type, the argument may have decayed
50648 to a pointer type, e.g. by being passed to another function.
50649 In that case, unwrap both types so that we can compare the
50650 underlying records.  */
50651 if (TREE_CODE (htype) == ARRAY_TYPE
50652 || POINTER_TYPE_P (htype))
50654 wtype = TREE_TYPE (wtype);
50655 htype = TREE_TYPE (htype);
50658 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50659 return va_list_type_node;
/* Second candidate: the SysV ABI va_list.  */
50660 wtype = sysv_va_list_type_node;
50661 gcc_assert (wtype != NULL_TREE);
50663 if (TREE_CODE (wtype) == ARRAY_TYPE)
50665 /* If va_list is an array type, the argument may have decayed
50666 to a pointer type, e.g. by being passed to another function.
50667 In that case, unwrap both types so that we can compare the
50668 underlying records.  */
50669 if (TREE_CODE (htype) == ARRAY_TYPE
50670 || POINTER_TYPE_P (htype))
50672 wtype = TREE_TYPE (wtype);
50673 htype = TREE_TYPE (htype);
50676 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50677 return sysv_va_list_type_node;
/* Third candidate: the Microsoft ABI va_list.  */
50678 wtype = ms_va_list_type_node;
50679 gcc_assert (wtype != NULL_TREE);
50681 if (TREE_CODE (wtype) == ARRAY_TYPE)
50683 /* If va_list is an array type, the argument may have decayed
50684 to a pointer type, e.g. by being passed to another function.
50685 In that case, unwrap both types so that we can compare the
50686 underlying records.  */
50687 if (TREE_CODE (htype) == ARRAY_TYPE
50688 || POINTER_TYPE_P (htype))
50690 wtype = TREE_TYPE (wtype);
50691 htype = TREE_TYPE (htype);
50694 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50695 return ms_va_list_type_node;
50698 return std_canonical_va_list_type (type);
50701 /* Iterate through the target-specific builtin types for va_list.
50702 IDX denotes the iterator, *PTREE is set to the result type of
50703 the va_list builtin, and *PNAME to its internal type.
50704 Returns zero if there is no element for this index, otherwise
50705 IDX should be increased upon the next call.
50706 Note, do not iterate a base builtin's name like __builtin_va_list.
50707 Used from c_common_nodes_and_builtins.  */
/* The switch over IDX (cases 0 and 1) is elided from this extraction.  */
50710 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50720 *ptree = ms_va_list_type_node;
50721 *pname = "__builtin_ms_va_list";
50725 *ptree = sysv_va_list_type_node;
50726 *pname = "__builtin_sysv_va_list";
/* Scheduler target-hook overrides and BD-style dispatch-window tuning
   constants (used by the dispatch scheduling code that follows).  */
50734 #undef TARGET_SCHED_DISPATCH
50735 #define TARGET_SCHED_DISPATCH has_dispatch
50736 #undef TARGET_SCHED_DISPATCH_DO
50737 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50738 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50739 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50740 #undef TARGET_SCHED_REORDER
50741 #define TARGET_SCHED_REORDER ix86_sched_reorder
50742 #undef TARGET_SCHED_ADJUST_PRIORITY
50743 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50744 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50745 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50746 ix86_dependencies_evaluation_hook
50748 /* The size of the dispatch window is the total number of bytes of
50749 object code allowed in a window.  */
50750 #define DISPATCH_WINDOW_SIZE 16
50752 /* Number of dispatch windows considered for scheduling.  */
50753 #define MAX_DISPATCH_WINDOWS 3
50755 /* Maximum number of instructions in a window.  */
50758 /* Maximum number of immediate operands in a window.  */
50761 /* Maximum number of immediate bits allowed in a window.  */
50762 #define MAX_IMM_SIZE 128
50764 /* Maximum number of 32 bit immediates allowed in a window.  */
50765 #define MAX_IMM_32 4
50767 /* Maximum number of 64 bit immediates allowed in a window.  */
50768 #define MAX_IMM_64 2
50770 /* Maximum total of loads or prefetches allowed in a window.  */
50773 /* Maximum total of stores allowed in a window.  */
50774 #define MAX_STORE 1
50780 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50781 enum dispatch_group {
/* NOTE(review): the enumerator list (orig. 50782-50794: disp_no_group,
   disp_load, …, disp_last) was dropped from this extract; the order can be
   read off the group_name table below — restore before compiling.  */
50796 /* Number of allowable groups in a dispatch window. It is an array
50797 indexed by dispatch_group enum. 100 is used as a big number,
50798 because the number of these kind of operations does not have any
50799 effect in dispatch window, but we need them for other reasons in
50801 static unsigned int num_allowable_groups[disp_last] = {
50802 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
/* Human-readable names for dispatch_group values, used by the debug
   dump routines below.  */
50805 char group_name[disp_last + 1][16] = {
50806 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50807 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50808 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50811 /* Instruction path. */
/* NOTE(review): the `enum insn_path {` header and the `no_path` member
   (orig. 50812-50813) were dropped from this extract.  */
50814 path_single, /* Single micro op. */
50815 path_double, /* Double micro op. */
50816 path_multi, /* Instructions with more than 2 micro ops. */
50820 /* sched_insn_info defines a window to the instructions scheduled in
50821 the basic block. It contains a pointer to the insn_info table and
50822 the instruction scheduled.
50824 Windows are allocated for each basic block and are linked
50826 typedef struct sched_insn_info_s {
/* NOTE(review): fields insn, byte_len and imm_bytes (used below) are
   missing from this extract — presumably dropped lines; confirm upstream.  */
50828 enum dispatch_group group;
50829 enum insn_path path;
50834 /* Linked list of dispatch windows. This is a two way list of
50835 dispatch windows of a basic block. It contains information about
50836 the number of uops in the window and the total number of
50837 instructions and of bytes in the object code for this dispatch
50839 typedef struct dispatch_windows_s {
50840 int num_insn; /* Number of insn in the window. */
50841 int num_uops; /* Number of uops in the window. */
50842 int window_size; /* Number of bytes in the window. */
50843 int window_num; /* Window number between 0 or 1. */
50844 int num_imm; /* Number of immediates in an insn. */
50845 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50846 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50847 int imm_size; /* Total immediates in the window. */
50848 int num_loads; /* Total memory loads in the window. */
50849 int num_stores; /* Total memory stores in the window. */
50850 int violation; /* Violation exists in window. */
50851 sched_insn_info *window; /* Pointer to the window. */
50852 struct dispatch_windows_s *next;
50853 struct dispatch_windows_s *prev;
50854 } dispatch_windows;
50856 /* Immediate values used in an insn. */
50857 typedef struct imm_info_s
/* NOTE(review): the imm_info fields (imm, imm32, imm64 — see their use in
   find_constant below) were dropped from this extract.  */
/* Head pointers for the two dispatch windows of the current block.  */
50864 static dispatch_windows *dispatch_window_list;
50865 static dispatch_windows *dispatch_window_list1;
50867 /* Get dispatch group of insn. */
50869 static enum dispatch_group
50870 get_mem_group (rtx_insn *insn)
50872 enum attr_memory memory;
50874 if (INSN_CODE (insn) < 0)
50875 return disp_no_group;
50876 memory = get_attr_memory (insn);
50877 if (memory == MEMORY_STORE)
50880 if (memory == MEMORY_LOAD)
50883 if (memory == MEMORY_BOTH)
50884 return disp_load_store;
50886 return disp_no_group;
50889 /* Return true if insn is a compare instruction. */
50892 is_cmp (rtx_insn *insn)
50894 enum attr_type type;
50896 type = get_attr_type (insn);
50897 return (type == TYPE_TEST
50898 || type == TYPE_ICMP
50899 || type == TYPE_FCMP
50900 || GET_CODE (PATTERN (insn)) == COMPARE);
50903 /* Return true if a dispatch violation encountered. */
50906 dispatch_violation (void)
50908 if (dispatch_window_list->next)
50909 return dispatch_window_list->next->violation;
50910 return dispatch_window_list->violation;
50913 /* Return true if insn is a branch instruction. */
50916 is_branch (rtx insn)
50918 return (CALL_P (insn) || JUMP_P (insn));
50921 /* Return true if insn is a prefetch instruction. */
50924 is_prefetch (rtx insn)
50926 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50929 /* This function initializes a dispatch window and the list container holding a
50930 pointer to the window. */
/* NOTE(review): the return-type line, the declaration of loop index `i`,
   the `else` keyword before line 50941, and the braces were dropped from
   this extract (numbering gaps 50931-50932, 50934-50935, 50940) — restore
   before compiling.  */
50933 init_window (int window_num)
50936 dispatch_windows *new_list;
/* Pick which of the two pre-allocated windows to (re)initialize.  */
50938 if (window_num == 0)
50939 new_list = dispatch_window_list;
50941 new_list = dispatch_window_list1;
/* Reset all per-window counters and unlink it from its neighbors.  */
50943 new_list->num_insn = 0;
50944 new_list->num_uops = 0;
50945 new_list->window_size = 0;
50946 new_list->next = NULL;
50947 new_list->prev = NULL;
50948 new_list->window_num = window_num;
50949 new_list->num_imm = 0;
50950 new_list->num_imm_32 = 0;
50951 new_list->num_imm_64 = 0;
50952 new_list->imm_size = 0;
50953 new_list->num_loads = 0;
50954 new_list->num_stores = 0;
50955 new_list->violation = false;
/* Clear every per-insn slot of the window.  */
50957 for (i = 0; i < MAX_INSN; i++)
50959 new_list->window[i].insn = NULL;
50960 new_list->window[i].group = disp_no_group;
50961 new_list->window[i].path = no_path;
50962 new_list->window[i].byte_len = 0;
50963 new_list->window[i].imm_bytes = 0;
50968 /* This function allocates and initializes a dispatch window and the
50969 list container holding a pointer to the window. */
/* NOTE(review): the closing `return new_list;` and braces (numbering gap
   50976-50978) were dropped from this extract.  */
50971 static dispatch_windows *
50972 allocate_window (void)
50974 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
/* One spare slot beyond MAX_INSN is allocated.  */
50975 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50980 /* This routine initializes the dispatch scheduling information. It
50981 initiates building dispatch scheduler tables and constructs the
50982 first dispatch window. */
/* NOTE(review): the return-type line, braces, and presumably the
   init_window (0) / init_window (1) calls (numbering gap 50990-50992)
   were dropped from this extract.  */
50985 init_dispatch_sched (void)
50987 /* Allocate a dispatch list and a window. */
50988 dispatch_window_list = allocate_window ();
50989 dispatch_window_list1 = allocate_window ();
50994 /* This function returns true if a branch is detected. End of a basic block
50995 does not have to be a branch, but here we assume only branches end a
50999 is_end_basic_block (enum dispatch_group group)
51001 return group == disp_branch;
51004 /* This function is called when the end of a window processing is reached. */
/* NOTE(review): the return-type line, braces, and the trailing
   init_window calls (numbering gaps 51005-51006, 51015-51018) were
   dropped from this extract.  Sanity-checks window capacities: the two
   windows together may hold at most 48 bytes of object code.  */
51007 process_end_window (void)
51009 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
51010 if (dispatch_window_list->next)
51012 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
51013 gcc_assert (dispatch_window_list->window_size
51014 + dispatch_window_list1->window_size <= 48);
51020 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
51021 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
51022 for 48 bytes of instructions. Note that these windows are not dispatch
51023 windows that their sizes are DISPATCH_WINDOW_SIZE. */
/* NOTE(review): several lines were dropped from this extract (numbering
   gaps 51029, 51031-51032, 51034-51035, 51038): the init_window call when
   window 0 is reused, and the braces — restore before compiling.  */
51025 static dispatch_windows *
51026 allocate_next_window (int window_num)
51028 if (window_num == 0)
51030 if (dispatch_window_list->next)
51033 return dispatch_window_list;
/* Window 1: chain it after window 0 and return it.  */
51036 dispatch_window_list->next = dispatch_window_list1;
51037 dispatch_window_list1->prev = dispatch_window_list;
51039 return dispatch_window_list1;
51042 /* Compute number of immediate operands of an instruction. */
/* NOTE(review): the return-type line, the switch's case labels
   (numbering gaps 51053-51056, 51060, 51062-51064, 51067-51069) and
   braces were dropped from this extract.  From the visible bodies the
   cases appear to be: CONST_INT (counted as 32- or 64-bit by
   x86_64_immediate_operand), CONST_DOUBLE (always 64-bit), and
   CODE_LABEL (normal labels count as 32-bit) — TODO confirm upstream.  */
51045 find_constant (rtx in_rtx, imm_info *imm_values)
51047 if (INSN_P (in_rtx))
51048 in_rtx = PATTERN (in_rtx);
51049 subrtx_iterator::array_type array;
51050 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
51051 if (const_rtx x = *iter)
51052 switch (GET_CODE (x))
51057 (imm_values->imm)++;
51058 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
51059 (imm_values->imm32)++;
51061 (imm_values->imm64)++;
51065 (imm_values->imm)++;
51066 (imm_values->imm64)++;
51070 if (LABEL_KIND (x) == LABEL_NORMAL)
51072 (imm_values->imm)++;
51073 (imm_values->imm32)++;
51082 /* Return total size of immediate operands of an instruction along with number
51083 of corresponding immediate-operands. It initializes its parameters to zero
51084 before calling FIND_CONSTANT.
51085 INSN is the input instruction. IMM is the total of immediates.
51086 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
/* NOTE(review): the tail of the comment, the return-type line (static int)
   and braces (numbering gaps 51087-51089, 51091, 51093) were dropped from
   this extract.  Returns total immediate size in bytes: 4 per 32-bit plus
   8 per 64-bit immediate.  */
51090 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
51092 imm_info imm_values = {0, 0, 0};
51094 find_constant (insn, &imm_values);
51095 *imm = imm_values.imm;
51096 *imm32 = imm_values.imm32;
51097 *imm64 = imm_values.imm64;
51098 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
51101 /* This function indicates if an operand of an instruction is an
/* NOTE(review): the comment tail, return-type line, braces and a
   preceding statement (numbering gaps 51102-51104, 51106, 51110-51111)
   were dropped from this extract.  Returns nonzero (the total immediate
   byte size) when INSN has any immediate operand.  */
51105 has_immediate (rtx insn)
51107 int num_imm_operand;
51108 int num_imm32_operand;
51109 int num_imm64_operand;
51112 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51113 &num_imm64_operand);
51117 /* Return single or double path for instructions. */
/* NOTE(review): braces and the fall-through `return path_multi;`
   (numbering gaps 51121, 51123, 51126, 51129-51131) were dropped from
   this extract.  Maps the amdfam10_decode attribute onto the insn_path
   enum: 0 -> single uop, 1 -> double uop, otherwise multi.  */
51119 static enum insn_path
51120 get_insn_path (rtx_insn *insn)
51122 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
51124 if ((int)path == 0)
51125 return path_single;
51127 if ((int)path == 1)
51128 return path_double;
51133 /* Return insn dispatch group. */
/* NOTE(review): braces and a few statements (numbering gaps 51137,
   51139-51141, 51144-51147, 51149-51150, 51153) were dropped from this
   extract — including the early return of a nonempty memory group and
   the `return disp_imm;` after the has_immediate check — restore before
   compiling.  */
51135 static enum dispatch_group
51136 get_insn_group (rtx_insn *insn)
51138 enum dispatch_group group = get_mem_group (insn);
51142 if (is_branch (insn))
51143 return disp_branch;
51148 if (has_immediate (insn))
51151 if (is_prefetch (insn))
51152 return disp_prefetch;
51154 return disp_no_group;
51157 /* Count number of GROUP restricted instructions in a dispatch
51158 window WINDOW_LIST. */
/* NOTE(review): the return-type line, braces and the per-branch return
   values (numbering gaps 51159-51160, 51170-51171, 51173, 51189-51193,
   51202-51205) were dropped from this extract.  The visible logic checks
   whether adding INSN's immediates/loads/stores would exceed the window
   capacity constants (MAX_IMM*, MAX_LOAD, MAX_STORE).  */
51161 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
51163 enum dispatch_group group = get_insn_group (insn);
51165 int num_imm_operand;
51166 int num_imm32_operand;
51167 int num_imm64_operand;
51169 if (group == disp_no_group)
51172 if (group == disp_imm)
51174 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51175 &num_imm64_operand);
/* Immediate-capacity check: total bytes, operand counts, and the mixed
   32/64-bit packing constraints of the window.  */
51176 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
51177 || num_imm_operand + window_list->num_imm > MAX_IMM
51178 || (num_imm32_operand > 0
51179 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
51180 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
51181 || (num_imm64_operand > 0
51182 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
51183 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
51184 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
51185 && num_imm64_operand > 0
51186 && ((window_list->num_imm_64 > 0
51187 && window_list->num_insn >= 2)
51188 || window_list->num_insn >= 3)))
/* Load/store capacity check for memory groups.  */
51194 if ((group == disp_load_store
51195 && (window_list->num_loads >= MAX_LOAD
51196 || window_list->num_stores >= MAX_STORE))
51197 || ((group == disp_load
51198 || group == disp_prefetch)
51199 && window_list->num_loads >= MAX_LOAD)
51200 || (group == disp_store
51201 && window_list->num_stores >= MAX_STORE))
51207 /* This function returns true if insn satisfies dispatch rules on the
51208 last window scheduled. */
/* NOTE(review): the return-type line, braces, and several return/else
   statements (numbering gaps 51209-51210, 51218-51219, 51225-51226,
   51229-51230, 51238-51239, 51242-51244, 51248-51249, 51257, 51259-51262)
   were dropped from this extract — restore before compiling.  */
51211 fits_dispatch_window (rtx_insn *insn)
51213 dispatch_windows *window_list = dispatch_window_list;
51214 dispatch_windows *window_list_next = dispatch_window_list->next;
51215 unsigned int num_restrict;
51216 enum dispatch_group group = get_insn_group (insn);
51217 enum insn_path path = get_insn_path (insn);
51220 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
51221 instructions should be given the lowest priority in the
51222 scheduling process in Haifa scheduler to make sure they will be
51223 scheduled in the same dispatch window as the reference to them. */
51224 if (group == disp_jcc || group == disp_cmp)
51227 /* Check nonrestricted. */
51228 if (group == disp_no_group || group == disp_branch)
51231 /* Get last dispatch window. */
51232 if (window_list_next)
51233 window_list = window_list_next;
51235 if (window_list->window_num == 1)
51237 sum = window_list->prev->window_size + window_list->window_size;
51240 || (min_insn_size (insn) + sum) >= 48)
51241 /* Window 1 is full. Go for next window. */
51245 num_restrict = count_num_restricted (insn, window_list);
51247 if (num_restrict > num_allowable_groups[group])
51250 /* See if it fits in the first window. */
51251 if (window_list->window_num == 0)
51253 /* The first widow should have only single and double path
51255 if (path == path_double
51256 && (window_list->num_uops + 2) > MAX_INSN)
51258 else if (path != path_single)
51264 /* Add an instruction INSN with NUM_UOPS micro-operations to the
51265 dispatch window WINDOW_LIST. */
/* NOTE(review): the return-type line, the `int imm_size;` declaration
   (used at 51284/51292) and braces were dropped from this extract
   (numbering gaps 51266-51267, 51269, 51272, 51279, 51283, 51286, 51293,
   51301, 51308, 51311-51313) — restore before compiling.  */
51268 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
51270 int byte_len = min_insn_size (insn);
51271 int num_insn = window_list->num_insn;
51273 sched_insn_info *window = window_list->window;
51274 enum dispatch_group group = get_insn_group (insn);
51275 enum insn_path path = get_insn_path (insn);
51276 int num_imm_operand;
51277 int num_imm32_operand;
51278 int num_imm64_operand;
/* Record a dispatch violation if the insn does not fit, unless it is a
   compare (those are deliberately scheduled with their consumers).  */
51280 if (!window_list->violation && group != disp_cmp
51281 && !fits_dispatch_window (insn))
51282 window_list->violation = true;
51284 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51285 &num_imm64_operand);
51287 /* Initialize window with new instruction. */
51288 window[num_insn].insn = insn;
51289 window[num_insn].byte_len = byte_len;
51290 window[num_insn].group = group;
51291 window[num_insn].path = path;
51292 window[num_insn].imm_bytes = imm_size;
/* Bump the window-wide accumulators.  */
51294 window_list->window_size += byte_len;
51295 window_list->num_insn = num_insn + 1;
51296 window_list->num_uops = window_list->num_uops + num_uops;
51297 window_list->imm_size += imm_size;
51298 window_list->num_imm += num_imm_operand;
51299 window_list->num_imm_32 += num_imm32_operand;
51300 window_list->num_imm_64 += num_imm64_operand;
51302 if (group == disp_store)
51303 window_list->num_stores += 1;
51304 else if (group == disp_load
51305 || group == disp_prefetch)
51306 window_list->num_loads += 1;
51307 else if (group == disp_load_store)
51309 window_list->num_stores += 1;
51310 window_list->num_loads += 1;
51314 /* Adds a scheduled instruction, INSN, to the current dispatch window.
51315 If the total bytes of instructions or the number of instructions in
51316 the window exceed allowable, it allocates a new window. */
/* NOTE(review): the return-type line, several local declarations
   (byte_len, num_insn, num_uops, window_num, insn_num_uops, sum,
   insn_fits — see uses below) and braces were dropped from this extract
   (numbering gaps 51317-51318, 51320-51321, 51327-51333, 51335-51336,
   51342, 51344, 51346, 51348, 51350-51351, 51364, 51367-51368, 51371-51372,
   51374, 51378, 51380-51382, 51384, 51387, 51389, 51392-51393, 51395-51396,
   51398, 51400, 51403-51405) — restore before compiling.  */
51319 add_to_dispatch_window (rtx_insn *insn)
51322 dispatch_windows *window_list;
51323 dispatch_windows *next_list;
51324 dispatch_windows *window0_list;
51325 enum insn_path path;
51326 enum dispatch_group insn_group;
51334 if (INSN_CODE (insn) < 0)
51337 byte_len = min_insn_size (insn);
51338 window_list = dispatch_window_list;
51339 next_list = window_list->next;
51340 path = get_insn_path (insn);
51341 insn_group = get_insn_group (insn);
51343 /* Get the last dispatch window. */
51345 window_list = dispatch_window_list->next;
51347 if (path == path_single)
51349 else if (path == path_double)
51352 insn_num_uops = (int) path;
51354 /* If current window is full, get a new window.
51355 Window number zero is full, if MAX_INSN uops are scheduled in it.
51356 Window number one is full, if window zero's bytes plus window
51357 one's bytes is 32, or if the bytes of the new instruction added
51358 to the total makes it greater than 48, or it has already MAX_INSN
51359 instructions in it. */
51360 num_insn = window_list->num_insn;
51361 num_uops = window_list->num_uops;
51362 window_num = window_list->window_num;
51363 insn_fits = fits_dispatch_window (insn);
51365 if (num_insn >= MAX_INSN
51366 || num_uops + insn_num_uops > MAX_INSN
/* Flip between window 0 and window 1.  */
51369 window_num = ~window_num & 1;
51370 window_list = allocate_next_window (window_num);
51373 if (window_num == 0)
51375 add_insn_window (insn, window_list, insn_num_uops);
51376 if (window_list->num_insn >= MAX_INSN
51377 && insn_group == disp_branch)
51379 process_end_window ();
51383 else if (window_num == 1)
51385 window0_list = window_list->prev;
51386 sum = window0_list->window_size + window_list->window_size;
51388 || (byte_len + sum) >= 48)
51390 process_end_window ();
51391 window_list = dispatch_window_list;
51394 add_insn_window (insn, window_list, insn_num_uops);
51397 gcc_unreachable ();
51399 if (is_end_basic_block (insn_group))
51401 /* End of basic block is reached do end-basic-block process. */
51402 process_end_window ();
51407 /* Print the dispatch window, WINDOW_NUM, to FILE. */
/* NOTE(review): the `int i;` declaration, the `else`, and braces
   (numbering gaps 51411, 51413-51414, 51417, 51419, 51425, 51427, 51429,
   51431, 51433, 51440-51441) were dropped from this extract.  */
51409 DEBUG_FUNCTION static void
51410 debug_dispatch_window_file (FILE *file, int window_num)
51412 dispatch_windows *list;
51415 if (window_num == 0)
51416 list = dispatch_window_list;
51418 list = dispatch_window_list1;
51420 fprintf (file, "Window #%d:\n", list->window_num);
51421 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51422 list->num_insn, list->num_uops, list->window_size);
51423 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51424 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51426 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51428 fprintf (file, " insn info:\n");
/* Dump each occupied slot of the window.  */
51430 for (i = 0; i < MAX_INSN; i++)
51432 if (!list->window[i].insn)
51434 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51435 i, group_name[list->window[i].group],
51436 i, (void *)list->window[i].insn,
51437 i, list->window[i].path,
51438 i, list->window[i].byte_len,
51439 i, list->window[i].imm_bytes);
51443 /* Print to stdout a dispatch window. */
51445 DEBUG_FUNCTION void
51446 debug_dispatch_window (int window_num)
51448 debug_dispatch_window_file (stdout, window_num);
51451 /* Print INSN dispatch information to FILE. */
/* NOTE(review): the declarations of byte_len and imm_size (used below),
   an early `return;` after the INSN_CODE check, and braces (numbering
   gaps 51455-51456, 51459, 51463, 51465-51466, 51472, 51478) were
   dropped from this extract.  */
51453 DEBUG_FUNCTION static void
51454 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51457 enum insn_path path;
51458 enum dispatch_group group;
51460 int num_imm_operand;
51461 int num_imm32_operand;
51462 int num_imm64_operand;
51464 if (INSN_CODE (insn) < 0)
51467 byte_len = min_insn_size (insn);
51468 path = get_insn_path (insn);
51469 group = get_insn_group (insn);
51470 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51471 &num_imm64_operand);
51473 fprintf (file, " insn info:\n");
51474 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51475 group_name[group], path, byte_len);
51476 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51477 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51480 /* Print to STDERR the status of the ready list with respect to
51481 dispatch windows. */
/* NOTE(review): the `int i;` declaration and braces (numbering gaps
   51482, 51485-51486, 51488, 51490, 51493) were dropped from this
   extract.  Despite the comment, output visibly goes to stdout.  */
51483 DEBUG_FUNCTION void
51484 debug_ready_dispatch (void)
51487 int no_ready = number_in_ready ();
51489 fprintf (stdout, "Number of ready: %d\n", no_ready);
51491 for (i = 0; i < no_ready; i++)
51492 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51495 /* This routine is the driver of the dispatch scheduler. */
51498 do_dispatch (rtx_insn *insn, int mode)
51500 if (mode == DISPATCH_INIT)
51501 init_dispatch_sched ();
51502 else if (mode == ADD_TO_DISPATCH_WINDOW)
51503 add_to_dispatch_window (insn);
51506 /* Return TRUE if Dispatch Scheduling is supported. */
/* NOTE(review): the return-type line, the `switch (action)` header,
   several case labels and the final `return false;` (numbering gaps
   51507-51508, 51510, 51513-51517, 51519-51522, 51524, 51527, 51530-51533)
   were dropped from this extract.  Dispatch scheduling is active only
   for the AMD Bulldozer family with -mdispatch-scheduler.  */
51509 has_dispatch (rtx_insn *insn, int action)
51511 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51512 && flag_dispatch_scheduler)
51518 case IS_DISPATCH_ON:
51523 return is_cmp (insn);
51525 case DISPATCH_VIOLATION:
51526 return dispatch_violation ();
51528 case FITS_DISPATCH_WINDOW:
51529 return fits_dispatch_window (insn);
51535 /* Implementation of reassociation_width target hook used by
51536 reassoc phase to identify parallelism level in reassociated
51537 tree. Statements tree_code is passed in OPC. Arguments type
51540 Currently parallel reassociation is enabled for Atom
51541 processors only and we set reassociation width to be 2
51542 because Atom may issue up to 2 instructions per cycle.
51544 Return value should be fixed if parallel reassociation is
51545 enabled for other processors. */
/* NOTE(review): the return-type line, the `int res = 1;` initialization,
   the width assignments inside the branches, and `return res;`
   (numbering gaps 51546-51547, 51549-51550, 51552, 51554-51559, 51561,
   51563-51566) were dropped from this extract.  */
51548 ix86_reassociation_width (unsigned int, machine_mode mode)
51551 if (VECTOR_MODE_P (mode))
51553 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51560 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51562 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51568 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51569 place emms and femms instructions. */
/* NOTE(review): the `switch (mode)` header, its case labels (QImode,
   HImode, SImode, DImode, SFmode, DFmode, default) and the mode returns
   inside the SFmode/DFmode branches were dropped from this extract
   (numbering gaps 51573-51579, 51582, 51585, 51588, 51591-51592,
   51594, 51596-51600, 51602, 51604, 51606, 51608-51614) — the visible
   ternaries pick the widest vector mode the enabled ISA supports.  */
51571 static machine_mode
51572 ix86_preferred_simd_mode (machine_mode mode)
51580 return TARGET_AVX512BW ? V64QImode :
51581 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51583 return TARGET_AVX512BW ? V32HImode :
51584 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51586 return TARGET_AVX512F ? V16SImode :
51587 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51589 return TARGET_AVX512F ? V8DImode :
51590 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51593 if (TARGET_AVX512F)
51595 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51601 if (!TARGET_VECTORIZE_DOUBLE)
51603 else if (TARGET_AVX512F)
51605 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51607 else if (TARGET_SSE2)
51616 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51617 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51618 256bit and 128bit vectors. */
51620 static unsigned int
51621 ix86_autovectorize_vector_sizes (void)
51623 return TARGET_AVX512F ? 64 | 32 | 16 :
51624 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51629 /* Return class of registers which could be used for pseudo of MODE
51630 and of class RCLASS for spilling instead of memory. Return NO_REGS
51631 if it is not possible or non-profitable. */
/* NOTE(review): the `static reg_class_t` return-type line, braces and
   the final `return NO_REGS;` (numbering gaps 51632, 51634, 51639-51640)
   were dropped from this extract.  Allows spilling SImode/DImode
   general-regs pseudos to SSE registers instead of memory.  */
51633 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51635 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51636 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51637 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51638 return ALL_SSE_REGS;
51642 /* Implement targetm.vectorize.init_cost. */
/* NOTE(review): the return-type line, braces and the trailing
   `return cost;` (numbering gaps 51643-51644, 51646, 51649-51650) were
   dropped from this extract.  Allocates the three-slot cost accumulator
   (prologue / body / epilogue), zero-initialized.  */
51645 ix86_init_cost (struct loop *)
51647 unsigned *cost = XNEWVEC (unsigned, 3);
51648 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51652 /* Implement targetm.vectorize.add_stmt_cost. */
/* NOTE(review): the return-type line, braces and the trailing
   `return retval;` (numbering gaps 51653-51654, 51658, 51661, 51664,
   51670, 51672, 51678, 51682-51683, 51685-51687) were dropped from this
   extract.  */
51655 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51656 struct _stmt_vec_info *stmt_info, int misalign,
51657 enum vect_cost_model_location where)
51659 unsigned *cost = (unsigned *) data;
51660 unsigned retval = 0;
51662 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51663 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51665 /* Statements in an inner loop relative to the loop being
51666 vectorized are weighted more heavily. The value here is
51667 arbitrary and could potentially be improved with analysis. */
51668 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51669 count *= 50; /* FIXME. */
51671 retval = (unsigned) (count * stmt_cost);
51673 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51674 for Silvermont as it has out of order integer pipeline and can execute
51675 2 scalar instruction per tick, but has in order SIMD pipeline. */
51676 if (TARGET_SILVERMONT || TARGET_INTEL)
51677 if (stmt_info && stmt_info->stmt)
51679 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51680 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51681 retval = (retval * 17) / 10;
51684 cost[where] += retval;
51689 /* Implement targetm.vectorize.finish_cost. */
/* NOTE(review): the return-type line and braces (numbering gaps
   51690-51691, 51694, 51699) were dropped from this extract.  Copies
   the accumulated costs out to the three caller-provided slots.  */
51692 ix86_finish_cost (void *data, unsigned *prologue_cost,
51693 unsigned *body_cost, unsigned *epilogue_cost)
51695 unsigned *cost = (unsigned *) data;
51696 *prologue_cost = cost[vect_prologue];
51697 *body_cost = cost[vect_body];
51698 *epilogue_cost = cost[vect_epilogue];
51701 /* Implement targetm.vectorize.destroy_cost_data. */
/* NOTE(review): the return-type line, braces and the body — presumably
   freeing the accumulator allocated by ix86_init_cost (numbering gap
   51705-51707) — were dropped from this extract; confirm upstream.  */
51704 ix86_destroy_cost_data (void *data)
51709 /* Validate target specific memory model bits in VAL. */
/* NOTE(review): the declaration of `strong`, braces, part of the invalid
   mask expression (orig. 51718), and the final `return val;` (numbering
   gaps 51713, 51715-51716, 51720, 51724, 51727, 51731, 51733, 51737-51739)
   were dropped from this extract.  Checks that IX86_HLE_ACQUIRE /
   IX86_HLE_RELEASE bits are used only with sufficiently strong memory
   models, warning and falling back to SEQ_CST otherwise.  */
51711 static unsigned HOST_WIDE_INT
51712 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51714 enum memmodel model = memmodel_from_int (val);
51717 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51719 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51721 warning (OPT_Winvalid_memory_model,
51722 "Unknown architecture specific memory model");
51723 return MEMMODEL_SEQ_CST;
51725 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51726 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51728 warning (OPT_Winvalid_memory_model,
51729 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51730 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51732 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51734 warning (OPT_Winvalid_memory_model,
51735 "HLE_RELEASE not used with RELEASE or stronger memory model");
51736 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51741 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51742 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51743 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51744 or number of vecsize_mangle variants that should be emitted. */
/* NOTE(review): the return-type line, local declarations (`tree t;
   int i;`), many switch case labels, `return 0;`/`return 1;` statements,
   braces, and the final return of the variant count were dropped from
   this extract (numbering gaps throughout 51745-51853) — restore before
   compiling.  */
51747 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51748 struct cgraph_simd_clone *clonei,
51749 tree base_type, int num)
/* simdlen, when given, must be a power of two in [2, 16].  */
51753 if (clonei->simdlen
51754 && (clonei->simdlen < 2
51755 || clonei->simdlen > 16
51756 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51758 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51759 "unsupported simdlen %d", clonei->simdlen);
/* Only scalar integer/float return and argument modes are supported;
   the case labels listing the accepted TYPE_MODEs were dropped here.  */
51763 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51764 if (TREE_CODE (ret_type) != VOID_TYPE)
51765 switch (TYPE_MODE (ret_type))
51777 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51778 "unsupported return type %qT for simd\n", ret_type);
51785 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51786 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51787 switch (TYPE_MODE (TREE_TYPE (t)))
51799 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51800 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
/* Choose the ISA mangling letter: 'b' = SSE, 'c' = AVX, 'd' = AVX2.  */
51804 if (clonei->cilk_elemental)
51806 /* Parse here processor clause. If not present, default to 'b'. */
51807 clonei->vecsize_mangle = 'b';
51809 else if (!TREE_PUBLIC (node->decl))
51811 /* If the function isn't exported, we can pick up just one ISA
51814 clonei->vecsize_mangle = 'd';
51815 else if (TARGET_AVX)
51816 clonei->vecsize_mangle = 'c';
51818 clonei->vecsize_mangle = 'b';
51823 clonei->vecsize_mangle = "bcd"[num];
51826 switch (clonei->vecsize_mangle)
51829 clonei->vecsize_int = 128;
51830 clonei->vecsize_float = 128;
51833 clonei->vecsize_int = 128;
51834 clonei->vecsize_float = 256;
51837 clonei->vecsize_int = 256;
51838 clonei->vecsize_float = 256;
/* Derive simdlen from the vector size and the base type's width,
   clamped to 16.  */
51841 if (clonei->simdlen == 0)
51843 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51844 clonei->simdlen = clonei->vecsize_int;
51846 clonei->simdlen = clonei->vecsize_float;
51847 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51848 if (clonei->simdlen > 16)
51849 clonei->simdlen = 16;
51854 /* Add target attribute to SIMD clone NODE if needed. */
/* NOTE(review): the return-type line, the switch case labels mapping
   vecsize_mangle letters to target strings (e.g. "sse2"/"avx"/"avx2" —
   numbering gaps 51862-51875), the early return when no attribute is
   needed, the gcc_assert on `ok`, and braces were dropped from this
   extract — restore before compiling.  */
51857 ix86_simd_clone_adjust (struct cgraph_node *node)
51859 const char *str = NULL;
51860 gcc_assert (node->decl == cfun->decl);
51861 switch (node->simdclone->vecsize_mangle)
51876 gcc_unreachable ();
51881 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51882 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
/* Re-establish the per-function target state after attaching the
   target attribute.  */
51885 ix86_reset_previous_fndecl ();
51886 ix86_set_current_function (node->decl);
51889 /* If SIMD clone NODE can't be used in a vectorized loop
51890 in current function, return -1, otherwise return a badness of using it
51891 (0 if it is most desirable from vecsize_mangle point of view, 1
51892 slightly less desirable, etc.). */
/* NOTE(review): the return-type line, the case labels for the
   vecsize_mangle letters, several returns (including the -1 paths when
   the required ISA is unavailable), and braces were dropped from this
   extract (numbering gaps 51893-51894, 51896, 51898-51903, 51905-51907,
   51909-51914, 51916-51917) — restore before compiling.  */
51895 ix86_simd_clone_usable (struct cgraph_node *node)
51897 switch (node->simdclone->vecsize_mangle)
51904 return TARGET_AVX2 ? 2 : 1;
51908 return TARGET_AVX2 ? 1 : 0;
51915 gcc_unreachable ();
51919 /* This function adjusts the unroll factor based on
51920 the hardware capabilities. For ex, bdver3 has
51921 a loop buffer which makes unrolling of smaller
51922 loops less important. This function decides the
51923 unroll factor using number of memory references
51924 (value 32 is used) as a heuristic. */
/* NOTE(review): the return-type line, declarations of `bbs`, `insn`,
   `i`, the MEM_P test inside the subrtx walk, the free of `bbs`, the
   fall-through `return nunroll;`, and braces (numbering gaps 51925-51926,
   51928-51931, 51933, 51935-51936, 51939, 51947-51948, 51951-51957,
   51960-51963) were dropped from this extract.  */
51927 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51932 unsigned mem_count = 0;
51934 if (!TARGET_ADJUST_UNROLL)
51937 /* Count the number of memory references within the loop body.
51938 This value determines the unrolling factor for bdver3 and bdver4
/* Walk every nondebug insn of every block in the loop, counting
   memory references.  */
51940 subrtx_iterator::array_type array;
51941 bbs = get_loop_body (loop);
51942 for (i = 0; i < loop->num_nodes; i++)
51943 FOR_BB_INSNS (bbs[i], insn)
51944 if (NONDEBUG_INSN_P (insn))
51945 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
51946 if (const_rtx x = *iter)
51949 machine_mode mode = GET_MODE (x);
51950 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Heuristic: cap the unroll factor so that roughly 32 memory
   references fit in the unrolled body.  */
51958 if (mem_count && mem_count <=32)
51959 return 32/mem_count;
51965 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51968 ix86_float_exceptions_rounding_supported_p (void)
51970 /* For x87 floating point with standard excess precision handling,
51971 there is no adddf3 pattern (since x87 floating point only has
51972 XFmode operations) so the default hook implementation gets this
51974 return TARGET_80387 || TARGET_SSE_MATH;
51977 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
/* Build the trees used to expand a C11 atomic compound assignment with
   FENV_ACCESS on: *HOLD saves the FP environment and clears pending
   exceptions, *CLEAR re-clears them, and *UPDATE reads back which
   exceptions were raised, restores the saved environment and re-raises
   the accumulated exceptions.
   NOTE(review): extraction has elided some lines of this function
   (braces and the early-out for the !TARGET_80387 case); the comments
   below describe only the statements that are visible.  */
51980 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* Nothing to build when neither x87 nor SSE math is available.  */
51982 if (!TARGET_80387 && !TARGET_SSE_MATH)
/* Accumulates raised-exception bits across the x87 and SSE paths.  */
51984 tree exceptions_var = create_tmp_var (integer_type_node);
/* x87 path: model the FP environment saved by fnstenv as an array of
   seven unsigned ints (index type 0..6).  */
51987 tree fenv_index_type = build_index_type (size_int (6));
51988 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51989 tree fenv_var = create_tmp_var (fenv_type);
51990 mark_addressable (fenv_var);
51991 tree fenv_ptr = build_pointer_type (fenv_type);
51992 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51993 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
/* x87 builtins: fnstenv/fldenv save and restore the environment,
   fnstsw reads the status word, fnclex clears pending exceptions.  */
51994 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51995 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51996 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51997 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
/* *HOLD: save the environment, then clear exceptions.  */
51998 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51999 tree hold_fnclex = build_call_expr (fnclex, 0);
52000 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
/* *CLEAR: just clear the exception flags again.  */
52002 *clear = build_call_expr (fnclex, 0);
/* *UPDATE (x87 part): read the status word into SW_VAR, widen it to
   int and record it in EXCEPTIONS_VAR, then reload the saved
   environment.  */
52003 tree sw_var = create_tmp_var (short_unsigned_type_node);
52004 tree fnstsw_call = build_call_expr (fnstsw, 0);
52005 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
52006 sw_var, fnstsw_call);
52007 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
52008 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
52009 exceptions_var, exceptions_x87);
52010 *update = build2 (COMPOUND_EXPR, integer_type_node,
52011 sw_mod, update_mod);
52012 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
52013 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
/* SSE path: same protocol through the MXCSR control/status register.  */
52015 if (TARGET_SSE_MATH)
52017 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
52018 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
52019 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
52020 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
/* *HOLD: save the original MXCSR, then install a copy with all
   exception masks set (OR 0x1f80) and the sticky exception flags in
   the low six bits cleared (AND 0xffffffc0).  */
52021 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
52022 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
52023 mxcsr_orig_var, stmxcsr_hold_call);
52024 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
52026 build_int_cst (unsigned_type_node, 0x1f80));
52027 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
52028 build_int_cst (unsigned_type_node, 0xffffffc0));
52029 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
52030 mxcsr_mod_var, hold_mod_val);
52031 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52032 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
52033 hold_assign_orig, hold_assign_mod);
52034 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
52035 ldmxcsr_hold_call);
/* Chain the SSE hold sequence after any x87 *HOLD built above.  */
52037 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
/* *CLEAR: reload the masked/cleared MXCSR value (chained after the
   x87 fnclex when both paths are active).  */
52040 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52042 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
52043 ldmxcsr_clear_call);
52045 *clear = ldmxcsr_clear_call;
/* *UPDATE (SSE part): read the current MXCSR, fold its exception bits
   into EXCEPTIONS_VAR (OR-ing with the x87 bits when present), then
   restore the original MXCSR.  */
52046 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
52047 tree exceptions_sse = fold_convert (integer_type_node,
52048 stxmcsr_update_call);
52051 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
52052 exceptions_var, exceptions_sse);
52053 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
52054 exceptions_var, exceptions_mod);
52055 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
52056 exceptions_assign);
52059 *update = build2 (MODIFY_EXPR, integer_type_node,
52060 exceptions_var, exceptions_sse);
52061 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
52062 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52063 ldmxcsr_update_call);
/* Finally, re-raise every accumulated exception via
   __atomic_feraiseexcept.  */
52065 tree atomic_feraiseexcept
52066 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
52067 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
52068 1, exceptions_var);
52069 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52070 atomic_feraiseexcept_call);
52073 /* Return mode to be used for bounds or VOIDmode
52074 if bounds are not supported. */
/* Implement TARGET_CHKP_BOUND_MODE.
   NOTE(review): the return statements of this function were elided by
   extraction; only the diagnostic path is visible here.  */
52076 static enum machine_mode
52077 ix86_mpx_bound_mode ()
52079 /* Do not support pointer checker if MPX
/* Warn when -fcheck-pointer-bounds was requested but MPX is not
   enabled for this target.  */
52083 if (flag_check_pointer_bounds)
52084 warning (0, "Pointer Checker requires MPX support on this target."
52085 " Use -mmpx options to enable MPX.");
52092 /* Return constant used to statically initialize constant bounds.
52094 This function is used to create special bound values. For now
52095 only INIT bounds and NONE bounds are expected. More special
52096 values may be added later. */
/* Implement TARGET_CHKP_MAKE_BOUNDS_CONSTANT.  The bound pair is
   encoded as a complex constant: a nonzero LB yields an all-ones low
   part, and UB is stored inverted (UB of 0 becomes all-ones).  */
52099 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
52101 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
52102 : build_zero_cst (pointer_sized_int_node);
52103 tree high = ub ? build_zero_cst (pointer_sized_int_node)
52104 : build_minus_one_cst (pointer_sized_int_node);
52106 /* This function is supposed to be used to create INIT and
52107 NONE bounds only. */
/* Accept exactly INIT bounds (lb==0, ub==-1) or NONE bounds
   (lb==-1, ub==0).  */
52108 gcc_assert ((lb == 0 && ub == -1)
52109 || (lb == -1 && ub == 0));
52111 return build_complex (NULL, low, high);
52114 /* Generate a list of statements STMTS to initialize pointer bounds
52115 variable VAR with bounds LB and UB. Return the number of generated
52119 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
/* Implement TARGET_CHKP_INITIALIZE_BOUNDS.  VAR is viewed as two
   consecutive pointer-sized integers: the first holds LB and the
   second holds the bitwise-inverted UB.
   NOTE(review): the return type line and the trailing return statement
   were elided by extraction.  */
52121 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
52122 tree lhs, modify, var_p;
/* The upper bound is stored in one's-complement form.  */
52124 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
52125 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
/* First slot: store the lower bound.  */
52127 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
52128 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
52129 append_to_statement_list (modify, stmts);
/* Second slot, one pointer-size past VAR: store the inverted upper
   bound.  */
52131 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
52132 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
52133 TYPE_SIZE_UNIT (pointer_sized_int_node)));
52134 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
52135 append_to_statement_list (modify, stmts);
52140 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
52141 /* For i386, common symbol is local only for non-PIE binaries. For
52142 x86-64, common symbol is local only for non-PIE binaries or linker
52143 supports copy reloc in PIE binaries. */
/* Implement TARGET_BINDS_LOCAL_P through the generic helper.
   NOTE(review): the middle of the final argument expression was elided
   by extraction; the visible tail keys off HAVE_LD_PIE_COPYRELOC
   (presumably combined with a 64-bit-mode test — confirm against the
   full source).  */
52146 ix86_binds_local_p (const_tree exp)
52148 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
52151 && HAVE_LD_PIE_COPYRELOC != 0)));
52155 /* Initialize the GCC target structure. */
/* Each hook is #undef'd (the default lives in target-def.h) and then
   redefined to the i386-specific implementation; TARGET_INITIALIZER at
   the bottom collects them all into TARGETM.
   NOTE(review): several #endif/#else lines of this table were elided
   by extraction.  */
52156 #undef TARGET_RETURN_IN_MEMORY
52157 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52159 #undef TARGET_LEGITIMIZE_ADDRESS
52160 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
/* Attribute handling.  */
52162 #undef TARGET_ATTRIBUTE_TABLE
52163 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52164 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52165 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52166 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52167 # undef TARGET_MERGE_DECL_ATTRIBUTES
52168 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52171 #undef TARGET_COMP_TYPE_ATTRIBUTES
52172 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtins.  */
52174 #undef TARGET_INIT_BUILTINS
52175 #define TARGET_INIT_BUILTINS ix86_init_builtins
52176 #undef TARGET_BUILTIN_DECL
52177 #define TARGET_BUILTIN_DECL ix86_builtin_decl
52178 #undef TARGET_EXPAND_BUILTIN
52179 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52181 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52182 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52183 ix86_builtin_vectorized_function
52185 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
52186 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
52188 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
52189 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
52191 #undef TARGET_VECTORIZE_BUILTIN_GATHER
52192 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52194 #undef TARGET_BUILTIN_RECIPROCAL
52195 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
/* Assembly output.  */
52197 #undef TARGET_ASM_FUNCTION_EPILOGUE
52198 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52200 #undef TARGET_ENCODE_SECTION_INFO
52201 #ifndef SUBTARGET_ENCODE_SECTION_INFO
52202 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52204 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
52207 #undef TARGET_ASM_OPEN_PAREN
52208 #define TARGET_ASM_OPEN_PAREN ""
52209 #undef TARGET_ASM_CLOSE_PAREN
52210 #define TARGET_ASM_CLOSE_PAREN ""
52212 #undef TARGET_ASM_BYTE_OP
52213 #define TARGET_ASM_BYTE_OP ASM_BYTE
52215 #undef TARGET_ASM_ALIGNED_HI_OP
52216 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
52217 #undef TARGET_ASM_ALIGNED_SI_OP
52218 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
52220 #undef TARGET_ASM_ALIGNED_DI_OP
52221 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
52224 #undef TARGET_PROFILE_BEFORE_PROLOGUE
52225 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
52227 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
52228 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
/* x86 has no alignment constraints on data access, so the unaligned
   output ops are just the aligned ones.  */
52230 #undef TARGET_ASM_UNALIGNED_HI_OP
52231 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
52232 #undef TARGET_ASM_UNALIGNED_SI_OP
52233 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
52234 #undef TARGET_ASM_UNALIGNED_DI_OP
52235 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52237 #undef TARGET_PRINT_OPERAND
52238 #define TARGET_PRINT_OPERAND ix86_print_operand
52239 #undef TARGET_PRINT_OPERAND_ADDRESS
52240 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52241 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52242 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52243 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52244 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
/* Instruction scheduling.  */
52246 #undef TARGET_SCHED_INIT_GLOBAL
52247 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52248 #undef TARGET_SCHED_ADJUST_COST
52249 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52250 #undef TARGET_SCHED_ISSUE_RATE
52251 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52252 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52253 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52254 ia32_multipass_dfa_lookahead
52255 #undef TARGET_SCHED_MACRO_FUSION_P
52256 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52257 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52258 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52260 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
52261 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52263 #undef TARGET_MEMMODEL_CHECK
52264 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52266 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52267 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
52270 #undef TARGET_HAVE_TLS
52271 #define TARGET_HAVE_TLS true
52273 #undef TARGET_CANNOT_FORCE_CONST_MEM
52274 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
52275 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
52276 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
52278 #undef TARGET_DELEGITIMIZE_ADDRESS
52279 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
52281 #undef TARGET_MS_BITFIELD_LAYOUT_P
52282 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* TARGET_BINDS_LOCAL_P: later definitions override earlier ones
   depending on the subtarget (Mach-O / generic / PE-COFF).
   NOTE(review): the #if/#else lines that select between the first two
   definitions were elided by extraction.  */
52285 #undef TARGET_BINDS_LOCAL_P
52286 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
52288 #undef TARGET_BINDS_LOCAL_P
52289 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
52291 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52292 #undef TARGET_BINDS_LOCAL_P
52293 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
52296 #undef TARGET_ASM_OUTPUT_MI_THUNK
52297 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
52298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
52299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
52301 #undef TARGET_ASM_FILE_START
52302 #define TARGET_ASM_FILE_START x86_file_start
52304 #undef TARGET_OPTION_OVERRIDE
52305 #define TARGET_OPTION_OVERRIDE ix86_option_override
/* Cost models.  */
52307 #undef TARGET_REGISTER_MOVE_COST
52308 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
52309 #undef TARGET_MEMORY_MOVE_COST
52310 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
52311 #undef TARGET_RTX_COSTS
52312 #define TARGET_RTX_COSTS ix86_rtx_costs
52313 #undef TARGET_ADDRESS_COST
52314 #define TARGET_ADDRESS_COST ix86_address_cost
52316 #undef TARGET_FIXED_CONDITION_CODE_REGS
52317 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
52318 #undef TARGET_CC_MODES_COMPATIBLE
52319 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
52321 #undef TARGET_MACHINE_DEPENDENT_REORG
52322 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
52324 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
52325 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* Varargs / va_list.  */
52327 #undef TARGET_BUILD_BUILTIN_VA_LIST
52328 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
52330 #undef TARGET_FOLD_BUILTIN
52331 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
/* Function multi-versioning.  */
52333 #undef TARGET_COMPARE_VERSION_PRIORITY
52334 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
52336 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52337 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52338 ix86_generate_version_dispatcher_body
52340 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52341 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52342 ix86_get_function_versions_dispatcher
52344 #undef TARGET_ENUM_VA_LIST_P
52345 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
52347 #undef TARGET_FN_ABI_VA_LIST
52348 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
52350 #undef TARGET_CANONICAL_VA_LIST_TYPE
52351 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
52353 #undef TARGET_EXPAND_BUILTIN_VA_START
52354 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
52356 #undef TARGET_MD_ASM_CLOBBERS
52357 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Argument passing and calling conventions.  */
52359 #undef TARGET_PROMOTE_PROTOTYPES
52360 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52361 #undef TARGET_SETUP_INCOMING_VARARGS
52362 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52363 #undef TARGET_MUST_PASS_IN_STACK
52364 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52365 #undef TARGET_FUNCTION_ARG_ADVANCE
52366 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52367 #undef TARGET_FUNCTION_ARG
52368 #define TARGET_FUNCTION_ARG ix86_function_arg
52369 #undef TARGET_INIT_PIC_REG
52370 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52371 #undef TARGET_USE_PSEUDO_PIC_REG
52372 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52373 #undef TARGET_FUNCTION_ARG_BOUNDARY
52374 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52375 #undef TARGET_PASS_BY_REFERENCE
52376 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52377 #undef TARGET_INTERNAL_ARG_POINTER
52378 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52379 #undef TARGET_UPDATE_STACK_BOUNDARY
52380 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52381 #undef TARGET_GET_DRAP_RTX
52382 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52383 #undef TARGET_STRICT_ARGUMENT_NAMING
52384 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52385 #undef TARGET_STATIC_CHAIN
52386 #define TARGET_STATIC_CHAIN ix86_static_chain
52387 #undef TARGET_TRAMPOLINE_INIT
52388 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52389 #undef TARGET_RETURN_POPS_ARGS
52390 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52392 #undef TARGET_LEGITIMATE_COMBINED_INSN
52393 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52395 #undef TARGET_ASAN_SHADOW_OFFSET
52396 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52398 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52399 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
/* Mode support queries.  */
52401 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52402 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52404 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52405 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52407 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52408 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52409 ix86_libgcc_floating_mode_supported_p
52411 #undef TARGET_C_MODE_FOR_SUFFIX
52412 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52415 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52416 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52419 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52420 #undef TARGET_INSERT_ATTRIBUTES
52421 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52424 #undef TARGET_MANGLE_TYPE
52425 #define TARGET_MANGLE_TYPE ix86_mangle_type
52428 #undef TARGET_STACK_PROTECT_FAIL
52429 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
/* Function values.  */
52432 #undef TARGET_FUNCTION_VALUE
52433 #define TARGET_FUNCTION_VALUE ix86_function_value
52435 #undef TARGET_FUNCTION_VALUE_REGNO_P
52436 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52438 #undef TARGET_PROMOTE_FUNCTION_MODE
52439 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52441 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52442 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52444 #undef TARGET_INSTANTIATE_DECLS
52445 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
/* Register allocation / reload.  */
52447 #undef TARGET_SECONDARY_RELOAD
52448 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52450 #undef TARGET_CLASS_MAX_NREGS
52451 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52453 #undef TARGET_PREFERRED_RELOAD_CLASS
52454 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52455 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52456 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52457 #undef TARGET_CLASS_LIKELY_SPILLED_P
52458 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
/* Vectorizer hooks.  */
52460 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52461 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52462 ix86_builtin_vectorization_cost
52463 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52464 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52465 ix86_vectorize_vec_perm_const_ok
52466 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52467 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52468 ix86_preferred_simd_mode
52469 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52470 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52471 ix86_autovectorize_vector_sizes
52472 #undef TARGET_VECTORIZE_INIT_COST
52473 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52474 #undef TARGET_VECTORIZE_ADD_STMT_COST
52475 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52476 #undef TARGET_VECTORIZE_FINISH_COST
52477 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52478 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52479 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
/* Per-function target options / attributes.  */
52481 #undef TARGET_SET_CURRENT_FUNCTION
52482 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52484 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52485 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52487 #undef TARGET_OPTION_SAVE
52488 #define TARGET_OPTION_SAVE ix86_function_specific_save
52490 #undef TARGET_OPTION_RESTORE
52491 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52493 #undef TARGET_OPTION_POST_STREAM_IN
52494 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52496 #undef TARGET_OPTION_PRINT
52497 #define TARGET_OPTION_PRINT ix86_function_specific_print
52499 #undef TARGET_OPTION_FUNCTION_VERSIONS
52500 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52502 #undef TARGET_CAN_INLINE_P
52503 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52505 #undef TARGET_LEGITIMATE_ADDRESS_P
52506 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52508 #undef TARGET_LRA_P
52509 #define TARGET_LRA_P hook_bool_void_true
52511 #undef TARGET_REGISTER_PRIORITY
52512 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52514 #undef TARGET_REGISTER_USAGE_LEVELING_P
52515 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52517 #undef TARGET_LEGITIMATE_CONSTANT_P
52518 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52520 #undef TARGET_FRAME_POINTER_REQUIRED
52521 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52523 #undef TARGET_CAN_ELIMINATE
52524 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52526 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52527 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52529 #undef TARGET_ASM_CODE_END
52530 #define TARGET_ASM_CODE_END ix86_code_end
52532 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52533 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52536 #undef TARGET_INIT_LIBFUNCS
52537 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52540 #undef TARGET_LOOP_UNROLL_ADJUST
52541 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52543 #undef TARGET_SPILL_CLASS
52544 #define TARGET_SPILL_CLASS ix86_spill_class
/* SIMD clones (#pragma omp declare simd).  */
52546 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52547 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52548 ix86_simd_clone_compute_vecsize_and_simdlen
52550 #undef TARGET_SIMD_CLONE_ADJUST
52551 #define TARGET_SIMD_CLONE_ADJUST \
52552 ix86_simd_clone_adjust
52554 #undef TARGET_SIMD_CLONE_USABLE
52555 #define TARGET_SIMD_CLONE_USABLE \
52556 ix86_simd_clone_usable
52558 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52559 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52560 ix86_float_exceptions_rounding_supported_p
/* Mode-switching (e.g. vzeroupper insertion).  */
52562 #undef TARGET_MODE_EMIT
52563 #define TARGET_MODE_EMIT ix86_emit_mode_set
52565 #undef TARGET_MODE_NEEDED
52566 #define TARGET_MODE_NEEDED ix86_mode_needed
52568 #undef TARGET_MODE_AFTER
52569 #define TARGET_MODE_AFTER ix86_mode_after
52571 #undef TARGET_MODE_ENTRY
52572 #define TARGET_MODE_ENTRY ix86_mode_entry
52574 #undef TARGET_MODE_EXIT
52575 #define TARGET_MODE_EXIT ix86_mode_exit
52577 #undef TARGET_MODE_PRIORITY
52578 #define TARGET_MODE_PRIORITY ix86_mode_priority
52580 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52581 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
/* MPX pointer-bounds checking (chkp).  */
52583 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52584 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52586 #undef TARGET_STORE_BOUNDS_FOR_ARG
52587 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52589 #undef TARGET_LOAD_RETURNED_BOUNDS
52590 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52592 #undef TARGET_STORE_RETURNED_BOUNDS
52593 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52595 #undef TARGET_CHKP_BOUND_MODE
52596 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52598 #undef TARGET_BUILTIN_CHKP_FUNCTION
52599 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52601 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52602 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52604 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52605 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52607 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52608 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52610 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52611 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52613 #undef TARGET_OFFLOAD_OPTIONS
52614 #define TARGET_OFFLOAD_OPTIONS \
52615 ix86_offload_options
52617 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52618 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
/* The single global target vector, built from the hooks above.  */
52620 struct gcc_target targetm = TARGET_INITIALIZER;
52622 #include "gt-i386.h"