/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
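/* The vzeroupper pass below runs a small forward dataflow problem over
   these states: UNKNOWN before a block has been seen, UNUSED when the
   upper 128bits are known to be clear, and USED when some 256bit AVX
   register may still hold live upper bits.  USED wins whenever
   predecessor states are merged.  */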
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
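/* Each basic block's aux field points at a block_info_def, allocated by
   alloc_aux_for_blocks and released by free_aux_for_blocks in
   move_or_delete_vzeroupper below.  */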
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
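/* One of the values above is carried as the immediate operand of each
   vzeroupper UNSPEC_VOLATILE; move_or_delete_vzeroupper_2 reads it back
   with INTVAL (XVECEXP (pat, 0, 0)) to tell how the associated call uses
   256bit AVX registers.  */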
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
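/* This is a note_stores callback: note_stores calls it once for every
   store expression in an insn pattern, so scanning an insn for 256bit
   AVX references is simply
   note_stores (PATTERN (insn), check_avx256_stores, &state);  */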
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
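/* The TRUE result above is what drives the iteration in
   move_or_delete_vzeroupper below: when a block's exit state changes,
   its successors are queued for another visit until a fixed point is
   reached.  */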
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
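/* In summary: the pass seeds the dataflow from the entry edges using the
   caller's AVX state, iterates to a fixed point with two fibonacci heaps
   keyed on reverse completion order, and finishes with a final sweep that
   treats any block still in the UNKNOWN state as UNUSED so the remaining
   vzerouppers can be deleted or moved.  */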
492 static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
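/* For example, the multiply cost of an SImode value under the active
   tuning is looked up as ix86_cost->mult_init[MODE_INDEX (SImode)],
   i.e. index 2; any mode other than QI/HI/SI/DI falls through to the
   "other" slot at index 4.  */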
506 /* Processor costs (relative to an add) */
507 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
508 #define COSTS_N_BYTES(N) ((N) * 2)
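/* So when tuning for size, the "costs" in ix86_size_cost below are
   approximate instruction lengths in bytes rather than cycle counts,
   kept on a scale comparable with COSTS_N_INSNS.  */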
510 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
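/* A stringop_algs initializer reads as {algorithm used when the block
   size is unknown at compile time, {{max_size, algorithm}, ...}}, with a
   max_size of -1 covering all remaining sizes.  The memcpy and memset
   slots of each cost table hold two such descriptors, for the 32bit and
   64bit code models; DUMMY_STRINGOP_ALGS fills the 64bit slot for
   processors that are only tuned for 32bit code.  */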
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
514 COSTS_N_BYTES (2), /* cost of an add instruction */
515 COSTS_N_BYTES (3), /* cost of a lea instruction */
516 COSTS_N_BYTES (2), /* variable shift costs */
517 COSTS_N_BYTES (3), /* constant shift costs */
518 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
519 COSTS_N_BYTES (3), /* HI */
520 COSTS_N_BYTES (3), /* SI */
521 COSTS_N_BYTES (3), /* DI */
522 COSTS_N_BYTES (5)}, /* other */
523 0, /* cost of multiply per each bit set */
524 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
525 COSTS_N_BYTES (3), /* HI */
526 COSTS_N_BYTES (3), /* SI */
527 COSTS_N_BYTES (3), /* DI */
528 COSTS_N_BYTES (5)}, /* other */
529 COSTS_N_BYTES (3), /* cost of movsx */
530 COSTS_N_BYTES (3), /* cost of movzx */
531 0, /* "large" insn */
533 2, /* cost for loading QImode using movzbl */
534 {2, 2, 2}, /* cost of loading integer registers
535 in QImode, HImode and SImode.
536 Relative to reg-reg move (2). */
537 {2, 2, 2}, /* cost of storing integer registers */
538 2, /* cost of reg,reg fld/fst */
539 {2, 2, 2}, /* cost of loading fp registers
540 in SFmode, DFmode and XFmode */
541 {2, 2, 2}, /* cost of storing fp registers
542 in SFmode, DFmode and XFmode */
543 3, /* cost of moving MMX register */
544 {3, 3}, /* cost of loading MMX registers
545 in SImode and DImode */
546 {3, 3}, /* cost of storing MMX registers
547 in SImode and DImode */
548 3, /* cost of moving SSE register */
549 {3, 3, 3}, /* cost of loading SSE registers
550 in SImode, DImode and TImode */
551 {3, 3, 3}, /* cost of storing SSE registers
552 in SImode, DImode and TImode */
553 3, /* MMX or SSE register to integer */
554 0, /* size of l1 cache */
555 0, /* size of l2 cache */
556 0, /* size of prefetch block */
557 0, /* number of parallel prefetches */
559 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
561 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
562 COSTS_N_BYTES (2), /* cost of FABS instruction. */
563 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
564 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
565 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
566 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
567 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
568 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
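  /* When optimizing for size, both memcpy and memset just use the 1-byte
     REP prefix in both the 32bit and 64bit variants: rep movsb/stosb is
     the shortest encoding whatever the block size.  */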
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 1, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 1, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (1), /* cost of a lea instruction */
587 COSTS_N_INSNS (3), /* variable shift costs */
588 COSTS_N_INSNS (2), /* constant shift costs */
589 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (6), /* HI */
591 COSTS_N_INSNS (6), /* SI */
592 COSTS_N_INSNS (6), /* DI */
593 COSTS_N_INSNS (6)}, /* other */
594 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (23), /* HI */
597 COSTS_N_INSNS (23), /* SI */
598 COSTS_N_INSNS (23), /* DI */
599 COSTS_N_INSNS (23)}, /* other */
600 COSTS_N_INSNS (3), /* cost of movsx */
601 COSTS_N_INSNS (2), /* cost of movzx */
602 15, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {2, 4, 2}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {2, 4, 2}, /* cost of storing integer registers */
609 2, /* cost of reg,reg fld/fst */
610 {8, 8, 8}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {8, 8, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 8}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 8}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 8, 16}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 8, 16}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 3, /* MMX or SSE register to integer */
625 0, /* size of l1 cache */
626 0, /* size of l2 cache */
627 0, /* size of prefetch block */
628 0, /* number of parallel prefetches */
630 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (22), /* cost of FABS instruction. */
634 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
636 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
637 DUMMY_STRINGOP_ALGS},
638 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
639 DUMMY_STRINGOP_ALGS},
640 1, /* scalar_stmt_cost. */
641 1, /* scalar load_cost. */
642 1, /* scalar_store_cost. */
643 1, /* vec_stmt_cost. */
644 1, /* vec_to_scalar_cost. */
645 1, /* scalar_to_vec_cost. */
646 1, /* vec_align_load_cost. */
647 2, /* vec_unalign_load_cost. */
648 1, /* vec_store_cost. */
649 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
655 COSTS_N_INSNS (1), /* cost of an add instruction */
656 COSTS_N_INSNS (1), /* cost of a lea instruction */
657 COSTS_N_INSNS (3), /* variable shift costs */
658 COSTS_N_INSNS (2), /* constant shift costs */
659 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
660 COSTS_N_INSNS (12), /* HI */
661 COSTS_N_INSNS (12), /* SI */
662 COSTS_N_INSNS (12), /* DI */
663 COSTS_N_INSNS (12)}, /* other */
664 1, /* cost of multiply per each bit set */
665 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
666 COSTS_N_INSNS (40), /* HI */
667 COSTS_N_INSNS (40), /* SI */
668 COSTS_N_INSNS (40), /* DI */
669 COSTS_N_INSNS (40)}, /* other */
670 COSTS_N_INSNS (3), /* cost of movsx */
671 COSTS_N_INSNS (2), /* cost of movzx */
672 15, /* "large" insn */
674 4, /* cost for loading QImode using movzbl */
675 {2, 4, 2}, /* cost of loading integer registers
676 in QImode, HImode and SImode.
677 Relative to reg-reg move (2). */
678 {2, 4, 2}, /* cost of storing integer registers */
679 2, /* cost of reg,reg fld/fst */
680 {8, 8, 8}, /* cost of loading fp registers
681 in SFmode, DFmode and XFmode */
682 {8, 8, 8}, /* cost of storing fp registers
683 in SFmode, DFmode and XFmode */
684 2, /* cost of moving MMX register */
685 {4, 8}, /* cost of loading MMX registers
686 in SImode and DImode */
687 {4, 8}, /* cost of storing MMX registers
688 in SImode and DImode */
689 2, /* cost of moving SSE register */
690 {4, 8, 16}, /* cost of loading SSE registers
691 in SImode, DImode and TImode */
692 {4, 8, 16}, /* cost of storing SSE registers
693 in SImode, DImode and TImode */
694 3, /* MMX or SSE register to integer */
695 4, /* size of l1 cache. 486 has 8kB cache
696 shared for code and data, so 4kB is
697 not really precise. */
698 4, /* size of l2 cache */
699 0, /* size of prefetch block */
700 0, /* number of parallel prefetches */
702 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
703 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
704 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
705 COSTS_N_INSNS (3), /* cost of FABS instruction. */
706 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
707 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
708 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
709 DUMMY_STRINGOP_ALGS},
710 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
711 DUMMY_STRINGOP_ALGS},
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
727 COSTS_N_INSNS (1), /* cost of an add instruction */
728 COSTS_N_INSNS (1), /* cost of a lea instruction */
729 COSTS_N_INSNS (4), /* variable shift costs */
730 COSTS_N_INSNS (1), /* constant shift costs */
731 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
732 COSTS_N_INSNS (11), /* HI */
733 COSTS_N_INSNS (11), /* SI */
734 COSTS_N_INSNS (11), /* DI */
735 COSTS_N_INSNS (11)}, /* other */
736 0, /* cost of multiply per each bit set */
737 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
738 COSTS_N_INSNS (25), /* HI */
739 COSTS_N_INSNS (25), /* SI */
740 COSTS_N_INSNS (25), /* DI */
741 COSTS_N_INSNS (25)}, /* other */
742 COSTS_N_INSNS (3), /* cost of movsx */
743 COSTS_N_INSNS (2), /* cost of movzx */
744 8, /* "large" insn */
746 6, /* cost for loading QImode using movzbl */
747 {2, 4, 2}, /* cost of loading integer registers
748 in QImode, HImode and SImode.
749 Relative to reg-reg move (2). */
750 {2, 4, 2}, /* cost of storing integer registers */
751 2, /* cost of reg,reg fld/fst */
752 {2, 2, 6}, /* cost of loading fp registers
753 in SFmode, DFmode and XFmode */
754 {4, 4, 6}, /* cost of storing fp registers
755 in SFmode, DFmode and XFmode */
756 8, /* cost of moving MMX register */
757 {8, 8}, /* cost of loading MMX registers
758 in SImode and DImode */
759 {8, 8}, /* cost of storing MMX registers
760 in SImode and DImode */
761 2, /* cost of moving SSE register */
762 {4, 8, 16}, /* cost of loading SSE registers
763 in SImode, DImode and TImode */
764 {4, 8, 16}, /* cost of storing SSE registers
765 in SImode, DImode and TImode */
766 3, /* MMX or SSE register to integer */
767 8, /* size of l1 cache. */
768 8, /* size of l2 cache */
769 0, /* size of prefetch block */
770 0, /* number of parallel prefetches */
772 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
773 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
774 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
775 COSTS_N_INSNS (1), /* cost of FABS instruction. */
776 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
777 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
778 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
779 DUMMY_STRINGOP_ALGS},
780 {{libcall, {{-1, rep_prefix_4_byte}}},
781 DUMMY_STRINGOP_ALGS},
782 1, /* scalar_stmt_cost. */
783 1, /* scalar load_cost. */
784 1, /* scalar_store_cost. */
785 1, /* vec_stmt_cost. */
786 1, /* vec_to_scalar_cost. */
787 1, /* scalar_to_vec_cost. */
788 1, /* vec_align_load_cost. */
789 2, /* vec_unalign_load_cost. */
790 1, /* vec_store_cost. */
791 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
797 COSTS_N_INSNS (1), /* cost of an add instruction */
798 COSTS_N_INSNS (1), /* cost of a lea instruction */
799 COSTS_N_INSNS (1), /* variable shift costs */
800 COSTS_N_INSNS (1), /* constant shift costs */
801 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
802 COSTS_N_INSNS (4), /* HI */
803 COSTS_N_INSNS (4), /* SI */
804 COSTS_N_INSNS (4), /* DI */
805 COSTS_N_INSNS (4)}, /* other */
806 0, /* cost of multiply per each bit set */
807 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
808 COSTS_N_INSNS (17), /* HI */
809 COSTS_N_INSNS (17), /* SI */
810 COSTS_N_INSNS (17), /* DI */
811 COSTS_N_INSNS (17)}, /* other */
812 COSTS_N_INSNS (1), /* cost of movsx */
813 COSTS_N_INSNS (1), /* cost of movzx */
814 8, /* "large" insn */
816 2, /* cost for loading QImode using movzbl */
817 {4, 4, 4}, /* cost of loading integer registers
818 in QImode, HImode and SImode.
819 Relative to reg-reg move (2). */
820 {2, 2, 2}, /* cost of storing integer registers */
821 2, /* cost of reg,reg fld/fst */
822 {2, 2, 6}, /* cost of loading fp registers
823 in SFmode, DFmode and XFmode */
824 {4, 4, 6}, /* cost of storing fp registers
825 in SFmode, DFmode and XFmode */
826 2, /* cost of moving MMX register */
827 {2, 2}, /* cost of loading MMX registers
828 in SImode and DImode */
829 {2, 2}, /* cost of storing MMX registers
830 in SImode and DImode */
831 2, /* cost of moving SSE register */
832 {2, 2, 8}, /* cost of loading SSE registers
833 in SImode, DImode and TImode */
834 {2, 2, 8}, /* cost of storing SSE registers
835 in SImode, DImode and TImode */
836 3, /* MMX or SSE register to integer */
837 8, /* size of l1 cache. */
838 256, /* size of l2 cache */
839 32, /* size of prefetch block */
840 6, /* number of parallel prefetches */
842 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
843 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
844 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
845 COSTS_N_INSNS (2), /* cost of FABS instruction. */
846 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
847 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
853 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
854 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
855 DUMMY_STRINGOP_ALGS},
856 {{rep_prefix_4_byte, {{1024, unrolled_loop},
857 {8192, rep_prefix_4_byte}, {-1, libcall}}},
858 DUMMY_STRINGOP_ALGS},
859 1, /* scalar_stmt_cost. */
860 1, /* scalar load_cost. */
861 1, /* scalar_store_cost. */
862 1, /* vec_stmt_cost. */
863 1, /* vec_to_scalar_cost. */
864 1, /* scalar_to_vec_cost. */
865 1, /* vec_align_load_cost. */
866 2, /* vec_unalign_load_cost. */
867 1, /* vec_store_cost. */
868 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
874 COSTS_N_INSNS (1), /* cost of an add instruction */
875 COSTS_N_INSNS (1), /* cost of a lea instruction */
876 COSTS_N_INSNS (2), /* variable shift costs */
877 COSTS_N_INSNS (1), /* constant shift costs */
878 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
879 COSTS_N_INSNS (4), /* HI */
880 COSTS_N_INSNS (7), /* SI */
881 COSTS_N_INSNS (7), /* DI */
882 COSTS_N_INSNS (7)}, /* other */
883 0, /* cost of multiply per each bit set */
884 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
885 COSTS_N_INSNS (23), /* HI */
886 COSTS_N_INSNS (39), /* SI */
887 COSTS_N_INSNS (39), /* DI */
888 COSTS_N_INSNS (39)}, /* other */
889 COSTS_N_INSNS (1), /* cost of movsx */
890 COSTS_N_INSNS (1), /* cost of movzx */
891 8, /* "large" insn */
893 1, /* cost for loading QImode using movzbl */
894 {1, 1, 1}, /* cost of loading integer registers
895 in QImode, HImode and SImode.
896 Relative to reg-reg move (2). */
897 {1, 1, 1}, /* cost of storing integer registers */
898 1, /* cost of reg,reg fld/fst */
899 {1, 1, 1}, /* cost of loading fp registers
900 in SFmode, DFmode and XFmode */
901 {4, 6, 6}, /* cost of storing fp registers
902 in SFmode, DFmode and XFmode */
904 1, /* cost of moving MMX register */
905 {1, 1}, /* cost of loading MMX registers
906 in SImode and DImode */
907 {1, 1}, /* cost of storing MMX registers
908 in SImode and DImode */
909 1, /* cost of moving SSE register */
910 {1, 1, 1}, /* cost of loading SSE registers
911 in SImode, DImode and TImode */
912 {1, 1, 1}, /* cost of storing SSE registers
913 in SImode, DImode and TImode */
914 1, /* MMX or SSE register to integer */
915 64, /* size of l1 cache. */
916 128, /* size of l2 cache. */
917 32, /* size of prefetch block */
918 1, /* number of parallel prefetches */
920 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
921 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
922 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
923 COSTS_N_INSNS (1), /* cost of FABS instruction. */
924 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
925 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
926 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
927 DUMMY_STRINGOP_ALGS},
928 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
929 DUMMY_STRINGOP_ALGS},
930 1, /* scalar_stmt_cost. */
931 1, /* scalar load_cost. */
932 1, /* scalar_store_cost. */
933 1, /* vec_stmt_cost. */
934 1, /* vec_to_scalar_cost. */
935 1, /* scalar_to_vec_cost. */
936 1, /* vec_align_load_cost. */
937 2, /* vec_unalign_load_cost. */
938 1, /* vec_store_cost. */
939 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
945 COSTS_N_INSNS (1), /* cost of an add instruction */
946 COSTS_N_INSNS (2), /* cost of a lea instruction */
947 COSTS_N_INSNS (1), /* variable shift costs */
948 COSTS_N_INSNS (1), /* constant shift costs */
949 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
950 COSTS_N_INSNS (3), /* HI */
951 COSTS_N_INSNS (3), /* SI */
952 COSTS_N_INSNS (3), /* DI */
953 COSTS_N_INSNS (3)}, /* other */
954 0, /* cost of multiply per each bit set */
955 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
956 COSTS_N_INSNS (18), /* HI */
957 COSTS_N_INSNS (18), /* SI */
958 COSTS_N_INSNS (18), /* DI */
959 COSTS_N_INSNS (18)}, /* other */
960 COSTS_N_INSNS (2), /* cost of movsx */
961 COSTS_N_INSNS (2), /* cost of movzx */
962 8, /* "large" insn */
964 3, /* cost for loading QImode using movzbl */
965 {4, 5, 4}, /* cost of loading integer registers
966 in QImode, HImode and SImode.
967 Relative to reg-reg move (2). */
968 {2, 3, 2}, /* cost of storing integer registers */
969 4, /* cost of reg,reg fld/fst */
970 {6, 6, 6}, /* cost of loading fp registers
971 in SFmode, DFmode and XFmode */
972 {4, 4, 4}, /* cost of storing fp registers
973 in SFmode, DFmode and XFmode */
974 2, /* cost of moving MMX register */
975 {2, 2}, /* cost of loading MMX registers
976 in SImode and DImode */
977 {2, 2}, /* cost of storing MMX registers
978 in SImode and DImode */
979 2, /* cost of moving SSE register */
980 {2, 2, 8}, /* cost of loading SSE registers
981 in SImode, DImode and TImode */
982 {2, 2, 8}, /* cost of storing SSE registers
983 in SImode, DImode and TImode */
984 6, /* MMX or SSE register to integer */
985 32, /* size of l1 cache. */
986 32, /* size of l2 cache. Some models
987 have integrated l2 cache, but
988 optimizing for k6 is not important
989 enough to worry about that. */
990 32, /* size of prefetch block */
991 1, /* number of parallel prefetches */
993 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
994 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
995 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
996 COSTS_N_INSNS (2), /* cost of FABS instruction. */
997 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
998 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
999 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1000 DUMMY_STRINGOP_ALGS},
1001 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1002 DUMMY_STRINGOP_ALGS},
1003 1, /* scalar_stmt_cost. */
1004 1, /* scalar load_cost. */
1005 1, /* scalar_store_cost. */
1006 1, /* vec_stmt_cost. */
1007 1, /* vec_to_scalar_cost. */
1008 1, /* scalar_to_vec_cost. */
1009 1, /* vec_align_load_cost. */
1010 2, /* vec_unalign_load_cost. */
1011 1, /* vec_store_cost. */
1012 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (2), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (5), /* HI */
1024 COSTS_N_INSNS (5), /* SI */
1025 COSTS_N_INSNS (5), /* DI */
1026 COSTS_N_INSNS (5)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (26), /* HI */
1030 COSTS_N_INSNS (42), /* SI */
1031 COSTS_N_INSNS (74), /* DI */
1032 COSTS_N_INSNS (74)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
1037 4, /* cost for loading QImode using movzbl */
1038 {3, 4, 3}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {3, 4, 3}, /* cost of storing integer registers */
1042 4, /* cost of reg,reg fld/fst */
1043 {4, 4, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {6, 6, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 6}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 5}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 5, /* MMX or SSE register to integer */
1058 64, /* size of l1 cache. */
1059 256, /* size of l2 cache. */
1060 64, /* size of prefetch block */
1061 6, /* number of parallel prefetches */
1062 5, /* Branch cost */
1063 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1064 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1065 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1066 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1067 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1068 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
1072 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1073 DUMMY_STRINGOP_ALGS},
1074 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1075 DUMMY_STRINGOP_ALGS},
1076 1, /* scalar_stmt_cost. */
1077 1, /* scalar load_cost. */
1078 1, /* scalar_store_cost. */
1079 1, /* vec_stmt_cost. */
1080 1, /* vec_to_scalar_cost. */
1081 1, /* scalar_to_vec_cost. */
1082 1, /* vec_align_load_cost. */
1083 2, /* vec_unalign_load_cost. */
1084 1, /* vec_store_cost. */
1085 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
1091 COSTS_N_INSNS (1), /* cost of an add instruction */
1092 COSTS_N_INSNS (2), /* cost of a lea instruction */
1093 COSTS_N_INSNS (1), /* variable shift costs */
1094 COSTS_N_INSNS (1), /* constant shift costs */
1095 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1096 COSTS_N_INSNS (4), /* HI */
1097 COSTS_N_INSNS (3), /* SI */
1098 COSTS_N_INSNS (4), /* DI */
1099 COSTS_N_INSNS (5)}, /* other */
1100 0, /* cost of multiply per each bit set */
1101 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1102 COSTS_N_INSNS (26), /* HI */
1103 COSTS_N_INSNS (42), /* SI */
1104 COSTS_N_INSNS (74), /* DI */
1105 COSTS_N_INSNS (74)}, /* other */
1106 COSTS_N_INSNS (1), /* cost of movsx */
1107 COSTS_N_INSNS (1), /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
1110 4, /* cost for loading QImode using movzbl */
1111 {3, 4, 3}, /* cost of loading integer registers
1112 in QImode, HImode and SImode.
1113 Relative to reg-reg move (2). */
1114 {3, 4, 3}, /* cost of storing integer registers */
1115 4, /* cost of reg,reg fld/fst */
1116 {4, 4, 12}, /* cost of loading fp registers
1117 in SFmode, DFmode and XFmode */
1118 {6, 6, 8}, /* cost of storing fp registers
1119 in SFmode, DFmode and XFmode */
1120 2, /* cost of moving MMX register */
1121 {3, 3}, /* cost of loading MMX registers
1122 in SImode and DImode */
1123 {4, 4}, /* cost of storing MMX registers
1124 in SImode and DImode */
1125 2, /* cost of moving SSE register */
1126 {4, 3, 6}, /* cost of loading SSE registers
1127 in SImode, DImode and TImode */
1128 {4, 4, 5}, /* cost of storing SSE registers
1129 in SImode, DImode and TImode */
1130 5, /* MMX or SSE register to integer */
1131 64, /* size of l1 cache. */
1132 512, /* size of l2 cache. */
1133 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
1140 3, /* Branch cost */
1141 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1142 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1143 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1144 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1145 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1146 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, a libcall
     can do nontemporal accesses and beat inline considerably.  */
1150 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1151 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1152 {{libcall, {{8, loop}, {24, unrolled_loop},
1153 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1154 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
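  /* Read as: 32bit memcpy uses an inline loop up to 6 bytes, an unrolled
     loop up to 14 bytes and the 4-byte REP prefix beyond; the 64bit
     variant uses a loop up to 16 bytes, the 8-byte REP prefix up to 8192
     bytes and a libcall for larger blocks.  */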
1155 4, /* scalar_stmt_cost. */
1156 2, /* scalar load_cost. */
1157 2, /* scalar_store_cost. */
1158 5, /* vec_stmt_cost. */
1159 0, /* vec_to_scalar_cost. */
1160 2, /* scalar_to_vec_cost. */
1161 2, /* vec_align_load_cost. */
1162 3, /* vec_unalign_load_cost. */
1163 3, /* vec_store_cost. */
1164 3, /* cond_taken_branch_cost. */
  2,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
1169 COSTS_N_INSNS (1), /* cost of an add instruction */
1170 COSTS_N_INSNS (2), /* cost of a lea instruction */
1171 COSTS_N_INSNS (1), /* variable shift costs */
1172 COSTS_N_INSNS (1), /* constant shift costs */
1173 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1174 COSTS_N_INSNS (4), /* HI */
1175 COSTS_N_INSNS (3), /* SI */
1176 COSTS_N_INSNS (4), /* DI */
1177 COSTS_N_INSNS (5)}, /* other */
1178 0, /* cost of multiply per each bit set */
1179 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1180 COSTS_N_INSNS (35), /* HI */
1181 COSTS_N_INSNS (51), /* SI */
1182 COSTS_N_INSNS (83), /* DI */
1183 COSTS_N_INSNS (83)}, /* other */
1184 COSTS_N_INSNS (1), /* cost of movsx */
1185 COSTS_N_INSNS (1), /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
1188 4, /* cost for loading QImode using movzbl */
1189 {3, 4, 3}, /* cost of loading integer registers
1190 in QImode, HImode and SImode.
1191 Relative to reg-reg move (2). */
1192 {3, 4, 3}, /* cost of storing integer registers */
1193 4, /* cost of reg,reg fld/fst */
1194 {4, 4, 12}, /* cost of loading fp registers
1195 in SFmode, DFmode and XFmode */
1196 {6, 6, 8}, /* cost of storing fp registers
1197 in SFmode, DFmode and XFmode */
1198 2, /* cost of moving MMX register */
1199 {3, 3}, /* cost of loading MMX registers
1200 in SImode and DImode */
1201 {4, 4}, /* cost of storing MMX registers
1202 in SImode and DImode */
1203 2, /* cost of moving SSE register */
1204 {4, 4, 3}, /* cost of loading SSE registers
1205 in SImode, DImode and TImode */
1206 {4, 4, 5}, /* cost of storing SSE registers
1207 in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1  */
1217 64, /* size of l1 cache. */
1218 512, /* size of l2 cache. */
1219 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
1226 2, /* Branch cost */
1227 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1228 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1229 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1230 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1231 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1232 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline considerably.  */
1237 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1238 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1239 {{libcall, {{8, loop}, {24, unrolled_loop},
1240 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1241 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1242 4, /* scalar_stmt_cost. */
1243 2, /* scalar load_cost. */
1244 2, /* scalar_store_cost. */
1245 6, /* vec_stmt_cost. */
1246 0, /* vec_to_scalar_cost. */
1247 2, /* scalar_to_vec_cost. */
1248 2, /* vec_align_load_cost. */
1249 2, /* vec_unalign_load_cost. */
1250 2, /* vec_store_cost. */
1251 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
1256 COSTS_N_INSNS (1), /* cost of an add instruction */
1257 COSTS_N_INSNS (1), /* cost of a lea instruction */
1258 COSTS_N_INSNS (1), /* variable shift costs */
1259 COSTS_N_INSNS (1), /* constant shift costs */
1260 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1261 COSTS_N_INSNS (4), /* HI */
1262 COSTS_N_INSNS (4), /* SI */
1263 COSTS_N_INSNS (6), /* DI */
1264 COSTS_N_INSNS (6)}, /* other */
1265 0, /* cost of multiply per each bit set */
1266 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1267 COSTS_N_INSNS (35), /* HI */
1268 COSTS_N_INSNS (51), /* SI */
1269 COSTS_N_INSNS (83), /* DI */
1270 COSTS_N_INSNS (83)}, /* other */
1271 COSTS_N_INSNS (1), /* cost of movsx */
1272 COSTS_N_INSNS (1), /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
1275 4, /* cost for loading QImode using movzbl */
1276 {5, 5, 4}, /* cost of loading integer registers
1277 in QImode, HImode and SImode.
1278 Relative to reg-reg move (2). */
1279 {4, 4, 4}, /* cost of storing integer registers */
1280 2, /* cost of reg,reg fld/fst */
1281 {5, 5, 12}, /* cost of loading fp registers
1282 in SFmode, DFmode and XFmode */
1283 {4, 4, 8}, /* cost of storing fp registers
1284 in SFmode, DFmode and XFmode */
1285 2, /* cost of moving MMX register */
1286 {4, 4}, /* cost of loading MMX registers
1287 in SImode and DImode */
1288 {4, 4}, /* cost of storing MMX registers
1289 in SImode and DImode */
1290 2, /* cost of moving SSE register */
1291 {4, 4, 4}, /* cost of loading SSE registers
1292 in SImode, DImode and TImode */
1293 {4, 4, 4}, /* cost of storing SSE registers
1294 in SImode, DImode and TImode */
  2,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1  */
1304 16, /* size of l1 cache. */
1305 2048, /* size of l2 cache. */
1306 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
1313 2, /* Branch cost */
1314 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1315 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1316 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1317 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1318 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1319 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline considerably.  */
1324 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1325 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1326 {{libcall, {{8, loop}, {24, unrolled_loop},
1327 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1328 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1329 6, /* scalar_stmt_cost. */
1330 4, /* scalar load_cost. */
1331 4, /* scalar_store_cost. */
1332 6, /* vec_stmt_cost. */
1333 0, /* vec_to_scalar_cost. */
1334 2, /* scalar_to_vec_cost. */
1335 4, /* vec_align_load_cost. */
1336 4, /* vec_unalign_load_cost. */
1337 4, /* vec_store_cost. */
1338 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver2_cost = {
1343 COSTS_N_INSNS (1), /* cost of an add instruction */
1344 COSTS_N_INSNS (1), /* cost of a lea instruction */
1345 COSTS_N_INSNS (1), /* variable shift costs */
1346 COSTS_N_INSNS (1), /* constant shift costs */
1347 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1348 COSTS_N_INSNS (4), /* HI */
1349 COSTS_N_INSNS (4), /* SI */
1350 COSTS_N_INSNS (6), /* DI */
1351 COSTS_N_INSNS (6)}, /* other */
1352 0, /* cost of multiply per each bit set */
1353 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1354 COSTS_N_INSNS (35), /* HI */
1355 COSTS_N_INSNS (51), /* SI */
1356 COSTS_N_INSNS (83), /* DI */
1357 COSTS_N_INSNS (83)}, /* other */
1358 COSTS_N_INSNS (1), /* cost of movsx */
1359 COSTS_N_INSNS (1), /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
1362 4, /* cost for loading QImode using movzbl */
1363 {5, 5, 4}, /* cost of loading integer registers
1364 in QImode, HImode and SImode.
1365 Relative to reg-reg move (2). */
1366 {4, 4, 4}, /* cost of storing integer registers */
1367 2, /* cost of reg,reg fld/fst */
1368 {5, 5, 12}, /* cost of loading fp registers
1369 in SFmode, DFmode and XFmode */
1370 {4, 4, 8}, /* cost of storing fp registers
1371 in SFmode, DFmode and XFmode */
1372 2, /* cost of moving MMX register */
1373 {4, 4}, /* cost of loading MMX registers
1374 in SImode and DImode */
1375 {4, 4}, /* cost of storing MMX registers
1376 in SImode and DImode */
1377 2, /* cost of moving SSE register */
1378 {4, 4, 4}, /* cost of loading SSE registers
1379 in SImode, DImode and TImode */
1380 {4, 4, 4}, /* cost of storing SSE registers
1381 in SImode, DImode and TImode */
  2,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1  */
1391 16, /* size of l1 cache. */
1392 2048, /* size of l2 cache. */
1393 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
1400 2, /* Branch cost */
1401 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1402 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1403 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1404 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1405 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1406 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline considerably.  */
1411 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1412 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1413 {{libcall, {{8, loop}, {24, unrolled_loop},
1414 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1415 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1416 6, /* scalar_stmt_cost. */
1417 4, /* scalar load_cost. */
1418 4, /* scalar_store_cost. */
1419 6, /* vec_stmt_cost. */
1420 0, /* vec_to_scalar_cost. */
1421 2, /* scalar_to_vec_cost. */
1422 4, /* vec_align_load_cost. */
1423 4, /* vec_unalign_load_cost. */
1424 4, /* vec_store_cost. */
1425 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
1430 COSTS_N_INSNS (1), /* cost of an add instruction */
1431 COSTS_N_INSNS (2), /* cost of a lea instruction */
1432 COSTS_N_INSNS (1), /* variable shift costs */
1433 COSTS_N_INSNS (1), /* constant shift costs */
1434 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1435 COSTS_N_INSNS (4), /* HI */
1436 COSTS_N_INSNS (3), /* SI */
1437 COSTS_N_INSNS (4), /* DI */
1438 COSTS_N_INSNS (5)}, /* other */
1439 0, /* cost of multiply per each bit set */
1440 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1441 COSTS_N_INSNS (35), /* HI */
1442 COSTS_N_INSNS (51), /* SI */
1443 COSTS_N_INSNS (83), /* DI */
1444 COSTS_N_INSNS (83)}, /* other */
1445 COSTS_N_INSNS (1), /* cost of movsx */
1446 COSTS_N_INSNS (1), /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
1449 4, /* cost for loading QImode using movzbl */
1450 {3, 4, 3}, /* cost of loading integer registers
1451 in QImode, HImode and SImode.
1452 Relative to reg-reg move (2). */
1453 {3, 4, 3}, /* cost of storing integer registers */
1454 4, /* cost of reg,reg fld/fst */
1455 {4, 4, 12}, /* cost of loading fp registers
1456 in SFmode, DFmode and XFmode */
1457 {6, 6, 8}, /* cost of storing fp registers
1458 in SFmode, DFmode and XFmode */
1459 2, /* cost of moving MMX register */
1460 {3, 3}, /* cost of loading MMX registers
1461 in SImode and DImode */
1462 {4, 4}, /* cost of storing MMX registers
1463 in SImode and DImode */
1464 2, /* cost of moving SSE register */
1465 {4, 4, 3}, /* cost of loading SSE registers
1466 in SImode, DImode and TImode */
1467 {4, 4, 5}, /* cost of storing SSE registers
1468 in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1  */
1478 32, /* size of l1 cache. */
1479 512, /* size of l2 cache. */
1480 64, /* size of prefetch block */
1481 100, /* number of parallel prefetches */
1482 2, /* Branch cost */
1483 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1484 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1485 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1486 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1487 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1488 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline considerably.  */
1493 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1494 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1495 {{libcall, {{8, loop}, {24, unrolled_loop},
1496 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1497 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1498 4, /* scalar_stmt_cost. */
1499 2, /* scalar load_cost. */
1500 2, /* scalar_store_cost. */
1501 6, /* vec_stmt_cost. */
1502 0, /* vec_to_scalar_cost. */
1503 2, /* scalar_to_vec_cost. */
1504 2, /* vec_align_load_cost. */
1505 2, /* vec_unalign_load_cost. */
1506 2, /* vec_store_cost. */
1507 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
1513 COSTS_N_INSNS (1), /* cost of an add instruction */
1514 COSTS_N_INSNS (3), /* cost of a lea instruction */
1515 COSTS_N_INSNS (4), /* variable shift costs */
1516 COSTS_N_INSNS (4), /* constant shift costs */
1517 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1518 COSTS_N_INSNS (15), /* HI */
1519 COSTS_N_INSNS (15), /* SI */
1520 COSTS_N_INSNS (15), /* DI */
1521 COSTS_N_INSNS (15)}, /* other */
1522 0, /* cost of multiply per each bit set */
1523 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1524 COSTS_N_INSNS (56), /* HI */
1525 COSTS_N_INSNS (56), /* SI */
1526 COSTS_N_INSNS (56), /* DI */
1527 COSTS_N_INSNS (56)}, /* other */
1528 COSTS_N_INSNS (1), /* cost of movsx */
1529 COSTS_N_INSNS (1), /* cost of movzx */
1530 16, /* "large" insn */
1532 2, /* cost for loading QImode using movzbl */
1533 {4, 5, 4}, /* cost of loading integer registers
1534 in QImode, HImode and SImode.
1535 Relative to reg-reg move (2). */
1536 {2, 3, 2}, /* cost of storing integer registers */
1537 2, /* cost of reg,reg fld/fst */
1538 {2, 2, 6}, /* cost of loading fp registers
1539 in SFmode, DFmode and XFmode */
1540 {4, 4, 6}, /* cost of storing fp registers
1541 in SFmode, DFmode and XFmode */
1542 2, /* cost of moving MMX register */
1543 {2, 2}, /* cost of loading MMX registers
1544 in SImode and DImode */
1545 {2, 2}, /* cost of storing MMX registers
1546 in SImode and DImode */
1547 12, /* cost of moving SSE register */
1548 {12, 12, 12}, /* cost of loading SSE registers
1549 in SImode, DImode and TImode */
1550 {2, 2, 8}, /* cost of storing SSE registers
1551 in SImode, DImode and TImode */
1552 10, /* MMX or SSE register to integer */
1553 8, /* size of l1 cache. */
1554 256, /* size of l2 cache. */
1555 64, /* size of prefetch block */
1556 6, /* number of parallel prefetches */
1557 2, /* Branch cost */
1558 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1559 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1560 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1561 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1562 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1563 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1564 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1565 DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
1568 DUMMY_STRINGOP_ALGS},
1569 1, /* scalar_stmt_cost. */
1570 1, /* scalar load_cost. */
1571 1, /* scalar_store_cost. */
1572 1, /* vec_stmt_cost. */
1573 1, /* vec_to_scalar_cost. */
1574 1, /* scalar_to_vec_cost. */
1575 1, /* vec_align_load_cost. */
1576 2, /* vec_unalign_load_cost. */
1577 1, /* vec_store_cost. */
1578 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
1584 COSTS_N_INSNS (1), /* cost of an add instruction */
1585 COSTS_N_INSNS (1), /* cost of a lea instruction */
1586 COSTS_N_INSNS (1), /* variable shift costs */
1587 COSTS_N_INSNS (1), /* constant shift costs */
1588 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1589 COSTS_N_INSNS (10), /* HI */
1590 COSTS_N_INSNS (10), /* SI */
1591 COSTS_N_INSNS (10), /* DI */
1592 COSTS_N_INSNS (10)}, /* other */
1593 0, /* cost of multiply per each bit set */
1594 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1595 COSTS_N_INSNS (66), /* HI */
1596 COSTS_N_INSNS (66), /* SI */
1597 COSTS_N_INSNS (66), /* DI */
1598 COSTS_N_INSNS (66)}, /* other */
1599 COSTS_N_INSNS (1), /* cost of movsx */
1600 COSTS_N_INSNS (1), /* cost of movzx */
1601 16, /* "large" insn */
1602 17, /* MOVE_RATIO */
1603 4, /* cost for loading QImode using movzbl */
1604 {4, 4, 4}, /* cost of loading integer registers
1605 in QImode, HImode and SImode.
1606 Relative to reg-reg move (2). */
1607 {4, 4, 4}, /* cost of storing integer registers */
1608 3, /* cost of reg,reg fld/fst */
1609 {12, 12, 12}, /* cost of loading fp registers
1610 in SFmode, DFmode and XFmode */
1611 {4, 4, 4}, /* cost of storing fp registers
1612 in SFmode, DFmode and XFmode */
1613 6, /* cost of moving MMX register */
1614 {12, 12}, /* cost of loading MMX registers
1615 in SImode and DImode */
1616 {12, 12}, /* cost of storing MMX registers
1617 in SImode and DImode */
1618 6, /* cost of moving SSE register */
1619 {12, 12, 12}, /* cost of loading SSE registers
1620 in SImode, DImode and TImode */
1621 {12, 12, 12}, /* cost of storing SSE registers
1622 in SImode, DImode and TImode */
1623 8, /* MMX or SSE register to integer */
1624 8, /* size of l1 cache. */
1625 1024, /* size of l2 cache. */
1626 128, /* size of prefetch block */
1627 8, /* number of parallel prefetches */
1628 1, /* Branch cost */
1629 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1630 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1631 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1632 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1633 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1634 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1635 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1636 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1637 {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
1640 {libcall, {{24, loop}, {64, unrolled_loop},
1641 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1642 1, /* scalar_stmt_cost. */
1643 1, /* scalar load_cost. */
1644 1, /* scalar_store_cost. */
1645 1, /* vec_stmt_cost. */
1646 1, /* vec_to_scalar_cost. */
1647 1, /* scalar_to_vec_cost. */
1648 1, /* vec_align_load_cost. */
1649 2, /* vec_unalign_load_cost. */
1650 1, /* vec_store_cost. */
1651 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
1657 COSTS_N_INSNS (1), /* cost of an add instruction */
1658 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1659 COSTS_N_INSNS (1), /* variable shift costs */
1660 COSTS_N_INSNS (1), /* constant shift costs */
1661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1662 COSTS_N_INSNS (4), /* HI */
1663 COSTS_N_INSNS (3), /* SI */
1664 COSTS_N_INSNS (4), /* DI */
1665 COSTS_N_INSNS (2)}, /* other */
1666 0, /* cost of multiply per each bit set */
1667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1668 COSTS_N_INSNS (26), /* HI */
1669 COSTS_N_INSNS (42), /* SI */
1670 COSTS_N_INSNS (74), /* DI */
1671 COSTS_N_INSNS (74)}, /* other */
1672 COSTS_N_INSNS (1), /* cost of movsx */
1673 COSTS_N_INSNS (1), /* cost of movzx */
1674 8, /* "large" insn */
1675 17, /* MOVE_RATIO */
1676 4, /* cost for loading QImode using movzbl */
1677 {4, 4, 4}, /* cost of loading integer registers
1678 in QImode, HImode and SImode.
1679 Relative to reg-reg move (2). */
1680 {4, 4, 4}, /* cost of storing integer registers */
1681 4, /* cost of reg,reg fld/fst */
1682 {12, 12, 12}, /* cost of loading fp registers
1683 in SFmode, DFmode and XFmode */
1684 {6, 6, 8}, /* cost of storing fp registers
1685 in SFmode, DFmode and XFmode */
1686 2, /* cost of moving MMX register */
1687 {8, 8}, /* cost of loading MMX registers
1688 in SImode and DImode */
1689 {8, 8}, /* cost of storing MMX registers
1690 in SImode and DImode */
1691 2, /* cost of moving SSE register */
1692 {8, 8, 8}, /* cost of loading SSE registers
1693 in SImode, DImode and TImode */
1694 {8, 8, 8}, /* cost of storing SSE registers
1695 in SImode, DImode and TImode */
1696 5, /* MMX or SSE register to integer */
1697 32, /* size of l1 cache. */
1698 256, /* size of l2 cache. */
1699 64, /* size of prefetch block */
1700 6, /* number of parallel prefetches */
1701 3, /* Branch cost */
1702 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1703 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1704 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1705 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1706 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1707 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1708 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1709 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1710 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1711 {{libcall, {{8, loop}, {15, unrolled_loop},
1712 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1713 {libcall, {{24, loop}, {32, unrolled_loop},
1714 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1715 1, /* scalar_stmt_cost. */
1716 1, /* scalar load_cost. */
1717 1, /* scalar_store_cost. */
1718 1, /* vec_stmt_cost. */
1719 1, /* vec_to_scalar_cost. */
1720 1, /* scalar_to_vec_cost. */
1721 1, /* vec_align_load_cost. */
1722 2, /* vec_unalign_load_cost. */
1723 1, /* vec_store_cost. */
1724 3, /* cond_taken_branch_cost. */
1725 1, /* cond_not_taken_branch_cost. */
1726 };
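/* Illustrative note on reading the stringop tables above (added for
   exposition, not from the original sources): each of the memcpy and
   memset tables has a 32-bit and a 64-bit variant, and each variant
   names the algorithm for unknown block sizes first (libcall above),
   followed by {max, alg} pairs for known sizes, with max == -1 meaning
   "no upper bound".  Atom's memcpy entry therefore calls the library
   for unknown sizes, uses a simple loop for blocks of at most 11
   bytes, and rep_prefix_4_byte for anything larger.  */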
1728 /* Generic64 should produce code tuned for Nocona and K8. */
1729 static const
1730 struct processor_costs generic64_cost = {
1731 COSTS_N_INSNS (1), /* cost of an add instruction */
1732 /* On all chips taken into consideration, lea is 2 cycles or more. With
1733 this cost, however, our current implementation of synth_mult results in
1734 the use of unnecessary temporary registers, causing a regression on several
1735 SPECfp benchmarks. */
1736 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1737 COSTS_N_INSNS (1), /* variable shift costs */
1738 COSTS_N_INSNS (1), /* constant shift costs */
1739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1740 COSTS_N_INSNS (4), /* HI */
1741 COSTS_N_INSNS (3), /* SI */
1742 COSTS_N_INSNS (4), /* DI */
1743 COSTS_N_INSNS (2)}, /* other */
1744 0, /* cost of multiply per each bit set */
1745 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1746 COSTS_N_INSNS (26), /* HI */
1747 COSTS_N_INSNS (42), /* SI */
1748 COSTS_N_INSNS (74), /* DI */
1749 COSTS_N_INSNS (74)}, /* other */
1750 COSTS_N_INSNS (1), /* cost of movsx */
1751 COSTS_N_INSNS (1), /* cost of movzx */
1752 8, /* "large" insn */
1753 17, /* MOVE_RATIO */
1754 4, /* cost for loading QImode using movzbl */
1755 {4, 4, 4}, /* cost of loading integer registers
1756 in QImode, HImode and SImode.
1757 Relative to reg-reg move (2). */
1758 {4, 4, 4}, /* cost of storing integer registers */
1759 4, /* cost of reg,reg fld/fst */
1760 {12, 12, 12}, /* cost of loading fp registers
1761 in SFmode, DFmode and XFmode */
1762 {6, 6, 8}, /* cost of storing fp registers
1763 in SFmode, DFmode and XFmode */
1764 2, /* cost of moving MMX register */
1765 {8, 8}, /* cost of loading MMX registers
1766 in SImode and DImode */
1767 {8, 8}, /* cost of storing MMX registers
1768 in SImode and DImode */
1769 2, /* cost of moving SSE register */
1770 {8, 8, 8}, /* cost of loading SSE registers
1771 in SImode, DImode and TImode */
1772 {8, 8, 8}, /* cost of storing SSE registers
1773 in SImode, DImode and TImode */
1774 5, /* MMX or SSE register to integer */
1775 32, /* size of l1 cache. */
1776 512, /* size of l2 cache. */
1777 64, /* size of prefetch block */
1778 6, /* number of parallel prefetches */
1779 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1780 value is increased to the perhaps more appropriate value of 5. */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {DUMMY_STRINGOP_ALGS,
1789 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1790 {DUMMY_STRINGOP_ALGS,
1791 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1792 1, /* scalar_stmt_cost. */
1793 1, /* scalar load_cost. */
1794 1, /* scalar_store_cost. */
1795 1, /* vec_stmt_cost. */
1796 1, /* vec_to_scalar_cost. */
1797 1, /* scalar_to_vec_cost. */
1798 1, /* vec_align_load_cost. */
1799 2, /* vec_unalign_load_cost. */
1800 1, /* vec_store_cost. */
1801 3, /* cond_taken_branch_cost. */
1802 1, /* cond_not_taken_branch_cost. */
1803 };
1805 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1806 Core 2 and generic. */
1807 static const
1808 struct processor_costs generic32_cost = {
1809 COSTS_N_INSNS (1), /* cost of an add instruction */
1810 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1811 COSTS_N_INSNS (1), /* variable shift costs */
1812 COSTS_N_INSNS (1), /* constant shift costs */
1813 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1814 COSTS_N_INSNS (4), /* HI */
1815 COSTS_N_INSNS (3), /* SI */
1816 COSTS_N_INSNS (4), /* DI */
1817 COSTS_N_INSNS (2)}, /* other */
1818 0, /* cost of multiply per each bit set */
1819 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1820 COSTS_N_INSNS (26), /* HI */
1821 COSTS_N_INSNS (42), /* SI */
1822 COSTS_N_INSNS (74), /* DI */
1823 COSTS_N_INSNS (74)}, /* other */
1824 COSTS_N_INSNS (1), /* cost of movsx */
1825 COSTS_N_INSNS (1), /* cost of movzx */
1826 8, /* "large" insn */
1827 17, /* MOVE_RATIO */
1828 4, /* cost for loading QImode using movzbl */
1829 {4, 4, 4}, /* cost of loading integer registers
1830 in QImode, HImode and SImode.
1831 Relative to reg-reg move (2). */
1832 {4, 4, 4}, /* cost of storing integer registers */
1833 4, /* cost of reg,reg fld/fst */
1834 {12, 12, 12}, /* cost of loading fp registers
1835 in SFmode, DFmode and XFmode */
1836 {6, 6, 8}, /* cost of storing fp registers
1837 in SFmode, DFmode and XFmode */
1838 2, /* cost of moving MMX register */
1839 {8, 8}, /* cost of loading MMX registers
1840 in SImode and DImode */
1841 {8, 8}, /* cost of storing MMX registers
1842 in SImode and DImode */
1843 2, /* cost of moving SSE register */
1844 {8, 8, 8}, /* cost of loading SSE registers
1845 in SImode, DImode and TImode */
1846 {8, 8, 8}, /* cost of storing SSE registers
1847 in SImode, DImode and TImode */
1848 5, /* MMX or SSE register to integer */
1849 32, /* size of l1 cache. */
1850 256, /* size of l2 cache. */
1851 64, /* size of prefetch block */
1852 6, /* number of parallel prefetches */
1853 3, /* Branch cost */
1854 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1855 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1856 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1857 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1858 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1859 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1860 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1861 DUMMY_STRINGOP_ALGS},
1862 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1863 DUMMY_STRINGOP_ALGS},
1864 1, /* scalar_stmt_cost. */
1865 1, /* scalar load_cost. */
1866 1, /* scalar_store_cost. */
1867 1, /* vec_stmt_cost. */
1868 1, /* vec_to_scalar_cost. */
1869 1, /* scalar_to_vec_cost. */
1870 1, /* vec_align_load_cost. */
1871 2, /* vec_unalign_load_cost. */
1872 1, /* vec_store_cost. */
1873 3, /* cond_taken_branch_cost. */
1874 1, /* cond_not_taken_branch_cost. */
1875 };
1877 const struct processor_costs *ix86_cost = &pentium_cost;
1879 /* Processor feature/optimization bitmasks. */
1880 #define m_386 (1<<PROCESSOR_I386)
1881 #define m_486 (1<<PROCESSOR_I486)
1882 #define m_PENT (1<<PROCESSOR_PENTIUM)
1883 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1884 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1885 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1886 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1887 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1888 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1889 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1890 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1891 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1892 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1893 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1894 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1895 #define m_ATOM (1<<PROCESSOR_ATOM)
1897 #define m_GEODE (1<<PROCESSOR_GEODE)
1898 #define m_K6 (1<<PROCESSOR_K6)
1899 #define m_K6_GEODE (m_K6 | m_GEODE)
1900 #define m_K8 (1<<PROCESSOR_K8)
1901 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1902 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1903 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1904 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1905 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1906 #define m_BDVER (m_BDVER1 | m_BDVER2)
1907 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1908 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1910 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1911 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1913 /* Generic instruction choice should be a common subset of the supported
1914 CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1915 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
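/* Illustrative note (added for exposition): these masks are consulted
   by testing the bit of the active processor, mirroring the
   initialization code further down in this file:
     ix86_tune_mask = 1u << ix86_tune;
     ix86_tune_features[i] = !!(initial_ix86_tune_features[i]
                                & ix86_tune_mask);  */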
1917 /* Feature tests against the various tunings. */
1918 unsigned char ix86_tune_features[X86_TUNE_LAST];
1920 /* Feature tests against the various tunings used to create ix86_tune_features
1921 based on the processor mask. */
1922 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1923 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1924 negatively, so enabling it for Generic64 seems like a good code-size
1925 tradeoff. We can't enable it for 32-bit generic because it does not
1926 work well with PPro-based chips. */
1927 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1929 /* X86_TUNE_PUSH_MEMORY */
1930 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1932 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1933 m_486 | m_PENT,
1935 /* X86_TUNE_UNROLL_STRLEN */
1936 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1938 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1939 on simulation results, but after P4 shipped no performance benefit was
1940 observed from branch hints, and they increase code size.
1941 As a result, icc never generates branch hints. */
1944 /* X86_TUNE_DOUBLE_WITH_ADD */
1947 /* X86_TUNE_USE_SAHF */
1948 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1950 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1951 partial dependencies. */
1952 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1954 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1955 register stalls on the Generic32 compilation setting as well. However,
1956 in the current implementation partial register stalls are not eliminated
1957 very well: they can be introduced via subregs synthesized by combine
1958 and can happen in caller/callee saving sequences. Because this option
1959 pays back little on PPro-based chips and conflicts with the partial-register
1960 dependencies used by Athlon/P4-based chips, it is better to leave it off
1961 for generic32 for now. */
1962 m_PPRO,
1964 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1965 m_CORE2I7 | m_GENERIC,
1967 /* X86_TUNE_USE_HIMODE_FIOP */
1968 m_386 | m_486 | m_K6_GEODE,
1970 /* X86_TUNE_USE_SIMODE_FIOP */
1971 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1973 /* X86_TUNE_USE_MOV0 */
1976 /* X86_TUNE_USE_CLTD */
1977 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1979 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1982 /* X86_TUNE_SPLIT_LONG_MOVES */
1985 /* X86_TUNE_READ_MODIFY_WRITE */
1988 /* X86_TUNE_READ_MODIFY */
1991 /* X86_TUNE_PROMOTE_QIMODE */
1992 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1994 /* X86_TUNE_FAST_PREFIX */
1995 ~(m_386 | m_486 | m_PENT),
1997 /* X86_TUNE_SINGLE_STRINGOP */
1998 m_386 | m_P4_NOCONA,
2000 /* X86_TUNE_QIMODE_MATH */
2003 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2004 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
2005 might be considered for Generic32 if our scheme for avoiding partial
2006 stalls were more effective. */
2009 /* X86_TUNE_PROMOTE_QI_REGS */
2012 /* X86_TUNE_PROMOTE_HI_REGS */
2015 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2016 over esp addition. */
2017 m_386 | m_486 | m_PENT | m_PPRO,
2019 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2020 over esp addition. */
2023 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2024 over esp subtraction. */
2025 m_386 | m_486 | m_PENT | m_K6_GEODE,
2027 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2028 over esp subtraction. */
2029 m_PENT | m_K6_GEODE,
2031 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2032 for DFmode copies */
2033 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2035 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2036 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2038 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2039 conflict between PPro/Pentium4-based chips that treat 128bit
2040 SSE registers as single units and K8-based chips that divide SSE
2041 registers into two 64bit halves. This knob promotes all store destinations
2042 to be 128bit to allow register renaming on 128bit SSE units, but usually
2043 results in one extra microop on 64bit SSE units. Experimental results
2044 show that disabling this option on P4 brings over a 20% SPECfp regression,
2045 while enabling it on K8 brings a roughly 2.4% regression that can be partly
2046 masked by careful scheduling of moves. */
2047 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2049 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2050 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2052 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2055 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2058 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the types and dependencies
2059 are resolved on SSE register parts instead of whole registers, so we may
2060 maintain just the lower part of scalar values in the proper format, leaving
2061 the upper part undefined. */
2062 m_ATHLON_K8,
2064 /* X86_TUNE_SSE_TYPELESS_STORES */
2067 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2068 m_PPRO | m_P4_NOCONA,
2070 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2071 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2073 /* X86_TUNE_PROLOGUE_USING_MOVE */
2074 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2076 /* X86_TUNE_EPILOGUE_USING_MOVE */
2077 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2079 /* X86_TUNE_SHIFT1 */
2082 /* X86_TUNE_USE_FFREEP */
2085 /* X86_TUNE_INTER_UNIT_MOVES */
2086 ~(m_AMD_MULTIPLE | m_GENERIC),
2088 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2089 ~(m_AMDFAM10 | m_BDVER),
2091 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2092 than 4 branch instructions in the 16 byte window. */
2093 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2095 /* X86_TUNE_SCHEDULE */
2096 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2098 /* X86_TUNE_USE_BT */
2099 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2101 /* X86_TUNE_USE_INCDEC */
2102 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2104 /* X86_TUNE_PAD_RETURNS */
2105 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2107 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2108 m_ATOM,
2110 /* X86_TUNE_EXT_80387_CONSTANTS */
2111 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2113 /* X86_TUNE_SHORTEN_X87_SSE */
2116 /* X86_TUNE_AVOID_VECTOR_DECODE */
2117 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2119 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
2120 HImode and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2123 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2124 vector path on AMD machines. */
2125 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2127 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2128 machines. */
2129 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2131 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2132 than a MOV. */
2133 m_PENT,
2135 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2136 but one byte longer. */
2139 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2140 operand that cannot be represented using a modRM byte. The XOR
2141 replacement is long decoded, so this split helps here as well. */
2144 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2145 from FP to FP. */
2146 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2148 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2149 from integer to FP. */
2152 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2153 with a subsequent conditional jump instruction into a single
2154 compare-and-branch uop. */
2157 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2158 will impact LEA instruction selection. */
2159 m_ATOM,
2161 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2162 operations. */
2165 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2166 at -O3. For the moment, the prefetching seems badly tuned for Intel
2167 chips. */
2168 m_K6_GEODE | m_AMD_MULTIPLE,
2170 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2171 the auto-vectorizer. */
2174 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2175 during reassociation of integer computation. */
2178 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2179 during reassociation of fp computation. */
2180 m_BDVER1 | m_BDVER2,
2181 };
2183 /* Feature tests against the various architecture variations. */
2184 unsigned char ix86_arch_features[X86_ARCH_LAST];
2186 /* Feature tests against the various architecture variations, used to create
2187 ix86_arch_features based on the processor mask. */
2188 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2189 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2190 ~(m_386 | m_486 | m_PENT | m_K6),
2192 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2193 ~m_386,
2195 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2196 ~(m_386 | m_486),
2198 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2199 ~m_386,
2201 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2202 ~m_386,
2203 };
2205 static const unsigned int x86_accumulate_outgoing_args
2206 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2208 static const unsigned int x86_arch_always_fancy_math_387
2209 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2211 static const unsigned int x86_avx256_split_unaligned_load
2212 = m_COREI7 | m_GENERIC;
2214 static const unsigned int x86_avx256_split_unaligned_store
2215 = m_COREI7 | m_BDVER | m_GENERIC;
2217 /* In case the average insn count for single function invocation is
2218 lower than this constant, emit fast (but longer) prologue and
2219 epilogue code. */
2220 #define FAST_PROLOGUE_INSN_COUNT 20
2222 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2223 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2224 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2225 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2227 /* Array of the smallest class containing reg number REGNO, indexed by
2228 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2230 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2231 {
2232 /* ax, dx, cx, bx */
2233 AREG, DREG, CREG, BREG,
2234 /* si, di, bp, sp */
2235 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2236 /* FP registers */
2237 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2238 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2239 /* arg pointer */
2240 NON_Q_REGS,
2241 /* flags, fpsr, fpcr, frame */
2242 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2243 /* SSE registers */
2244 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2245 SSE_REGS, SSE_REGS,
2246 /* MMX registers */
2247 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2248 MMX_REGS, MMX_REGS,
2249 /* REX registers */
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2252 /* SSE REX registers */
2253 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2254 SSE_REGS, SSE_REGS,
2255 };
2257 /* The "default" register map used in 32bit mode. */
2259 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2261 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2262 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2263 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2264 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2265 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2268 };
2270 /* The "default" register map used in 64bit mode. */
2272 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2274 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2275 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2276 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2277 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2278 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2279 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2280 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2281 };
2283 /* Define the register numbers to be used in Dwarf debugging information.
2284 The SVR4 reference port C compiler uses the following register numbers
2285 in its Dwarf output code:
2286 0 for %eax (gcc regno = 0)
2287 1 for %ecx (gcc regno = 2)
2288 2 for %edx (gcc regno = 1)
2289 3 for %ebx (gcc regno = 3)
2290 4 for %esp (gcc regno = 7)
2291 5 for %ebp (gcc regno = 6)
2292 6 for %esi (gcc regno = 4)
2293 7 for %edi (gcc regno = 5)
2294 The following three DWARF register numbers are never generated by
2295 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2296 believes these numbers have these meanings.
2297 8 for %eip (no gcc equivalent)
2298 9 for %eflags (gcc regno = 17)
2299 10 for %trapno (no gcc equivalent)
2300 It is not at all clear how we should number the FP stack registers
2301 for the x86 architecture. If the version of SDB on x86/svr4 were
2302 a bit less brain dead with respect to floating-point then we would
2303 have a precedent to follow with respect to DWARF register numbers
2304 for x86 FP registers, but the SDB on x86/svr4 is so completely
2305 broken with respect to FP registers that it is hardly worth thinking
2306 of it as something to strive for compatibility with.
2307 The version of x86/svr4 SDB I have at the moment does (partially)
2308 seem to believe that DWARF register number 11 is associated with
2309 the x86 register %st(0), but that's about all. Higher DWARF
2310 register numbers don't seem to be associated with anything in
2311 particular, and even for DWARF regno 11, SDB only seems to under-
2312 stand that it should say that a variable lives in %st(0) (when
2313 asked via an `=' command) if we said it was in DWARF regno 11,
2314 but SDB still prints garbage when asked for the value of the
2315 variable in question (via a `/' command).
2316 (Also note that the labels SDB prints for various FP stack regs
2317 when doing an `x' command are all wrong.)
2318 Note that these problems generally don't affect the native SVR4
2319 C compiler because it doesn't allow the use of -O with -g and
2320 because when it is *not* optimizing, it allocates a memory
2321 location for each floating-point variable, and the memory
2322 location is what gets described in the DWARF AT_location
2323 attribute for the variable in question.
2324 Regardless of the severe mental illness of the x86/svr4 SDB, we
2325 do something sensible here and we use the following DWARF
2326 register numbers. Note that these are all stack-top-relative
2327 numbers:
2328 11 for %st(0) (gcc regno = 8)
2329 12 for %st(1) (gcc regno = 9)
2330 13 for %st(2) (gcc regno = 10)
2331 14 for %st(3) (gcc regno = 11)
2332 15 for %st(4) (gcc regno = 12)
2333 16 for %st(5) (gcc regno = 13)
2334 17 for %st(6) (gcc regno = 14)
2335 18 for %st(7) (gcc regno = 15)
2336 */
2337 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2339 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2340 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2341 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2342 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2343 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2345 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2346 };
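/* Illustrative worked example (added for exposition): a value held in
   %st(1) is gcc regno 9, and svr4_dbx_register_map[9] == 12, matching
   the stack-top-relative DWARF numbering described in the comment
   above; likewise %eax (gcc regno 0) maps to DWARF register 0.  */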
2348 /* Define parameter passing and return registers. */
2350 static int const x86_64_int_parameter_registers[6] =
2351 {
2352 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2353 };
2355 static int const x86_64_ms_abi_int_parameter_registers[4] =
2356 {
2357 CX_REG, DX_REG, R8_REG, R9_REG
2358 };
2360 static int const x86_64_int_return_registers[4] =
2361 {
2362 AX_REG, DX_REG, DI_REG, SI_REG
2363 };
2365 /* Define the structure for the machine field in struct function. */
2367 struct GTY(()) stack_local_entry {
2368 unsigned short mode;
2369 unsigned short n;
2370 rtx rtl;
2371 struct stack_local_entry *next;
2372 };
2374 /* Structure describing stack frame layout.
2375 Stack grows downward:
2377 [arguments]
2378 <- ARG_POINTER
2379 saved pc
2381 saved static chain if ix86_static_chain_on_stack
2383 saved frame pointer if frame_pointer_needed
2384 <- HARD_FRAME_POINTER
2385 [saved regs]
2386 <- reg_save_offset
2387 [padding0]
2389 [saved SSE regs]
2390 <- sse_regs_save_offset
2391 [padding1] |
2392 | <- FRAME_POINTER
2393 [va_arg registers] |
2395 [frame] |
2397 [padding2] | = to_allocate
2398 */
2400 struct ix86_frame
2401 {
2402 int nsseregs;
2403 int nregs;
2404 int va_arg_size;
2405 int red_zone_size;
2406 int outgoing_arguments_size;
2407 HOST_WIDE_INT frame;
2409 /* The offsets relative to ARG_POINTER. */
2410 HOST_WIDE_INT frame_pointer_offset;
2411 HOST_WIDE_INT hard_frame_pointer_offset;
2412 HOST_WIDE_INT stack_pointer_offset;
2413 HOST_WIDE_INT hfp_save_offset;
2414 HOST_WIDE_INT reg_save_offset;
2415 HOST_WIDE_INT sse_reg_save_offset;
2417 /* When save_regs_using_mov is set, emit prologue using
2418 move instead of push instructions. */
2419 bool save_regs_using_mov;
2420 };
2422 /* Which cpu are we scheduling for. */
2423 enum attr_cpu ix86_schedule;
2425 /* Which cpu are we optimizing for. */
2426 enum processor_type ix86_tune;
2428 /* Which instruction set architecture to use. */
2429 enum processor_type ix86_arch;
2431 /* True if processor has SSE prefetch instruction. */
2432 int x86_prefetch_sse;
2434 /* True if processor has prefetchw instruction. */
2435 static int x86_prefetchw;
2437 /* -mstackrealign option */
2438 static const char ix86_force_align_arg_pointer_string[]
2439 = "force_align_arg_pointer";
2441 static rtx (*ix86_gen_leave) (void);
2442 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2445 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2446 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2447 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2448 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2449 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2450 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2452 /* Preferred alignment for stack boundary in bits. */
2453 unsigned int ix86_preferred_stack_boundary;
2455 /* Alignment for incoming stack boundary in bits specified at
2456 command line. */
2457 static unsigned int ix86_user_incoming_stack_boundary;
2459 /* Default alignment for incoming stack boundary in bits. */
2460 static unsigned int ix86_default_incoming_stack_boundary;
2462 /* Alignment for incoming stack boundary in bits. */
2463 unsigned int ix86_incoming_stack_boundary;
2465 /* Calling abi specific va_list type nodes. */
2466 static GTY(()) tree sysv_va_list_type_node;
2467 static GTY(()) tree ms_va_list_type_node;
2469 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2470 char internal_label_prefix[16];
2471 int internal_label_prefix_len;
2473 /* Fence to use after loop using movnt. */
2474 tree x86_mfence;
2476 /* Register class used for passing a given 64bit part of the argument.
2477 These represent classes as documented by the psABI, with the exception
2478 of the SSESF and SSEDF classes, which are basically the SSE class:
2479 gcc just uses an SFmode or DFmode move instead of DImode to avoid
2480 reformatting penalties.
2481 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2482 whenever possible (the upper half does contain padding). */
2483 enum x86_64_reg_class
2484 {
2485 X86_64_NO_CLASS,
2486 X86_64_INTEGER_CLASS,
2487 X86_64_INTEGERSI_CLASS,
2488 X86_64_SSE_CLASS,
2489 X86_64_SSESF_CLASS,
2490 X86_64_SSEDF_CLASS,
2491 X86_64_SSEUP_CLASS,
2492 X86_64_X87_CLASS,
2493 X86_64_X87UP_CLASS,
2494 X86_64_COMPLEX_X87_CLASS,
2495 X86_64_MEMORY_CLASS
2496 };
2498 #define MAX_CLASSES 4
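/* Illustrative classification example (added for exposition): under
   the SysV x86-64 ABI, struct { long l; double d; } spans two
   eightbytes classified as X86_64_INTEGER_CLASS and X86_64_SSEDF_CLASS,
   so it is passed in one general-purpose and one SSE register.
   MAX_CLASSES is 4 because the largest register-passed object, a
   32-byte AVX vector, covers four eightbytes (an SSE class followed by
   three SSEUP classes).  */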
2500 /* Table of constants used by fldpi, fldln2, etc.... */
2501 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2502 static bool ext_80387_constants_init = 0;
2505 static struct machine_function * ix86_init_machine_status (void);
2506 static rtx ix86_function_value (const_tree, const_tree, bool);
2507 static bool ix86_function_value_regno_p (const unsigned int);
2508 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2509 const_tree);
2510 static rtx ix86_static_chain (const_tree, bool);
2511 static int ix86_function_regparm (const_tree, const_tree);
2512 static void ix86_compute_frame_layout (struct ix86_frame *);
2513 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2514 rtx, rtx, int);
2515 static void ix86_add_new_builtins (HOST_WIDE_INT);
2516 static tree ix86_canonical_va_list_type (tree);
2517 static void predict_jump (int);
2518 static unsigned int split_stack_prologue_scratch_regno (void);
2519 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2521 enum ix86_function_specific_strings
2522 {
2523 IX86_FUNCTION_SPECIFIC_ARCH,
2524 IX86_FUNCTION_SPECIFIC_TUNE,
2525 IX86_FUNCTION_SPECIFIC_MAX
2526 };
2528 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2529 const char *, enum fpmath_unit, bool);
2530 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2531 static void ix86_function_specific_save (struct cl_target_option *);
2532 static void ix86_function_specific_restore (struct cl_target_option *);
2533 static void ix86_function_specific_print (FILE *, int,
2534 struct cl_target_option *);
2535 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2536 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2537 struct gcc_options *);
2538 static bool ix86_can_inline_p (tree, tree);
2539 static void ix86_set_current_function (tree);
2540 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2542 static enum calling_abi ix86_function_abi (const_tree);
2545 #ifndef SUBTARGET32_DEFAULT_CPU
2546 #define SUBTARGET32_DEFAULT_CPU "i386"
2547 #endif
2549 /* The svr4 ABI for the i386 says that records and unions are returned
2550 in memory. */
2551 #ifndef DEFAULT_PCC_STRUCT_RETURN
2552 #define DEFAULT_PCC_STRUCT_RETURN 1
2553 #endif
2555 /* Whether -mtune= or -march= were specified */
2556 static int ix86_tune_defaulted;
2557 static int ix86_arch_specified;
2559 /* Vectorization library interface and handlers. */
2560 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2562 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2563 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2565 /* Processor target table, indexed by processor number. */
2566 struct ptt
2567 {
2568 const struct processor_costs *cost; /* Processor costs */
2569 const int align_loop; /* Default alignments. */
2570 const int align_loop_max_skip;
2571 const int align_jump;
2572 const int align_jump_max_skip;
2573 const int align_func;
2574 };
2576 static const struct ptt processor_target_table[PROCESSOR_max] =
2577 {
2578 {&i386_cost, 4, 3, 4, 3, 4},
2579 {&i486_cost, 16, 15, 16, 15, 16},
2580 {&pentium_cost, 16, 7, 16, 7, 16},
2581 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2582 {&geode_cost, 0, 0, 0, 0, 0},
2583 {&k6_cost, 32, 7, 32, 7, 32},
2584 {&athlon_cost, 16, 7, 16, 7, 16},
2585 {&pentium4_cost, 0, 0, 0, 0, 0},
2586 {&k8_cost, 16, 7, 16, 7, 16},
2587 {&nocona_cost, 0, 0, 0, 0, 0},
2588 /* Core 2 32-bit. */
2589 {&generic32_cost, 16, 10, 16, 10, 16},
2590 /* Core 2 64-bit. */
2591 {&generic64_cost, 16, 10, 16, 10, 16},
2592 /* Core i7 32-bit. */
2593 {&generic32_cost, 16, 10, 16, 10, 16},
2594 /* Core i7 64-bit. */
2595 {&generic64_cost, 16, 10, 16, 10, 16},
2596 {&generic32_cost, 16, 7, 16, 7, 16},
2597 {&generic64_cost, 16, 10, 16, 10, 16},
2598 {&amdfam10_cost, 32, 24, 32, 7, 32},
2599 {&bdver1_cost, 32, 24, 32, 7, 32},
2600 {&bdver2_cost, 32, 24, 32, 7, 32},
2601 {&btver1_cost, 32, 24, 32, 7, 32},
2602 {&atom_cost, 16, 15, 16, 7, 16}
2603 };
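/* Illustrative note (added for exposition): once ix86_tune is fixed,
   this table is consulted as, e.g.,
     processor_target_table[ix86_tune].align_loop
   to seed the default alignment parameters, and its .cost field
   becomes ix86_cost (see ix86_option_override_internal below).  */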
2605 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2635 /* Return true if a red-zone is in use. */
2637 static inline bool
2638 ix86_using_red_zone (void)
2639 {
2640 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2641 }
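/* Illustrative background (added for exposition): the red zone is the
   128-byte area below %rsp that the SysV x86-64 ABI guarantees will
   not be clobbered by signal or interrupt handlers, letting leaf
   functions address locals without adjusting the stack pointer.  The
   MS ABI provides no such area, hence the !TARGET_64BIT_MS_ABI test
   above.  */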
2643 /* Return a string that documents the current -m options. The caller is
2644 responsible for freeing the string. */
2646 static char *
2647 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2648 const char *tune, enum fpmath_unit fpmath,
2649 bool add_nl_p)
2650 {
2651 struct ix86_target_opts
2653 const char *option; /* option string */
2654 HOST_WIDE_INT mask; /* isa mask options */
2657 /* This table is ordered so that options like -msse4.2 that imply
2658 preceding options will match first. */
2659 static struct ix86_target_opts isa_opts[] =
2660 {
2661 { "-m64", OPTION_MASK_ISA_64BIT },
2662 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2663 { "-mfma", OPTION_MASK_ISA_FMA },
2664 { "-mxop", OPTION_MASK_ISA_XOP },
2665 { "-mlwp", OPTION_MASK_ISA_LWP },
2666 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2667 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2668 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2669 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2670 { "-msse3", OPTION_MASK_ISA_SSE3 },
2671 { "-msse2", OPTION_MASK_ISA_SSE2 },
2672 { "-msse", OPTION_MASK_ISA_SSE },
2673 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2674 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2675 { "-mmmx", OPTION_MASK_ISA_MMX },
2676 { "-mabm", OPTION_MASK_ISA_ABM },
2677 { "-mbmi", OPTION_MASK_ISA_BMI },
2678 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2679 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2680 { "-mtbm", OPTION_MASK_ISA_TBM },
2681 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2682 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2683 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2684 { "-maes", OPTION_MASK_ISA_AES },
2685 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2686 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2687 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2688 { "-mf16c", OPTION_MASK_ISA_F16C },
2692 static struct ix86_target_opts flag_opts[] =
2694 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2695 { "-m80387", MASK_80387 },
2696 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2697 { "-malign-double", MASK_ALIGN_DOUBLE },
2698 { "-mcld", MASK_CLD },
2699 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2700 { "-mieee-fp", MASK_IEEE_FP },
2701 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2702 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2703 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2704 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2705 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2706 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2707 { "-mno-red-zone", MASK_NO_RED_ZONE },
2708 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2709 { "-mrecip", MASK_RECIP },
2710 { "-mrtd", MASK_RTD },
2711 { "-msseregparm", MASK_SSEREGPARM },
2712 { "-mstack-arg-probe", MASK_STACK_PROBE },
2713 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2714 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2715 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2716 { "-mvzeroupper", MASK_VZEROUPPER },
2717 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2718 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2719 { "-mprefer-avx128", MASK_PREFER_AVX128},
2722 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2725 char target_other[40];
2734 memset (opts, '\0', sizeof (opts));
2736 /* Add -march= option. */
2737 if (arch)
2738 {
2739 opts[num][0] = "-march=";
2740 opts[num++][1] = arch;
2741 }
2743 /* Add -mtune= option. */
2744 if (tune)
2745 {
2746 opts[num][0] = "-mtune=";
2747 opts[num++][1] = tune;
2748 }
2750 /* Pick out the options in isa options. */
2751 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2752 {
2753 if ((isa & isa_opts[i].mask) != 0)
2754 {
2755 opts[num++][0] = isa_opts[i].option;
2756 isa &= ~ isa_opts[i].mask;
2757 }
2758 }
2760 if (isa && add_nl_p)
2761 {
2762 opts[num++][0] = isa_other;
2763 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2764 isa);
2765 }
2767 /* Add flag options. */
2768 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2769 {
2770 if ((flags & flag_opts[i].mask) != 0)
2771 {
2772 opts[num++][0] = flag_opts[i].option;
2773 flags &= ~ flag_opts[i].mask;
2774 }
2775 }
2777 if (flags && add_nl_p)
2778 {
2779 opts[num++][0] = target_other;
2780 sprintf (target_other, "(other flags: %#x)", flags);
2781 }
2783 /* Add -mfpmath= option. */
2784 if (fpmath)
2785 {
2786 opts[num][0] = "-mfpmath=";
2787 switch ((int) fpmath)
2788 {
2789 case FPMATH_387:
2790 opts[num++][1] = "387";
2791 break;
2793 case FPMATH_SSE:
2794 opts[num++][1] = "sse";
2795 break;
2797 case FPMATH_387 | FPMATH_SSE:
2798 opts[num++][1] = "sse+387";
2799 break;
2801 default:
2802 gcc_unreachable ();
2803 }
2804 }
2810 gcc_assert (num < ARRAY_SIZE (opts));
2812 /* Size the string. */
2813 len = 0;
2814 sep_len = (add_nl_p) ? 3 : 1;
2815 for (i = 0; i < num; i++)
2816 {
2817 len += sep_len;
2818 for (j = 0; j < 2; j++)
2819 if (opts[i][j])
2820 len += strlen (opts[i][j]);
2821 }
2823 /* Build the string. */
2824 ret = ptr = (char *) xmalloc (len);
2825 line_len = 0;
2827 for (i = 0; i < num; i++)
2831 for (j = 0; j < 2; j++)
2832 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2839 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2847 for (j = 0; j < 2; j++)
2850 memcpy (ptr, opts[i][j], len2[j]);
2852 line_len += len2[j];
2857 gcc_assert (ret + len >= ptr);
2859 return ret;
2860 }
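/* Illustrative usage (added for exposition; the exact contents depend
   on the enabled ISA and flag bits): for a compile with
   -m64 -march=corei7 the function above might build a string along the
   lines of
     "-march=corei7 -mtune=corei7 -m64 -msse4.2 -msse4.1 -mssse3
      -msse3 -msse2 -msse -mmmx -mpopcnt -mfpmath=sse"  */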
2862 /* Return true if profiling code should be emitted before the
2863 prologue, and false otherwise.
2864 Note: for x86 with "hotfix", sorry () is issued. */
2865 static bool
2866 ix86_profile_before_prologue (void)
2867 {
2868 return flag_fentry != 0;
2869 }
2871 /* Function that is callable from the debugger to print the current
2872 options. */
2873 static void
2874 ix86_debug_options (void)
2875 {
2876 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2877 ix86_arch_string, ix86_tune_string,
2878 ix86_fpmath, true);
2880 if (opts)
2881 {
2882 fprintf (stderr, "%s\n\n", opts);
2883 free (opts);
2884 }
2885 else
2886 fputs ("<no options>\n\n", stderr);
2888 return;
2889 }
2891 /* Override various settings based on options. If MAIN_ARGS_P, the
2892 options are from the command line, otherwise they are from
2893 attribute(target). */
2895 static void
2896 ix86_option_override_internal (bool main_args_p)
2897 {
2898 int i;
2899 unsigned int ix86_arch_mask, ix86_tune_mask;
2900 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2901 const char *prefix;
2902 const char *suffix;
2903 const char *sw;
2905 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2906 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2907 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2908 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2909 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2910 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2911 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2912 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2913 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2914 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2915 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2916 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2917 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2918 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2919 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2920 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2921 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2922 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2923 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2924 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2925 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2926 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2927 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2928 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2929 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2930 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2931 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2932 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2933 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2934 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2935 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2936 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2937 #define PTA_PREFETCHW (HOST_WIDE_INT_1 << 32)
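/* Illustrative note (added for exposition): each PTA_* capability bit
   in a processor_alias_table entry is translated into the
   corresponding OPTION_MASK_ISA_* bit further down, unless the user
   already set that ISA bit explicitly, following the pattern
     if (processor_alias_table[i].flags & PTA_SSE2
         && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
       ix86_isa_flags |= OPTION_MASK_ISA_SSE2;  */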
2939 /* If this reaches 64, we need to widen the struct pta flags below. */
2941 static struct pta
2942 {
2943 const char *const name; /* processor name or nickname. */
2944 const enum processor_type processor;
2945 const enum attr_cpu schedule;
2946 const unsigned HOST_WIDE_INT flags;
2947 }
2948 const processor_alias_table[] =
2949 {
2950 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2951 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2952 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2953 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2954 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2955 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2956 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2957 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2958 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2959 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2960 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2961 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2962 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2964 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2966 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2967 PTA_MMX | PTA_SSE | PTA_SSE2},
2968 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2969 PTA_MMX | PTA_SSE | PTA_SSE2},
2970 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2971 PTA_MMX | PTA_SSE | PTA_SSE2},
2972 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2973 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2974 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2975 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2976 | PTA_CX16 | PTA_NO_SAHF},
2977 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2978 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2979 | PTA_SSSE3 | PTA_CX16},
2980 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2981 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2982 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2983 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2984 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2985 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2986 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2987 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C},
2992 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2993 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2994 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2995 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2996 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2997 | PTA_FMA | PTA_MOVBE},
2998 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2999 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3000 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3001 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3004 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3005 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3006 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3007 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3008 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3009 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3010 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3011 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3012 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3013 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3014 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3015 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3016 {"x86-64", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3018 {"k8", PROCESSOR_K8, CPU_K8,
3019 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3020 | PTA_SSE2 | PTA_NO_SAHF},
3021 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3022 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3023 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3024 {"opteron", PROCESSOR_K8, CPU_K8,
3025 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3026 | PTA_SSE2 | PTA_NO_SAHF},
3027 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3028 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3029 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3030 {"athlon64", PROCESSOR_K8, CPU_K8,
3031 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3032 | PTA_SSE2 | PTA_NO_SAHF},
3033 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3034 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3035 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3036 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3037 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3038 | PTA_SSE2 | PTA_NO_SAHF},
3039 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3040 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3041 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3042 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3043 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3044 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3045 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3046 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3047 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
3048 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_FMA4 | PTA_XOP | PTA_LWP},
3050 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3051 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3052 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
3053 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3054 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3055 | PTA_FMA},
3056 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3057 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3058 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3059 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3060 0 /* flags are only used for -march switch. */ },
3061 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3062 PTA_64BIT /* flags are only used for -march switch. */ },
3063 };
3065 /* -mrecip options. */
3066 static struct
3067 {
3068 const char *string; /* option name */
3069 unsigned int mask; /* mask bits to set */
3070 }
3071 const recip_options[] =
3072 {
3073 { "all", RECIP_MASK_ALL },
3074 { "none", RECIP_MASK_NONE },
3075 { "div", RECIP_MASK_DIV },
3076 { "sqrt", RECIP_MASK_SQRT },
3077 { "vec-div", RECIP_MASK_VEC_DIV },
3078 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3081 int const pta_size = ARRAY_SIZE (processor_alias_table);
3083 /* Set up prefix/suffix so the error messages refer to either the command
3084 line argument, or the attribute(target). */
3085 if (main_args_p)
3086 {
3087 prefix = "-m";
3088 suffix = "";
3089 sw = "switch";
3090 }
3091 else
3092 {
3093 prefix = "option(\"";
3094 suffix = "\")";
3095 sw = "attribute";
3096 }
3098 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3099 SUBTARGET_OVERRIDE_OPTIONS;
3100 #endif
3102 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3103 SUBSUBTARGET_OVERRIDE_OPTIONS;
3104 #endif
3107 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3109 /* -fPIC is the default for x86_64. */
3110 if (TARGET_MACHO && TARGET_64BIT)
3111 flag_pic = 2;
3113 /* Need to check -mtune=generic first. */
3114 if (ix86_tune_string)
3116 if (!strcmp (ix86_tune_string, "generic")
3117 || !strcmp (ix86_tune_string, "i686")
3118 /* As special support for cross compilers we read -mtune=native
3119 as -mtune=generic. With native compilers we won't see the
3120 -mtune=native, as it was changed by the driver. */
3121 || !strcmp (ix86_tune_string, "native"))
3124 ix86_tune_string = "generic64";
3126 ix86_tune_string = "generic32";
3128 /* If this call is for setting the option attribute, allow the
3129 generic32/generic64 that was previously set. */
3130 else if (!main_args_p
3131 && (!strcmp (ix86_tune_string, "generic32")
3132 || !strcmp (ix86_tune_string, "generic64")))
3134 else if (!strncmp (ix86_tune_string, "generic", 7))
3135 error ("bad value (%s) for %stune=%s %s",
3136 ix86_tune_string, prefix, suffix, sw);
3137 else if (!strcmp (ix86_tune_string, "x86-64"))
3138 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3139 "%stune=k8%s or %stune=generic%s instead as appropriate",
3140 prefix, suffix, prefix, suffix, prefix, suffix);
3141 }
3142 else
3143 {
3144 if (ix86_arch_string)
3145 ix86_tune_string = ix86_arch_string;
3146 if (!ix86_tune_string)
3147 {
3148 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3149 ix86_tune_defaulted = 1;
3150 }
3152 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3153 need to use a sensible tune option. */
3154 if (!strcmp (ix86_tune_string, "generic")
3155 || !strcmp (ix86_tune_string, "x86-64")
3156 || !strcmp (ix86_tune_string, "i686"))
3159 ix86_tune_string = "generic64";
3161 ix86_tune_string = "generic32";
3165 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3166 {
3167 /* rep; movq isn't available in 32-bit code. */
3168 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3169 ix86_stringop_alg = no_stringop;
3170 }
3172 if (!ix86_arch_string)
3173 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3174 else
3175 ix86_arch_specified = 1;
3177 if (!global_options_set.x_ix86_abi)
3178 ix86_abi = DEFAULT_ABI;
3180 if (global_options_set.x_ix86_cmodel)
3182 switch (ix86_cmodel)
3187 ix86_cmodel = CM_SMALL_PIC;
3189 error ("code model %qs not supported in the %s bit mode",
3196 ix86_cmodel = CM_MEDIUM_PIC;
3198 error ("code model %qs not supported in the %s bit mode",
3200 else if (TARGET_X32)
3201 error ("code model %qs not supported in x32 mode",
3208 ix86_cmodel = CM_LARGE_PIC;
3210 error ("code model %qs not supported in the %s bit mode",
3212 else if (TARGET_X32)
3213 error ("code model %qs not supported in x32 mode",
3219 error ("code model %s does not support PIC mode", "32");
3221 error ("code model %qs not supported in the %s bit mode",
3228 error ("code model %s does not support PIC mode", "kernel");
3229 ix86_cmodel = CM_32;
3232 error ("code model %qs not supported in the %s bit mode",
3242 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3243 use of rip-relative addressing. This eliminates fixups that
3244 would otherwise be needed if this object is to be placed in a
3245 DLL, and is essentially just as efficient as direct addressing. */
3246 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3247 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3248 else if (TARGET_64BIT)
3249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3251 ix86_cmodel = CM_32;
3253 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3255 error ("-masm=intel not supported in this configuration");
3256 ix86_asm_dialect = ASM_ATT;
3258 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3259 sorry ("%i-bit mode not compiled in",
3260 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3262 for (i = 0; i < pta_size; i++)
3263 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3264 {
3265 ix86_schedule = processor_alias_table[i].schedule;
3266 ix86_arch = processor_alias_table[i].processor;
3267 /* Default cpu tuning to the architecture. */
3268 ix86_tune = ix86_arch;
3270 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3271 error ("CPU you selected does not support x86-64 "
3274 if (processor_alias_table[i].flags & PTA_MMX
3275 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3276 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3277 if (processor_alias_table[i].flags & PTA_3DNOW
3278 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3279 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3280 if (processor_alias_table[i].flags & PTA_3DNOW_A
3281 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3282 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3283 if (processor_alias_table[i].flags & PTA_SSE
3284 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3285 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3286 if (processor_alias_table[i].flags & PTA_SSE2
3287 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3288 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3289 if (processor_alias_table[i].flags & PTA_SSE3
3290 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3291 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3292 if (processor_alias_table[i].flags & PTA_SSSE3
3293 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3294 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3295 if (processor_alias_table[i].flags & PTA_SSE4_1
3296 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3297 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3298 if (processor_alias_table[i].flags & PTA_SSE4_2
3299 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3300 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3301 if (processor_alias_table[i].flags & PTA_AVX
3302 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3303 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3304 if (processor_alias_table[i].flags & PTA_AVX2
3305 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3306 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3307 if (processor_alias_table[i].flags & PTA_FMA
3308 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3309 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3310 if (processor_alias_table[i].flags & PTA_SSE4A
3311 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3312 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3313 if (processor_alias_table[i].flags & PTA_FMA4
3314 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3315 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3316 if (processor_alias_table[i].flags & PTA_XOP
3317 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3318 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3319 if (processor_alias_table[i].flags & PTA_LWP
3320 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3321 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3322 if (processor_alias_table[i].flags & PTA_ABM
3323 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3324 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3325 if (processor_alias_table[i].flags & PTA_BMI
3326 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3327 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3328 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3329 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3330 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3331 if (processor_alias_table[i].flags & PTA_TBM
3332 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3333 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3334 if (processor_alias_table[i].flags & PTA_BMI2
3335 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3336 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3337 if (processor_alias_table[i].flags & PTA_CX16
3338 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3339 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3340 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3341 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3342 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3343 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3344 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3345 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3346 if (processor_alias_table[i].flags & PTA_MOVBE
3347 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3348 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3349 if (processor_alias_table[i].flags & PTA_AES
3350 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3351 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3352 if (processor_alias_table[i].flags & PTA_PCLMUL
3353 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3354 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3355 if (processor_alias_table[i].flags & PTA_FSGSBASE
3356 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3357 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3358 if (processor_alias_table[i].flags & PTA_RDRND
3359 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3360 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3361 if (processor_alias_table[i].flags & PTA_F16C
3362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3363 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3364 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3365 x86_prefetch_sse = true;
3366 if (processor_alias_table[i].flags & PTA_PREFETCHW)
3367 x86_prefetchw = true;
3369 break;
3370 }
3372 if (!strcmp (ix86_arch_string, "generic"))
3373 error ("generic CPU can be used only for %stune=%s %s",
3374 prefix, suffix, sw);
3375 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3376 error ("bad value (%s) for %sarch=%s %s",
3377 ix86_arch_string, prefix, suffix, sw);
3379 ix86_arch_mask = 1u << ix86_arch;
3380 for (i = 0; i < X86_ARCH_LAST; ++i)
3381 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
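/* Editorial note: each entry of initial_ix86_arch_features is a bitmask
   with one bit per processor, so testing it against (1u << ix86_arch)
   answers "does this feature apply to the -march CPU?".  For example,
   if ix86_arch happened to be 5, a feature whose mask includes 0x20
   would yield !!(mask & 0x20) == 1 here.  */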
3383 for (i = 0; i < pta_size; i++)
3384 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3386 ix86_schedule = processor_alias_table[i].schedule;
3387 ix86_tune = processor_alias_table[i].processor;
3390 if (!(processor_alias_table[i].flags & PTA_64BIT))
3392 if (ix86_tune_defaulted)
3394 ix86_tune_string = "x86-64";
3395 for (i = 0; i < pta_size; i++)
3396 if (! strcmp (ix86_tune_string,
3397 processor_alias_table[i].name))
3399 ix86_schedule = processor_alias_table[i].schedule;
3400 ix86_tune = processor_alias_table[i].processor;
3403 error ("CPU you selected does not support x86-64 "
3409 /* Adjust tuning when compiling for 32-bit ABI. */
3412 case PROCESSOR_GENERIC64:
3413 ix86_tune = PROCESSOR_GENERIC32;
3414 ix86_schedule = CPU_PENTIUMPRO;
3417 case PROCESSOR_CORE2_64:
3418 ix86_tune = PROCESSOR_CORE2_32;
3421 case PROCESSOR_COREI7_64:
3422 ix86_tune = PROCESSOR_COREI7_32;
3429 /* Intel CPUs have always interpreted SSE prefetch instructions as
3430 NOPs; so, we can enable SSE prefetch instructions even when
3431 -mtune (rather than -march) points us to a processor that has them.
3432 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3433 higher processors. */
3435 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3436 x86_prefetch_sse = true;
3440 if (ix86_tune_specified && i == pta_size)
3441 error ("bad value (%s) for %stune=%s %s",
3442 ix86_tune_string, prefix, suffix, sw);
3444 ix86_tune_mask = 1u << ix86_tune;
3445 for (i = 0; i < X86_TUNE_LAST; ++i)
3446 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3448 #ifndef USE_IX86_FRAME_POINTER
3449 #define USE_IX86_FRAME_POINTER 0
3452 #ifndef USE_X86_64_FRAME_POINTER
3453 #define USE_X86_64_FRAME_POINTER 0
3456 /* Set the default values for switches whose default depends on TARGET_64BIT
3457 in case they weren't overwritten by command line options. */
3460 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3461 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3462 if (flag_asynchronous_unwind_tables == 2)
3463 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3464 if (flag_pcc_struct_return == 2)
3465 flag_pcc_struct_return = 0;
3469 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3470 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3471 if (flag_asynchronous_unwind_tables == 2)
3472 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3473 if (flag_pcc_struct_return == 2)
3474 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3478 ix86_cost = &ix86_size_cost;
3480 ix86_cost = processor_target_table[ix86_tune].cost;
3482 /* Arrange to set up i386_stack_locals for all functions. */
3483 init_machine_status = ix86_init_machine_status;
3485 /* Validate -mregparm= value. */
3486 if (global_options_set.x_ix86_regparm)
3489 warning (0, "-mregparm is ignored in 64-bit mode");
3490 if (ix86_regparm > REGPARM_MAX)
3492 error ("-mregparm=%d is not between 0 and %d",
3493 ix86_regparm, REGPARM_MAX);
3498 ix86_regparm = REGPARM_MAX;
3500 /* Default align_* from the processor table. */
3501 if (align_loops == 0)
3503 align_loops = processor_target_table[ix86_tune].align_loop;
3504 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3506 if (align_jumps == 0)
3508 align_jumps = processor_target_table[ix86_tune].align_jump;
3509 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3511 if (align_functions == 0)
3513 align_functions = processor_target_table[ix86_tune].align_func;
3516 /* Provide default for -mbranch-cost= value. */
3517 if (!global_options_set.x_ix86_branch_cost)
3518 ix86_branch_cost = ix86_cost->branch_cost;
3522 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3524 /* Enable by default the SSE and MMX builtins. Do allow the user to
3525 explicitly disable any of these. In particular, disabling SSE and
3526 MMX for kernel code is extremely useful. */
3527 if (!ix86_arch_specified)
3529 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3530 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3533 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3537 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3539 if (!ix86_arch_specified)
3541 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3543 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
3544 when the programmer takes care to keep the stack from being destroyed. */
3545 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3546 target_flags |= MASK_NO_RED_ZONE;
3549 /* Keep nonleaf frame pointers. */
3550 if (flag_omit_frame_pointer)
3551 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3552 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3553 flag_omit_frame_pointer = 1;
3555 /* If we're doing fast math, we don't care about comparison order
3556 wrt NaNs. This lets us use a shorter comparison sequence. */
3557 if (flag_finite_math_only)
3558 target_flags &= ~MASK_IEEE_FP;
3560 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3561 since the insns won't need emulation. */
3562 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3563 target_flags &= ~MASK_NO_FANCY_MATH_387;
3565 /* Likewise, if the target doesn't have a 387, or we've specified
3566 software floating point, don't use 387 inline intrinsics. */
3568 target_flags |= MASK_NO_FANCY_MATH_387;
3570 /* Turn on MMX builtins for -msse. */
3573 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3574 x86_prefetch_sse = true;
3577 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3578 if (TARGET_SSE4_2 || TARGET_ABM)
3579 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3581 /* Turn on lzcnt instruction for -mabm. */
3583 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3585 /* Validate -mpreferred-stack-boundary= value or default it to
3586 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3587 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3588 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3590 int min = (TARGET_64BIT ? 4 : 2);
3591 int max = (TARGET_SEH ? 4 : 12);
3593 if (ix86_preferred_stack_boundary_arg < min
3594 || ix86_preferred_stack_boundary_arg > max)
3597 error ("-mpreferred-stack-boundary is not supported "
3600 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3601 ix86_preferred_stack_boundary_arg, min, max);
3604 ix86_preferred_stack_boundary
3605 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3608 /* Set the default value for -mstackrealign. */
3609 if (ix86_force_align_arg_pointer == -1)
3610 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3612 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3614 /* Validate -mincoming-stack-boundary= value or default it to
3615 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3616 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3617 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3619 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3620 || ix86_incoming_stack_boundary_arg > 12)
3621 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3622 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3625 ix86_user_incoming_stack_boundary
3626 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3627 ix86_incoming_stack_boundary
3628 = ix86_user_incoming_stack_boundary;
3632 /* Accept -msseregparm only if at least SSE support is enabled. */
3633 if (TARGET_SSEREGPARM
3635 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3637 if (global_options_set.x_ix86_fpmath)
3639 if (ix86_fpmath & FPMATH_SSE)
3643 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3644 ix86_fpmath = FPMATH_387;
3646 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3648 warning (0, "387 instruction set disabled, using SSE arithmetics");
3649 ix86_fpmath = FPMATH_SSE;
3654 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3656 /* If the i387 is disabled, then do not return values in it. */
3658 target_flags &= ~MASK_FLOAT_RETURNS;
3660 /* Use external vectorized library in vectorizing intrinsics. */
3661 if (global_options_set.x_ix86_veclibabi_type)
3662 switch (ix86_veclibabi_type)
3664 case ix86_veclibabi_type_svml:
3665 ix86_veclib_handler = ix86_veclibabi_svml;
3668 case ix86_veclibabi_type_acml:
3669 ix86_veclib_handler = ix86_veclibabi_acml;
3676 if ((!USE_IX86_FRAME_POINTER
3677 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3678 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3680 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3682 /* ??? Unwind info is not correct around the CFG unless either a frame
3683 pointer is present or M_A_O_A is set.  Fixing this requires rewriting
3684 unwind info generation to be aware of the CFG and propagating states around edges. */
3686 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3687 || flag_exceptions || flag_non_call_exceptions)
3688 && flag_omit_frame_pointer
3689 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3691 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3692 warning (0, "unwind tables currently require either a frame pointer "
3693 "or %saccumulate-outgoing-args%s for correctness",
3695 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3698 /* If stack probes are required, the space used for large function
3699 arguments on the stack must also be probed, so enable
3700 -maccumulate-outgoing-args so this happens in the prologue. */
3701 if (TARGET_STACK_PROBE
3702 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3704 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3705 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3706 "for correctness", prefix, suffix);
3707 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3710 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3713 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3714 p = strchr (internal_label_prefix, 'X');
3715 internal_label_prefix_len = p - internal_label_prefix;
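/* Illustration (editorial): on a typical ELF target the generated
   label looks like "*.LX0", so everything before the 'X' placeholder
   (here "*.L") is recorded as the internal label prefix.  */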
3719 /* When a scheduling description is not available, disable the scheduler pass
3720 so it won't slow down compilation and make x87 code slower. */
3721 if (!TARGET_SCHEDULE)
3722 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3724 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3725 ix86_cost->simultaneous_prefetches,
3726 global_options.x_param_values,
3727 global_options_set.x_param_values);
3728 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3729 global_options.x_param_values,
3730 global_options_set.x_param_values);
3731 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3732 global_options.x_param_values,
3733 global_options_set.x_param_values);
3734 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3735 global_options.x_param_values,
3736 global_options_set.x_param_values);
3738 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3739 if (flag_prefetch_loop_arrays < 0
3742 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3743 flag_prefetch_loop_arrays = 1;
3745 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3746 can be optimized to ap = __builtin_next_arg (0). */
3747 if (!TARGET_64BIT && !flag_split_stack)
3748 targetm.expand_builtin_va_start = NULL;
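/* Illustration (editorial, hypothetical user code): with a plain
   pointer va_list, falling back to the generic expansion allows

     va_list ap;
     va_start (ap, last);

   to be lowered to the cheaper  ap = __builtin_next_arg (0);  as the
   comment above describes.  */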
3752 ix86_gen_leave = gen_leave_rex64;
3753 ix86_gen_add3 = gen_adddi3;
3754 ix86_gen_sub3 = gen_subdi3;
3755 ix86_gen_sub3_carry = gen_subdi3_carry;
3756 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3757 ix86_gen_monitor = gen_sse3_monitor64;
3758 ix86_gen_andsp = gen_anddi3;
3759 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3760 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3761 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3765 ix86_gen_leave = gen_leave;
3766 ix86_gen_add3 = gen_addsi3;
3767 ix86_gen_sub3 = gen_subsi3;
3768 ix86_gen_sub3_carry = gen_subsi3_carry;
3769 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3770 ix86_gen_monitor = gen_sse3_monitor;
3771 ix86_gen_andsp = gen_andsi3;
3772 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3773 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3774 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
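/* Editorial note: these hooks let word-size-independent code emit the
   right pattern, e.g.

     emit_insn (ix86_gen_add3 (dst, src1, src2));

   produces an adddi3 insn in 64-bit mode and an addsi3 insn in 32-bit
   mode, matching the assignments above.  */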
3778 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3780 target_flags |= MASK_CLD & ~target_flags_explicit;
3783 if (!TARGET_64BIT && flag_pic)
3785 if (flag_fentry > 0)
3786 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3790 else if (TARGET_SEH)
3792 if (flag_fentry == 0)
3793 sorry ("-mno-fentry isn%'t compatible with SEH");
3796 else if (flag_fentry < 0)
3798 #if defined(PROFILE_BEFORE_PROLOGUE)
3807 /* When not optimizing for size, enable the vzeroupper optimization for
3808 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3809 AVX unaligned loads/stores. */
3812 if (flag_expensive_optimizations
3813 && !(target_flags_explicit & MASK_VZEROUPPER))
3814 target_flags |= MASK_VZEROUPPER;
3815 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3816 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3817 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3818 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3819 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3820 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3821 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3822 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3823 target_flags |= MASK_PREFER_AVX128;
3828 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3829 target_flags &= ~MASK_VZEROUPPER;
3832 if (ix86_recip_name)
3834 char *p = ASTRDUP (ix86_recip_name);
3836 unsigned int mask, i;
3839 while ((q = strtok (p, ",")) != NULL)
3850 if (!strcmp (q, "default"))
3851 mask = RECIP_MASK_ALL;
3854 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3855 if (!strcmp (q, recip_options[i].string))
3857 mask = recip_options[i].mask;
3861 if (i == ARRAY_SIZE (recip_options))
3863 error ("unknown option for -mrecip=%s", q);
3865 mask = RECIP_MASK_NONE;
3869 recip_mask_explicit |= mask;
3871 recip_mask &= ~mask;
3878 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
3879 else if (target_flags_explicit & MASK_RECIP)
3880 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
3882 /* Save the initial options in case the user does function specific options. */
3885 target_option_default_node = target_option_current_node
3886 = build_target_option_node ();
3889 /* Return TRUE if VAL is passed in a register with a 256bit AVX mode. */
3892 function_pass_avx256_p (const_rtx val)
3897 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3900 if (GET_CODE (val) == PARALLEL)
3905 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3907 r = XVECEXP (val, 0, i);
3908 if (GET_CODE (r) == EXPR_LIST
3910 && REG_P (XEXP (r, 0))
3911 && (GET_MODE (XEXP (r, 0)) == OImode
3912 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3920 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3923 ix86_option_override (void)
3925 ix86_option_override_internal (true);
3928 /* Update register usage after having seen the compiler flags. */
3931 ix86_conditional_register_usage (void)
3936 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3938 if (fixed_regs[i] > 1)
3939 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3940 if (call_used_regs[i] > 1)
3941 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3944 /* The PIC register, if it exists, is fixed. */
3945 j = PIC_OFFSET_TABLE_REGNUM;
3946 if (j != INVALID_REGNUM)
3947 fixed_regs[j] = call_used_regs[j] = 1;
3949 /* The 64-bit MS_ABI changes the set of call-used registers. */
3950 if (TARGET_64BIT_MS_ABI)
3952 call_used_regs[SI_REG] = 0;
3953 call_used_regs[DI_REG] = 0;
3954 call_used_regs[XMM6_REG] = 0;
3955 call_used_regs[XMM7_REG] = 0;
3956 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3957 call_used_regs[i] = 0;
3960 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3961 other call-clobbered regs for 64-bit. */
3964 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3966 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3967 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3968 && call_used_regs[i])
3969 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3972 /* If MMX is disabled, squash the registers. */
3974 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3975 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3976 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3978 /* If SSE is disabled, squash the registers. */
3980 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3981 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3982 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3984 /* If the FPU is disabled, squash the registers. */
3985 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3986 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3987 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3988 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3990 /* If 32-bit, squash the 64-bit registers. */
3993 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3995 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4001 /* Save the current options.  */
4004 ix86_function_specific_save (struct cl_target_option *ptr)
4006 ptr->arch = ix86_arch;
4007 ptr->schedule = ix86_schedule;
4008 ptr->tune = ix86_tune;
4009 ptr->branch_cost = ix86_branch_cost;
4010 ptr->tune_defaulted = ix86_tune_defaulted;
4011 ptr->arch_specified = ix86_arch_specified;
4012 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4013 ptr->ix86_target_flags_explicit = target_flags_explicit;
4014 ptr->x_recip_mask_explicit = recip_mask_explicit;
4016 /* The fields are char but the variables are not; make sure the
4017 values fit in the fields. */
4018 gcc_assert (ptr->arch == ix86_arch);
4019 gcc_assert (ptr->schedule == ix86_schedule);
4020 gcc_assert (ptr->tune == ix86_tune);
4021 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4024 /* Restore the current options.  */
4027 ix86_function_specific_restore (struct cl_target_option *ptr)
4029 enum processor_type old_tune = ix86_tune;
4030 enum processor_type old_arch = ix86_arch;
4031 unsigned int ix86_arch_mask, ix86_tune_mask;
4034 ix86_arch = (enum processor_type) ptr->arch;
4035 ix86_schedule = (enum attr_cpu) ptr->schedule;
4036 ix86_tune = (enum processor_type) ptr->tune;
4037 ix86_branch_cost = ptr->branch_cost;
4038 ix86_tune_defaulted = ptr->tune_defaulted;
4039 ix86_arch_specified = ptr->arch_specified;
4040 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4041 target_flags_explicit = ptr->ix86_target_flags_explicit;
4042 recip_mask_explicit = ptr->x_recip_mask_explicit;
4044 /* Recreate the arch feature tests if the arch changed.  */
4045 if (old_arch != ix86_arch)
4047 ix86_arch_mask = 1u << ix86_arch;
4048 for (i = 0; i < X86_ARCH_LAST; ++i)
4049 ix86_arch_features[i]
4050 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4053 /* Recreate the tune optimization tests.  */
4054 if (old_tune != ix86_tune)
4056 ix86_tune_mask = 1u << ix86_tune;
4057 for (i = 0; i < X86_TUNE_LAST; ++i)
4058 ix86_tune_features[i]
4059 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4063 /* Print the current options.  */
4066 ix86_function_specific_print (FILE *file, int indent,
4067 struct cl_target_option *ptr)
4070 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4071 NULL, NULL, ptr->x_ix86_fpmath, false);
4073 fprintf (file, "%*sarch = %d (%s)\n",
4076 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4077 ? cpu_names[ptr->arch]
4080 fprintf (file, "%*stune = %d (%s)\n",
4083 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4084 ? cpu_names[ptr->tune]
4087 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4091 fprintf (file, "%*s%s\n", indent, "", target_string);
4092 free (target_string);
4097 /* Inner function to process the attribute((target(...))); take an argument and
4098 set the current options from the argument.  If we have a list, recursively go over the list. */
4102 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4103 struct gcc_options *enum_opts_set)
4108 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4109 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4110 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4111 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4112 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4128 enum ix86_opt_type type;
4133 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4134 IX86_ATTR_ISA ("abm", OPT_mabm),
4135 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4136 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4137 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4138 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4139 IX86_ATTR_ISA ("aes", OPT_maes),
4140 IX86_ATTR_ISA ("avx", OPT_mavx),
4141 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4142 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4143 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4144 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4145 IX86_ATTR_ISA ("sse", OPT_msse),
4146 IX86_ATTR_ISA ("sse2", OPT_msse2),
4147 IX86_ATTR_ISA ("sse3", OPT_msse3),
4148 IX86_ATTR_ISA ("sse4", OPT_msse4),
4149 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4150 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4151 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4152 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4153 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4154 IX86_ATTR_ISA ("fma", OPT_mfma),
4155 IX86_ATTR_ISA ("xop", OPT_mxop),
4156 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4157 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4158 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4159 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4162 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4164 /* string options */
4165 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4166 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4169 IX86_ATTR_YES ("cld",
4173 IX86_ATTR_NO ("fancy-math-387",
4174 OPT_mfancy_math_387,
4175 MASK_NO_FANCY_MATH_387),
4177 IX86_ATTR_YES ("ieee-fp",
4181 IX86_ATTR_YES ("inline-all-stringops",
4182 OPT_minline_all_stringops,
4183 MASK_INLINE_ALL_STRINGOPS),
4185 IX86_ATTR_YES ("inline-stringops-dynamically",
4186 OPT_minline_stringops_dynamically,
4187 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4189 IX86_ATTR_NO ("align-stringops",
4190 OPT_mno_align_stringops,
4191 MASK_NO_ALIGN_STRINGOPS),
4193 IX86_ATTR_YES ("recip",
4199 /* If this is a list, recurse to get the options. */
4200 if (TREE_CODE (args) == TREE_LIST)
4204 for (; args; args = TREE_CHAIN (args))
4205 if (TREE_VALUE (args)
4206 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4207 p_strings, enum_opts_set))
4213 else if (TREE_CODE (args) != STRING_CST)
4216 /* Handle multiple arguments separated by commas. */
4217 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4219 while (next_optstr && *next_optstr != '\0')
4221 char *p = next_optstr;
4223 char *comma = strchr (next_optstr, ',');
4224 const char *opt_string;
4225 size_t len, opt_len;
4230 enum ix86_opt_type type = ix86_opt_unknown;
4236 len = comma - next_optstr;
4237 next_optstr = comma + 1;
4245 /* Recognize no-xxx. */
4246 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4255 /* Find the option. */
4258 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4260 type = attrs[i].type;
4261 opt_len = attrs[i].len;
4262 if (ch == attrs[i].string[0]
4263 && ((type != ix86_opt_str && type != ix86_opt_enum)
4266 && memcmp (p, attrs[i].string, opt_len) == 0)
4269 mask = attrs[i].mask;
4270 opt_string = attrs[i].string;
4275 /* Process the option. */
4278 error ("attribute(target(\"%s\")) is unknown", orig_p);
4282 else if (type == ix86_opt_isa)
4284 struct cl_decoded_option decoded;
4286 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4287 ix86_handle_option (&global_options, &global_options_set,
4288 &decoded, input_location);
4291 else if (type == ix86_opt_yes || type == ix86_opt_no)
4293 if (type == ix86_opt_no)
4294 opt_set_p = !opt_set_p;
4297 target_flags |= mask;
4299 target_flags &= ~mask;
4302 else if (type == ix86_opt_str)
4306 error ("option(\"%s\") was already specified", opt_string);
4310 p_strings[opt] = xstrdup (p + opt_len);
4313 else if (type == ix86_opt_enum)
4318 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4320 set_option (&global_options, enum_opts_set, opt, value,
4321 p + opt_len, DK_UNSPECIFIED, input_location,
4325 error ("attribute(target(\"%s\")) is unknown", orig_p);
4337 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4340 ix86_valid_target_attribute_tree (tree args)
4342 const char *orig_arch_string = ix86_arch_string;
4343 const char *orig_tune_string = ix86_tune_string;
4344 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4345 int orig_tune_defaulted = ix86_tune_defaulted;
4346 int orig_arch_specified = ix86_arch_specified;
4347 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4350 struct cl_target_option *def
4351 = TREE_TARGET_OPTION (target_option_default_node);
4352 struct gcc_options enum_opts_set;
4354 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4356 /* Process each of the options on the chain. */
4357 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4361 /* If the changed options are different from the default, rerun
4362 ix86_option_override_internal, and then save the options away.
4363 The string options are attribute options, and will be undone
4364 when we copy the save structure. */
4365 if (ix86_isa_flags != def->x_ix86_isa_flags
4366 || target_flags != def->x_target_flags
4367 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4368 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4369 || enum_opts_set.x_ix86_fpmath)
4371 /* If we are using the default tune= or arch=, undo the string assigned,
4372 and use the default. */
4373 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4374 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4375 else if (!orig_arch_specified)
4376 ix86_arch_string = NULL;
4378 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4379 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4380 else if (orig_tune_defaulted)
4381 ix86_tune_string = NULL;
4383 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4384 if (enum_opts_set.x_ix86_fpmath)
4385 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4386 else if (!TARGET_64BIT && TARGET_SSE)
4388 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4389 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4392 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4393 ix86_option_override_internal (false);
4395 /* Add any builtin functions with the new isa if any. */
4396 ix86_add_new_builtins (ix86_isa_flags);
4398 /* Save the current options unless we are validating options for #pragma. */
4400 t = build_target_option_node ();
4402 ix86_arch_string = orig_arch_string;
4403 ix86_tune_string = orig_tune_string;
4404 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4406 /* Free up memory allocated to hold the strings.  */
4407 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4408 free (option_strings[i]);
4414 /* Hook to validate attribute((target("string"))). */
4417 ix86_valid_target_attribute_p (tree fndecl,
4418 tree ARG_UNUSED (name),
4420 int ARG_UNUSED (flags))
4422 struct cl_target_option cur_target;
4424 tree old_optimize = build_optimization_node ();
4425 tree new_target, new_optimize;
4426 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4428 /* If the function changed the optimization levels as well as setting target
4429 options, start with the optimizations specified. */
4430 if (func_optimize && func_optimize != old_optimize)
4431 cl_optimization_restore (&global_options,
4432 TREE_OPTIMIZATION (func_optimize));
4434 /* The target attributes may also change some optimization flags, so update
4435 the optimization options if necessary. */
4436 cl_target_option_save (&cur_target, &global_options);
4437 new_target = ix86_valid_target_attribute_tree (args);
4438 new_optimize = build_optimization_node ();
4445 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4447 if (old_optimize != new_optimize)
4448 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4451 cl_target_option_restore (&global_options, &cur_target);
4453 if (old_optimize != new_optimize)
4454 cl_optimization_restore (&global_options,
4455 TREE_OPTIMIZATION (old_optimize));
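/* Illustration (editorial, hypothetical user code): the hook above is
   what validates a declaration such as

     __attribute__ ((target ("sse4.2,popcnt")))
     int count_bits (unsigned int x) { return __builtin_popcount (x); }

   building a target-option node that applies to that function only.  */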
4461 /* Hook to determine if one function can safely inline another. */
4464 ix86_can_inline_p (tree caller, tree callee)
4467 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4468 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4470 /* If callee has no option attributes, then it is ok to inline. */
4474 /* If the caller has no option attributes but the callee does, then it is not ok to inline. */
4476 else if (!caller_tree)
4481 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4482 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4484 /* The callee's isa options should be a subset of the caller's, i.e. an SSE4
4485 function can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4487 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4488 != callee_opts->x_ix86_isa_flags)
4491 /* See if we have the same non-isa options. */
4492 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4495 /* See if arch, tune, etc. are the same. */
4496 else if (caller_opts->arch != callee_opts->arch)
4499 else if (caller_opts->tune != callee_opts->tune)
4502 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4505 else if (caller_opts->branch_cost != callee_opts->branch_cost)
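/* Illustration (editorial, hypothetical user code): under the subset
   rule above, an SSE4.2 caller may inline an SSE2 callee because its
   ISA flags include all of the callee's:

     __attribute__ ((target ("sse2"))) static int f (void) { return 1; }
     __attribute__ ((target ("sse4.2"))) int g (void) { return f (); }

   whereas inlining g into an SSE2-only function would be refused.  */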
4516 /* Remember the last target of ix86_set_current_function. */
4517 static GTY(()) tree ix86_previous_fndecl;
4519 /* Establish appropriate back-end context for processing the function
4520 FNDECL. The argument might be NULL to indicate processing at top
4521 level, outside of any function scope. */
4523 ix86_set_current_function (tree fndecl)
4525 /* Only change the context if the function changes. This hook is called
4526 several times in the course of compiling a function, and we don't want to
4527 slow things down too much or call target_reinit when it isn't safe. */
4528 if (fndecl && fndecl != ix86_previous_fndecl)
4530 tree old_tree = (ix86_previous_fndecl
4531 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4534 tree new_tree = (fndecl
4535 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4538 ix86_previous_fndecl = fndecl;
4539 if (old_tree == new_tree)
4544 cl_target_option_restore (&global_options,
4545 TREE_TARGET_OPTION (new_tree));
4551 struct cl_target_option *def
4552 = TREE_TARGET_OPTION (target_option_current_node);
4554 cl_target_option_restore (&global_options, def);
4561 /* Return true if this goes in large data/bss. */
4564 ix86_in_large_data_p (tree exp)
4566 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4569 /* Functions are never large data. */
4570 if (TREE_CODE (exp) == FUNCTION_DECL)
4573 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4575 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4576 if (strcmp (section, ".ldata") == 0
4577 || strcmp (section, ".lbss") == 0)
4583 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4585 /* If this is an incomplete type with size 0, then we can't put it
4586 in data because it might be too big when completed. */
4587 if (!size || size > ix86_section_threshold)
4594 /* Switch to the appropriate section for output of DECL.
4595 DECL is either a `VAR_DECL' node or a constant of some sort.
4596 RELOC indicates whether forming the initial value of DECL requires
4597 link-time relocations. */
4599 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4603 x86_64_elf_select_section (tree decl, int reloc,
4604 unsigned HOST_WIDE_INT align)
4606 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4607 && ix86_in_large_data_p (decl))
4609 const char *sname = NULL;
4610 unsigned int flags = SECTION_WRITE;
4611 switch (categorize_decl_for_section (decl, reloc))
4616 case SECCAT_DATA_REL:
4617 sname = ".ldata.rel";
4619 case SECCAT_DATA_REL_LOCAL:
4620 sname = ".ldata.rel.local";
4622 case SECCAT_DATA_REL_RO:
4623 sname = ".ldata.rel.ro";
4625 case SECCAT_DATA_REL_RO_LOCAL:
4626 sname = ".ldata.rel.ro.local";
4630 flags |= SECTION_BSS;
4633 case SECCAT_RODATA_MERGE_STR:
4634 case SECCAT_RODATA_MERGE_STR_INIT:
4635 case SECCAT_RODATA_MERGE_CONST:
4639 case SECCAT_SRODATA:
4646 /* We don't split these for the medium model.  Place them into
4647 default sections and hope for the best. */
4652 /* We might get called with string constants, but get_named_section
4653 doesn't like them as they are not DECLs. Also, we need to set
4654 flags in that case. */
4656 return get_section (sname, flags, NULL);
4657 return get_named_section (decl, sname, reloc);
4660 return default_elf_select_section (decl, reloc, align);
4663 /* Build up a unique section name, expressed as a
4664 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4665 RELOC indicates whether the initial value of EXP requires
4666 link-time relocations. */
4668 static void ATTRIBUTE_UNUSED
4669 x86_64_elf_unique_section (tree decl, int reloc)
4671 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4672 && ix86_in_large_data_p (decl))
4674 const char *prefix = NULL;
4675 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4676 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4678 switch (categorize_decl_for_section (decl, reloc))
4681 case SECCAT_DATA_REL:
4682 case SECCAT_DATA_REL_LOCAL:
4683 case SECCAT_DATA_REL_RO:
4684 case SECCAT_DATA_REL_RO_LOCAL:
4685 prefix = one_only ? ".ld" : ".ldata";
4688 prefix = one_only ? ".lb" : ".lbss";
4691 case SECCAT_RODATA_MERGE_STR:
4692 case SECCAT_RODATA_MERGE_STR_INIT:
4693 case SECCAT_RODATA_MERGE_CONST:
4694 prefix = one_only ? ".lr" : ".lrodata";
4696 case SECCAT_SRODATA:
4703 /* We don't split these for the medium model.  Place them into
4704 default sections and hope for the best. */
4709 const char *name, *linkonce;
4712 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4713 name = targetm.strip_name_encoding (name);
4715 /* If we're using one_only, then there needs to be a .gnu.linkonce
4716 prefix to the section name. */
4717 linkonce = one_only ? ".gnu.linkonce" : "";
4719 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4721 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4725 default_unique_section (decl, reloc);
4728 #ifdef COMMON_ASM_OP
4729 /* This says how to output assembler code to declare an
4730 uninitialized external linkage data object.
4732 For medium model x86-64 we need to use the .largecomm opcode for large objects. */
4735 x86_elf_aligned_common (FILE *file,
4736 const char *name, unsigned HOST_WIDE_INT size,
4739 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4740 && size > (unsigned int)ix86_section_threshold)
4741 fputs (".largecomm\t", file);
4743 fputs (COMMON_ASM_OP, file);
4744 assemble_name (file, name);
4745 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4746 size, align / BITS_PER_UNIT);
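/* Illustration (editorial): for a medium-model object above
   ix86_section_threshold this emits a directive of the form

     .largecomm  big_array,1048576,32

   (name, size in bytes, alignment in bytes); smaller objects get the
   ordinary COMMON_ASM_OP directive instead.  */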
4750 /* Utility function for targets to use in implementing
4751 ASM_OUTPUT_ALIGNED_BSS. */
4754 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4755 const char *name, unsigned HOST_WIDE_INT size,
4758 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4759 && size > (unsigned int)ix86_section_threshold)
4760 switch_to_section (get_named_section (decl, ".lbss", 0));
4762 switch_to_section (bss_section);
4763 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4764 #ifdef ASM_DECLARE_OBJECT_NAME
4765 last_assemble_variable_decl = decl;
4766 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4768 /* Standard thing is just output label for the object. */
4769 ASM_OUTPUT_LABEL (file, name);
4770 #endif /* ASM_DECLARE_OBJECT_NAME */
4771 ASM_OUTPUT_SKIP (file, size ? size : 1);
4774 /* Decide whether we must probe the stack before any space allocation
4775 on this target. It's essentially TARGET_STACK_PROBE except when
4776 -fstack-check causes the stack to be already probed differently. */
4779 ix86_target_stack_probe (void)
4781 /* Do not probe the stack twice if static stack checking is enabled. */
4782 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4785 return TARGET_STACK_PROBE;
4788 /* Decide whether we can make a sibling call to a function. DECL is the
4789 declaration of the function being targeted by the call and EXP is the
4790 CALL_EXPR representing the call. */
4793 ix86_function_ok_for_sibcall (tree decl, tree exp)
4795 tree type, decl_or_type;
4798 /* If we are generating position-independent code, we cannot sibcall
4799 optimize any indirect call, or a direct call to a global function,
4800 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4804 && (!decl || !targetm.binds_local_p (decl)))
4807 /* If we need to align the outgoing stack, then sibcalling would
4808 unalign the stack, which may break the called function. */
4809 if (ix86_minimum_incoming_stack_boundary (true)
4810 < PREFERRED_STACK_BOUNDARY)
4815 decl_or_type = decl;
4816 type = TREE_TYPE (decl);
4820 /* We're looking at the CALL_EXPR, we need the type of the function. */
4821 type = CALL_EXPR_FN (exp); /* pointer expression */
4822 type = TREE_TYPE (type); /* pointer type */
4823 type = TREE_TYPE (type); /* function type */
4824 decl_or_type = type;
4827 /* Check that the return value locations are the same. Like
4828 if we are returning floats on the 80387 register stack, we cannot
4829 make a sibcall from a function that doesn't return a float to a
4830 function that does or, conversely, from a function that does return
4831 a float to a function that doesn't; the necessary stack adjustment
4832 would not be executed. This is also the place we notice
4833 differences in the return value ABI. Note that it is ok for one
4834 of the functions to have void return type as long as the return
4835 value of the other is passed in a register. */
4836 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4837 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4839 if (STACK_REG_P (a) || STACK_REG_P (b))
4841 if (!rtx_equal_p (a, b))
4844 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4846 /* Disable sibcall if we need to generate vzeroupper after the callee returns. */
4848 if (TARGET_VZEROUPPER
4849 && cfun->machine->callee_return_avx256_p
4850 && !cfun->machine->caller_return_avx256_p)
4853 else if (!rtx_equal_p (a, b))
4858 /* The SYSV ABI has more call-clobbered registers;
4859 disallow sibcalls from MS to SYSV. */
4860 if (cfun->machine->call_abi == MS_ABI
4861 && ix86_function_type_abi (type) == SYSV_ABI)
4866 /* If this call is indirect, we'll need to be able to use a
4867 call-clobbered register for the address of the target function.
4868 Make sure that all such registers are not used for passing
4869 parameters. Note that DLLIMPORT functions are indirect. */
4871 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4873 if (ix86_function_regparm (type, NULL) >= 3)
4875 /* ??? Need to count the actual number of registers to be used,
4876 not the possible number of registers. Fix later. */
4882 /* Otherwise okay. That also includes certain types of indirect calls. */
4886 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4887 and "sseregparm" calling convention attributes;
4888 arguments as in struct attribute_spec.handler. */
4891 ix86_handle_cconv_attribute (tree *node, tree name,
4893 int flags ATTRIBUTE_UNUSED,
4896 if (TREE_CODE (*node) != FUNCTION_TYPE
4897 && TREE_CODE (*node) != METHOD_TYPE
4898 && TREE_CODE (*node) != FIELD_DECL
4899 && TREE_CODE (*node) != TYPE_DECL)
4901 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4903 *no_add_attrs = true;
4907 /* Can combine regparm with all attributes but fastcall and thiscall. */
4908 if (is_attribute_p ("regparm", name))
4912 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4914 error ("fastcall and regparm attributes are not compatible");
4917 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4919 error ("regparam and thiscall attributes are not compatible");
4922 cst = TREE_VALUE (args);
4923 if (TREE_CODE (cst) != INTEGER_CST)
4925 warning (OPT_Wattributes,
4926 "%qE attribute requires an integer constant argument",
4928 *no_add_attrs = true;
4930 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4932 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4934 *no_add_attrs = true;
4942 /* Do not warn when emulating the MS ABI. */
4943 if ((TREE_CODE (*node) != FUNCTION_TYPE
4944 && TREE_CODE (*node) != METHOD_TYPE)
4945 || ix86_function_type_abi (*node) != MS_ABI)
4946 warning (OPT_Wattributes, "%qE attribute ignored",
4948 *no_add_attrs = true;
4952 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4953 if (is_attribute_p ("fastcall", name))
4955 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4957 error ("fastcall and cdecl attributes are not compatible");
4959 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4961 error ("fastcall and stdcall attributes are not compatible");
4963 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4965 error ("fastcall and regparm attributes are not compatible");
4967 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4969 error ("fastcall and thiscall attributes are not compatible");
4973 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4975 else if (is_attribute_p ("stdcall", name))
4977 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4979 error ("stdcall and cdecl attributes are not compatible");
4981 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4983 error ("stdcall and fastcall attributes are not compatible");
4985 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4987 error ("stdcall and thiscall attributes are not compatible");
4991 /* Can combine cdecl with regparm and sseregparm. */
4992 else if (is_attribute_p ("cdecl", name))
4994 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4996 error ("stdcall and cdecl attributes are not compatible");
4998 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5000 error ("fastcall and cdecl attributes are not compatible");
5002 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5004 error ("cdecl and thiscall attributes are not compatible");
5007 else if (is_attribute_p ("thiscall", name))
5009 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5010 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5012 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5014 error ("stdcall and thiscall attributes are not compatible");
5016 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5018 error ("fastcall and thiscall attributes are not compatible");
5020 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5022 error ("cdecl and thiscall attributes are not compatible");
5026 /* Can combine sseregparm with all attributes. */
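/* Illustration (editorial, hypothetical user code): the checks above
   reject contradictory combinations at declaration time, e.g.

     void __attribute__ ((fastcall, cdecl)) f (int);

   is diagnosed with "fastcall and cdecl attributes are not
   compatible", while sseregparm may accompany any of them.  */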
5031 /* The transactional memory builtins are implicitly regparm or fastcall
5032 depending on the ABI. Override the generic do-nothing attribute that
5033 these builtins were declared with, and replace it with one of the two
5034 attributes that we expect elsewhere. */
5037 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5038 tree args ATTRIBUTE_UNUSED,
5039 int flags ATTRIBUTE_UNUSED,
5044 /* In no case do we want to add the placeholder attribute. */
5045 *no_add_attrs = true;
5047 /* The 64-bit ABI is unchanged for transactional memory. */
5051 /* ??? Is there a better way to validate 32-bit windows? We have
5052 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5053 if (CHECK_STACK_LIMIT > 0)
5054 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5057 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5058 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5060 decl_attributes (node, alt, flags);
5065 /* This function determines from TYPE the calling-convention. */
5068 ix86_get_callcvt (const_tree type)
5070 unsigned int ret = 0;
5075 return IX86_CALLCVT_CDECL;
5077 attrs = TYPE_ATTRIBUTES (type);
5078 if (attrs != NULL_TREE)
5080 if (lookup_attribute ("cdecl", attrs))
5081 ret |= IX86_CALLCVT_CDECL;
5082 else if (lookup_attribute ("stdcall", attrs))
5083 ret |= IX86_CALLCVT_STDCALL;
5084 else if (lookup_attribute ("fastcall", attrs))
5085 ret |= IX86_CALLCVT_FASTCALL;
5086 else if (lookup_attribute ("thiscall", attrs))
5087 ret |= IX86_CALLCVT_THISCALL;
5089 /* Regparm isn't allowed for thiscall and fastcall. */
5090 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5092 if (lookup_attribute ("regparm", attrs))
5093 ret |= IX86_CALLCVT_REGPARM;
5094 if (lookup_attribute ("sseregparm", attrs))
5095 ret |= IX86_CALLCVT_SSEREGPARM;
5098 if (IX86_BASE_CALLCVT(ret) != 0)
5102 is_stdarg = stdarg_p (type);
5103 if (TARGET_RTD && !is_stdarg)
5104 return IX86_CALLCVT_STDCALL | ret;
5108 || TREE_CODE (type) != METHOD_TYPE
5109 || ix86_function_type_abi (type) != MS_ABI)
5110 return IX86_CALLCVT_CDECL | ret;
5112 return IX86_CALLCVT_THISCALL;
5115 /* Return 0 if the attributes for two types are incompatible, 1 if they
5116 are compatible, and 2 if they are nearly compatible (which causes a
5117 warning to be generated). */
5120 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5122 unsigned int ccvt1, ccvt2;
5124 if (TREE_CODE (type1) != FUNCTION_TYPE
5125 && TREE_CODE (type1) != METHOD_TYPE)
5128 ccvt1 = ix86_get_callcvt (type1);
5129 ccvt2 = ix86_get_callcvt (type2);
5132 if (ix86_function_regparm (type1, NULL)
5133 != ix86_function_regparm (type2, NULL))
5139 /* Return the regparm value for a function with the indicated TYPE and DECL.
5140 DECL may be NULL when calling a function indirectly
5141 or considering a libcall. */
5144 ix86_function_regparm (const_tree type, const_tree decl)
5151 return (ix86_function_type_abi (type) == SYSV_ABI
5152 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5153 ccvt = ix86_get_callcvt (type);
5154 regparm = ix86_regparm;
5156 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5158 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5161 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5165 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5167 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5170 /* Use register calling convention for local functions when possible. */
5172 && TREE_CODE (decl) == FUNCTION_DECL
5174 && !(profile_flag && !flag_fentry))
5176 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5177 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5178 if (i && i->local && i->can_change_signature)
5180 int local_regparm, globals = 0, regno;
5182 /* Make sure no regparm register is taken by a
5183 fixed register variable. */
5184 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5185 if (fixed_regs[local_regparm])
5188 /* We don't want to use regparm(3) for nested functions as
5189 these use a static chain pointer in the third argument. */
5190 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5193 /* In 32-bit mode save a register for the split stack. */
5194 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5197 /* Each fixed register usage increases register pressure,
5198 so fewer registers should be used for argument passing.
5199 This functionality can be overridden by an explicit
5201 for (regno = 0; regno <= DI_REG; regno++)
5202 if (fixed_regs[regno])
5206 = globals < local_regparm ? local_regparm - globals : 0;
5208 if (local_regparm > regparm)
5209 regparm = local_regparm;
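/* Illustration (editorial, hypothetical user code): an explicit
   regparm(3) makes the first three integer arguments travel in
   EAX, EDX and ECX instead of on the stack; the local-function path
   above derives a similar value automatically.  */
#if 0
static int __attribute__ ((regparm (3)))
add3 (int a, int b, int c)
{
  return a + b + c;
}
#endif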
5216 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5217 DFmode (2) arguments in SSE registers for a function with the
5218 indicated TYPE and DECL. DECL may be NULL when calling function
5219 indirectly or considering a libcall. Otherwise return 0. */
5222 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5224 gcc_assert (!TARGET_64BIT);
5226 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5227 by the sseregparm attribute. */
5228 if (TARGET_SSEREGPARM
5229 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5236 error ("calling %qD with attribute sseregparm without "
5237 "SSE/SSE2 enabled", decl);
5239 error ("calling %qT with attribute sseregparm without "
5240 "SSE/SSE2 enabled", type);
5248 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5249 (and DFmode for SSE2) arguments in SSE registers. */
5250 if (decl && TARGET_SSE_MATH && optimize
5251 && !(profile_flag && !flag_fentry))
5253 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5254 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5255 if (i && i->local && i->can_change_signature)
5256 return TARGET_SSE2 ? 2 : 1;
5262 /* Return true if EAX is live at the start of the function. Used by
5263 ix86_expand_prologue to determine if we need special help before
5264 calling allocate_stack_worker. */
5267 ix86_eax_live_at_start_p (void)
5269 /* Cheat. Don't bother working forward from ix86_function_regparm
5270 to the function type to whether an actual argument is located in
5271 eax. Instead just look at cfg info, which is still close enough
5272 to correct at this point. This gives false positives for broken
5273 functions that might use uninitialized data that happens to be
5274 allocated in eax, but who cares? */
5275 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5279 ix86_keep_aggregate_return_pointer (tree fntype)
5285 attr = lookup_attribute ("callee_pop_aggregate_return",
5286 TYPE_ATTRIBUTES (fntype));
5288 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5290 /* For 32-bit MS-ABI the default is to keep the aggregate return pointer. */
5292 if (ix86_function_type_abi (fntype) == MS_ABI)
5295 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5298 /* Value is the number of bytes of arguments automatically
5299 popped when returning from a subroutine call.
5300 FUNDECL is the declaration node of the function (as a tree),
5301 FUNTYPE is the data type of the function (as a tree),
5302 or for a library call it is an identifier node for the subroutine name.
5303 SIZE is the number of bytes of arguments passed on the stack.
5305 On the 80386, the RTD insn may be used to pop them if the number
5306 of args is fixed, but if the number is variable then the caller
5307 must pop them all. RTD can't be used for library calls now
5308 because the library is compiled with the Unix compiler.
5309 Use of RTD is a selectable option, since it is incompatible with
5310 standard Unix calling sequences. If the option is not selected,
5311 the caller must always pop the args.
5313 The attribute stdcall is equivalent to RTD on a per module basis. */
5316 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5320 /* None of the 64-bit ABIs pop arguments. */
5324 ccvt = ix86_get_callcvt (funtype);
5326 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5327 | IX86_CALLCVT_THISCALL)) != 0
5328 && ! stdarg_p (funtype))
5331 /* Lose any fake structure return argument if it is passed on the stack. */
5332 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5333 && !ix86_keep_aggregate_return_pointer (funtype))
5335 int nregs = ix86_function_regparm (funtype, fundecl);
5337 return GET_MODE_SIZE (Pmode);
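/* Worked example (editorial): a 32-bit stdcall function taking two
   int arguments reports 8 here, so it returns with "ret $8" and the
   caller never pops the arguments itself; a variadic prototype forces
   the caller-pops convention instead.  */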
5343 /* Argument support functions. */
5345 /* Return true when a register may be used to pass function parameters. */
5347 ix86_function_arg_regno_p (int regno)
5350 const int *parm_regs;
5355 return (regno < REGPARM_MAX
5356 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5358 return (regno < REGPARM_MAX
5359 || (TARGET_MMX && MMX_REGNO_P (regno)
5360 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5361 || (TARGET_SSE && SSE_REGNO_P (regno)
5362 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5367 if (SSE_REGNO_P (regno) && TARGET_SSE)
5372 if (TARGET_SSE && SSE_REGNO_P (regno)
5373 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5377 /* TODO: The function should depend on current function ABI but
5378 builtins.c would need updating then.  Therefore we use the default ABI. */
5381 /* RAX is used as a hidden argument to va_arg functions. */
5382 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5385 if (ix86_abi == MS_ABI)
5386 parm_regs = x86_64_ms_abi_int_parameter_registers;
5388 parm_regs = x86_64_int_parameter_registers;
5389 for (i = 0; i < (ix86_abi == MS_ABI
5390 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5391 if (regno == parm_regs[i])
5396 /* Return true if we do not know how to pass TYPE solely in registers. */
5399 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5401 if (must_pass_in_stack_var_size_or_pad (mode, type))
5404 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5405 The layout_type routine is crafty and tries to trick us into passing
5406 currently unsupported vector types on the stack by using TImode. */
5407 return (!TARGET_64BIT && mode == TImode
5408 && type && TREE_CODE (type) != VECTOR_TYPE);
5411 /* Return the size, in bytes, of the area reserved for arguments passed
5412 in registers for the function represented by FNDECL, depending on the ABI used. */
5415 ix86_reg_parm_stack_space (const_tree fndecl)
5417 enum calling_abi call_abi = SYSV_ABI;
5418 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5419 call_abi = ix86_function_abi (fndecl);
5421 call_abi = ix86_function_type_abi (fndecl);
5422 if (TARGET_64BIT && call_abi == MS_ABI)
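/* Editorial note: the 64-bit MS ABI is the interesting case here, as
   callers must reserve a 32-byte "shadow space" above the return
   address for the four register arguments; the SYSV ABIs reserve no
   such area.  */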
5427 /* Returns SYSV_ABI or MS_ABI, depending on fntype, specifying the call ABI used. */
5430 ix86_function_type_abi (const_tree fntype)
5432 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5434 enum calling_abi abi = ix86_abi;
5435 if (abi == SYSV_ABI)
5437 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5440 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5448 ix86_function_ms_hook_prologue (const_tree fn)
5450 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5452 if (decl_function_context (fn) != NULL_TREE)
5453 error_at (DECL_SOURCE_LOCATION (fn),
5454 "ms_hook_prologue is not compatible with nested function");
5461 static enum calling_abi
5462 ix86_function_abi (const_tree fndecl)
5466 return ix86_function_type_abi (TREE_TYPE (fndecl));
5469 /* Returns SYSV_ABI or MS_ABI, depending on cfun, specifying the call ABI used. */
5472 ix86_cfun_abi (void)
5476 return cfun->machine->call_abi;
5479 /* Write the extra assembler code needed to declare a function properly. */
5482 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5485 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5489 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5490 unsigned int filler_cc = 0xcccccccc;
5492 for (i = 0; i < filler_count; i += 4)
5493 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5496 #ifdef SUBTARGET_ASM_UNWIND_INIT
5497 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5500 ASM_OUTPUT_LABEL (asm_out_file, fname);
5502 /* Output magic byte marker, if hot-patch attribute is set. */
5507 /* leaq [%rsp + 0], %rsp */
5508 asm_fprintf (asm_out_file, ASM_BYTE
5509 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5513 /* movl.s %edi, %edi
5515 movl.s %esp, %ebp */
5516 asm_fprintf (asm_out_file, ASM_BYTE
5517 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5523 extern void init_regs (void);
5525 /* Implementation of the call ABI switching target hook. For the given
5526 FNDECL the corresponding set of call registers is selected. See also
5527 ix86_conditional_register_usage for more details. */
5529 ix86_call_abi_override (const_tree fndecl)
5531 if (fndecl == NULL_TREE)
5532 cfun->machine->call_abi = ix86_abi;
5534 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5537 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.  Avoid
5538 expensive re-initialization of init_regs each time we switch function context,
5539 since this is needed only during RTL expansion. */
5541 ix86_maybe_switch_abi (void)
5544 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5548 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5549 for a call to a function whose data type is FNTYPE.
5550 For a library call, FNTYPE is 0. */
5553 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5554 tree fntype, /* tree ptr for function decl */
5555 rtx libname, /* SYMBOL_REF of library name or 0 */
5559 struct cgraph_local_info *i;
5562 memset (cum, 0, sizeof (*cum));
5564 /* Initialize for the current callee. */
5567 cfun->machine->callee_pass_avx256_p = false;
5568 cfun->machine->callee_return_avx256_p = false;
5573 i = cgraph_local_info (fndecl);
5574 cum->call_abi = ix86_function_abi (fndecl);
5575 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5580 cum->call_abi = ix86_function_type_abi (fntype);
5582 fnret_type = TREE_TYPE (fntype);
5587 if (TARGET_VZEROUPPER && fnret_type)
5589 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5591 if (function_pass_avx256_p (fnret_value))
5593 /* The return value of this function uses 256bit AVX modes. */
5595 cfun->machine->callee_return_avx256_p = true;
5597 cfun->machine->caller_return_avx256_p = true;
5601 cum->caller = caller;
5603 /* Set up the number of registers to use for passing arguments. */
5605 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5606 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5607 "or subtarget optimization implying it");
5608 cum->nregs = ix86_regparm;
5611 cum->nregs = (cum->call_abi == SYSV_ABI
5612 ? X86_64_REGPARM_MAX
5613 : X86_64_MS_REGPARM_MAX);
5617 cum->sse_nregs = SSE_REGPARM_MAX;
5620 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5621 ? X86_64_SSE_REGPARM_MAX
5622 : X86_64_MS_SSE_REGPARM_MAX);
5626 cum->mmx_nregs = MMX_REGPARM_MAX;
5627 cum->warn_avx = true;
5628 cum->warn_sse = true;
5629 cum->warn_mmx = true;
5631 /* Because the type might mismatch between caller and callee, we need to
5632 use the actual type of the function for local calls.
5633 FIXME: cgraph_analyze can be told to actually record if function uses
5634 va_start so for local functions maybe_vaarg can be made aggressive.
5636 FIXME: once the type system is fixed, we won't need this code anymore. */
5637 if (i && i->local && i->can_change_signature)
5638 fntype = TREE_TYPE (fndecl);
5639 cum->maybe_vaarg = (fntype
5640 ? (!prototype_p (fntype) || stdarg_p (fntype))
5645 /* If there are variable arguments, then we won't pass anything
5646 in registers in 32-bit mode. */
5647 if (stdarg_p (fntype))
5658 /* Use ecx and edx registers if function has fastcall attribute,
5659 else look for regparm information. */
5662 unsigned int ccvt = ix86_get_callcvt (fntype);
5663 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5666 cum->fastcall = 1; /* Same first register as in fastcall. */
5668 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5674 cum->nregs = ix86_function_regparm (fntype, fndecl);
5677 /* Set up the number of SSE registers used for passing SFmode
5678 and DFmode arguments. Warn for mismatching ABI. */
5679 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5683 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5684 But in the case of vector types, it is some vector mode.
5686 When we have only some of our vector isa extensions enabled, then there
5687 are some modes for which vector_mode_supported_p is false. For these
5688 modes, the generic vector support in gcc will choose some non-vector mode
5689 in order to implement the type. By computing the natural mode, we'll
5690 select the proper ABI location for the operand and not depend on whatever
5691 the middle-end decides to do with these vector types.
5693 The middle-end can't deal with vector types larger than 16 bytes. In this
5694 case, we return the original mode and warn of the ABI change if CUM is not NULL. */
5697 static enum machine_mode
5698 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5700 enum machine_mode mode = TYPE_MODE (type);
5702 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5704 HOST_WIDE_INT size = int_size_in_bytes (type);
5705 if ((size == 8 || size == 16 || size == 32)
5706 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5707 && TYPE_VECTOR_SUBPARTS (type) > 1)
5709 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5711 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5712 mode = MIN_MODE_VECTOR_FLOAT;
5714 mode = MIN_MODE_VECTOR_INT;
5716 /* Get the mode which has this inner mode and number of units. */
5717 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5718 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5719 && GET_MODE_INNER (mode) == innermode)
5721 if (size == 32 && !TARGET_AVX)
5723 static bool warnedavx;
5730 warning (0, "AVX vector argument without AVX "
5731 "enabled changes the ABI");
5733 return TYPE_MODE (type);
5746 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5747 this may not agree with the mode that the type system has chosen for the
5748 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5749 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5752 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5757 if (orig_mode != BLKmode)
5758 tmp = gen_rtx_REG (orig_mode, regno);
5761 tmp = gen_rtx_REG (mode, regno);
5762 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5763 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5769 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5770 of this code is to classify each 8 bytes of the incoming argument by register
5771 class and assign registers accordingly. */
5773 /* Return the union class of CLASS1 and CLASS2.
5774 See the x86-64 PS ABI for details. */
5776 static enum x86_64_reg_class
5777 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5779 /* Rule #1: If both classes are equal, this is the resulting class. */
5780 if (class1 == class2)
5783 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5785 if (class1 == X86_64_NO_CLASS)
5787 if (class2 == X86_64_NO_CLASS)
5790 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5791 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5792 return X86_64_MEMORY_CLASS;
5794 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5795 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5796 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5797 return X86_64_INTEGERSI_CLASS;
5798 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5799 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5800 return X86_64_INTEGER_CLASS;
5802 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5804 if (class1 == X86_64_X87_CLASS
5805 || class1 == X86_64_X87UP_CLASS
5806 || class1 == X86_64_COMPLEX_X87_CLASS
5807 || class2 == X86_64_X87_CLASS
5808 || class2 == X86_64_X87UP_CLASS
5809 || class2 == X86_64_COMPLEX_X87_CLASS)
5810 return X86_64_MEMORY_CLASS;
5812 /* Rule #6: Otherwise class SSE is used. */
5813 return X86_64_SSE_CLASS;
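/* Worked example (editor's addition): for "union u { long l; double d; }"
   the single eightbyte classifies as X86_64_INTEGER_CLASS via "l" and as
   an SSE class via "d"; rule #4 above merges these to
   X86_64_INTEGER_CLASS, so the union is passed in a general purpose
   register.  */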
5816 /* Classify the argument of type TYPE and mode MODE.
5817 CLASSES will be filled by the register class used to pass each word
5818 of the operand. The number of words is returned. In case the parameter
5819 should be passed in memory, 0 is returned. As a special case for zero
5820 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5822 BIT_OFFSET is used internally for handling records and specifies the
5823 offset, in bits modulo 256, to avoid overflow cases.
5825 See the x86-64 PS ABI for details.
5829 classify_argument (enum machine_mode mode, const_tree type,
5830 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5832 HOST_WIDE_INT bytes =
5833 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5834 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5836 /* Variable sized entities are always passed/returned in memory. */
5840 if (mode != VOIDmode
5841 && targetm.calls.must_pass_in_stack (mode, type))
5844 if (type && AGGREGATE_TYPE_P (type))
5848 enum x86_64_reg_class subclasses[MAX_CLASSES];
5850 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5854 for (i = 0; i < words; i++)
5855 classes[i] = X86_64_NO_CLASS;
5857 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5858 signal the memory class, so handle it as a special case. */
5861 classes[0] = X86_64_NO_CLASS;
5865 /* Classify each field of record and merge classes. */
5866 switch (TREE_CODE (type))
5869 /* And now merge the fields of the structure. */
5870 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5872 if (TREE_CODE (field) == FIELD_DECL)
5876 if (TREE_TYPE (field) == error_mark_node)
5879 /* Bitfields are always classified as integer. Handle them
5880 early, since later code would consider them to be
5881 misaligned integers. */
5882 if (DECL_BIT_FIELD (field))
5884 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5885 i < ((int_bit_position (field) + (bit_offset % 64))
5886 + tree_low_cst (DECL_SIZE (field), 0)
5889 merge_classes (X86_64_INTEGER_CLASS,
5896 type = TREE_TYPE (field);
5898 /* Flexible array member is ignored. */
5899 if (TYPE_MODE (type) == BLKmode
5900 && TREE_CODE (type) == ARRAY_TYPE
5901 && TYPE_SIZE (type) == NULL_TREE
5902 && TYPE_DOMAIN (type) != NULL_TREE
5903 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5908 if (!warned && warn_psabi)
5911 inform (input_location,
5912 "the ABI of passing struct with"
5913 " a flexible array member has"
5914 " changed in GCC 4.4");
5918 num = classify_argument (TYPE_MODE (type), type,
5920 (int_bit_position (field)
5921 + bit_offset) % 256);
5924 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5925 for (i = 0; i < num && (i + pos) < words; i++)
5927 merge_classes (subclasses[i], classes[i + pos]);
5934 /* Arrays are handled as small records. */
5937 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5938 TREE_TYPE (type), subclasses, bit_offset);
5942 /* The partial classes are now full classes. */
5943 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5944 subclasses[0] = X86_64_SSE_CLASS;
5945 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5946 && !((bit_offset % 64) == 0 && bytes == 4))
5947 subclasses[0] = X86_64_INTEGER_CLASS;
5949 for (i = 0; i < words; i++)
5950 classes[i] = subclasses[i % num];
5955 case QUAL_UNION_TYPE:
5956 /* Unions are similar to RECORD_TYPE but offset is always 0.
5958 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5960 if (TREE_CODE (field) == FIELD_DECL)
5964 if (TREE_TYPE (field) == error_mark_node)
5967 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5968 TREE_TYPE (field), subclasses,
5972 for (i = 0; i < num; i++)
5973 classes[i] = merge_classes (subclasses[i], classes[i]);
5984 /* When size > 16 bytes, if the first one isn't
5985 X86_64_SSE_CLASS or any other ones aren't
5986 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5988 if (classes[0] != X86_64_SSE_CLASS)
5991 for (i = 1; i < words; i++)
5992 if (classes[i] != X86_64_SSEUP_CLASS)
5996 /* Final merger cleanup. */
5997 for (i = 0; i < words; i++)
5999 /* If one class is MEMORY, everything should be passed in memory. */
6001 if (classes[i] == X86_64_MEMORY_CLASS)
6004 /* The X86_64_SSEUP_CLASS should always be preceded by
6005 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6006 if (classes[i] == X86_64_SSEUP_CLASS
6007 && classes[i - 1] != X86_64_SSE_CLASS
6008 && classes[i - 1] != X86_64_SSEUP_CLASS)
6010 /* The first one should never be X86_64_SSEUP_CLASS. */
6011 gcc_assert (i != 0);
6012 classes[i] = X86_64_SSE_CLASS;
6015 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6016 everything should be passed in memory. */
6017 if (classes[i] == X86_64_X87UP_CLASS
6018 && (classes[i - 1] != X86_64_X87_CLASS))
6022 /* The first one should never be X86_64_X87UP_CLASS. */
6023 gcc_assert (i != 0);
6024 if (!warned && warn_psabi)
6027 inform (input_location,
6028 "the ABI of passing union with long double"
6029 " has changed in GCC 4.4");
6037 /* Compute the alignment needed. We align all types to their natural
6038 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6039 if (mode != VOIDmode && mode != BLKmode)
6041 int mode_alignment = GET_MODE_BITSIZE (mode);
6044 mode_alignment = 128;
6045 else if (mode == XCmode)
6046 mode_alignment = 256;
6047 if (COMPLEX_MODE_P (mode))
6048 mode_alignment /= 2;
6049 /* Misaligned fields are always returned in memory. */
6050 if (bit_offset % mode_alignment)
6054 /* for V1xx modes, just use the base mode */
6055 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6056 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6057 mode = GET_MODE_INNER (mode);
6059 /* Classification of atomic types. */
6064 classes[0] = X86_64_SSE_CLASS;
6067 classes[0] = X86_64_SSE_CLASS;
6068 classes[1] = X86_64_SSEUP_CLASS;
6078 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6082 classes[0] = X86_64_INTEGERSI_CLASS;
6085 else if (size <= 64)
6087 classes[0] = X86_64_INTEGER_CLASS;
6090 else if (size <= 64+32)
6092 classes[0] = X86_64_INTEGER_CLASS;
6093 classes[1] = X86_64_INTEGERSI_CLASS;
6096 else if (size <= 64+64)
6098 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6106 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6110 /* OImode shouldn't be used directly. */
6115 if (!(bit_offset % 64))
6116 classes[0] = X86_64_SSESF_CLASS;
6118 classes[0] = X86_64_SSE_CLASS;
6121 classes[0] = X86_64_SSEDF_CLASS;
6124 classes[0] = X86_64_X87_CLASS;
6125 classes[1] = X86_64_X87UP_CLASS;
6128 classes[0] = X86_64_SSE_CLASS;
6129 classes[1] = X86_64_SSEUP_CLASS;
6132 classes[0] = X86_64_SSE_CLASS;
6133 if (!(bit_offset % 64))
6139 if (!warned && warn_psabi)
6142 inform (input_location,
6143 "the ABI of passing structure with complex float"
6144 " member has changed in GCC 4.4");
6146 classes[1] = X86_64_SSESF_CLASS;
6150 classes[0] = X86_64_SSEDF_CLASS;
6151 classes[1] = X86_64_SSEDF_CLASS;
6154 classes[0] = X86_64_COMPLEX_X87_CLASS;
6157 /* These modes are larger than 16 bytes. */
6165 classes[0] = X86_64_SSE_CLASS;
6166 classes[1] = X86_64_SSEUP_CLASS;
6167 classes[2] = X86_64_SSEUP_CLASS;
6168 classes[3] = X86_64_SSEUP_CLASS;
6176 classes[0] = X86_64_SSE_CLASS;
6177 classes[1] = X86_64_SSEUP_CLASS;
6185 classes[0] = X86_64_SSE_CLASS;
6191 gcc_assert (VECTOR_MODE_P (mode));
6196 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6198 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6199 classes[0] = X86_64_INTEGERSI_CLASS;
6201 classes[0] = X86_64_INTEGER_CLASS;
6202 classes[1] = X86_64_INTEGER_CLASS;
6203 return 1 + (bytes > 8);
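/* Worked example (editor's addition): for "struct s { int i; double d; }"
   (16 bytes, two eightbytes) classify_argument yields
   classes[0] == X86_64_INTEGERSI_CLASS and classes[1] == X86_64_SSEDF_CLASS,
   so examine_argument below reports one integer and one SSE register; as a
   first argument the struct travels in %rdi and %xmm0.  */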
6207 /* Examine the argument and compute the number of registers required in each
6208 class. Return 0 iff the parameter should be passed in memory. */
6210 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6211 int *int_nregs, int *sse_nregs)
6213 enum x86_64_reg_class regclass[MAX_CLASSES];
6214 int n = classify_argument (mode, type, regclass, 0);
6220 for (n--; n >= 0; n--)
6221 switch (regclass[n])
6223 case X86_64_INTEGER_CLASS:
6224 case X86_64_INTEGERSI_CLASS:
6227 case X86_64_SSE_CLASS:
6228 case X86_64_SSESF_CLASS:
6229 case X86_64_SSEDF_CLASS:
6232 case X86_64_NO_CLASS:
6233 case X86_64_SSEUP_CLASS:
6235 case X86_64_X87_CLASS:
6236 case X86_64_X87UP_CLASS:
6240 case X86_64_COMPLEX_X87_CLASS:
6241 return in_return ? 2 : 0;
6242 case X86_64_MEMORY_CLASS:
6248 /* Construct container for the argument used by GCC interface. See
6249 FUNCTION_ARG for the detailed description. */
6252 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6253 const_tree type, int in_return, int nintregs, int nsseregs,
6254 const int *intreg, int sse_regno)
6256 /* The following variables hold the static issued_error state. */
6257 static bool issued_sse_arg_error;
6258 static bool issued_sse_ret_error;
6259 static bool issued_x87_ret_error;
6261 enum machine_mode tmpmode;
6263 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6264 enum x86_64_reg_class regclass[MAX_CLASSES];
6268 int needed_sseregs, needed_intregs;
6269 rtx exp[MAX_CLASSES];
6272 n = classify_argument (mode, type, regclass, 0);
6275 if (!examine_argument (mode, type, in_return, &needed_intregs,
6278 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6281 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6282 some less clueful developer tries to use floating-point anyway. */
6283 if (needed_sseregs && !TARGET_SSE)
6287 if (!issued_sse_ret_error)
6289 error ("SSE register return with SSE disabled");
6290 issued_sse_ret_error = true;
6293 else if (!issued_sse_arg_error)
6295 error ("SSE register argument with SSE disabled");
6296 issued_sse_arg_error = true;
6301 /* Likewise, error if the ABI requires us to return values in the
6302 x87 registers and the user specified -mno-80387. */
6303 if (!TARGET_80387 && in_return)
6304 for (i = 0; i < n; i++)
6305 if (regclass[i] == X86_64_X87_CLASS
6306 || regclass[i] == X86_64_X87UP_CLASS
6307 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6309 if (!issued_x87_ret_error)
6311 error ("x87 register return with x87 disabled");
6312 issued_x87_ret_error = true;
6317 /* First construct simple cases. Avoid SCmode, since we want to use
6318 a single register to pass this type. */
6319 if (n == 1 && mode != SCmode)
6320 switch (regclass[0])
6322 case X86_64_INTEGER_CLASS:
6323 case X86_64_INTEGERSI_CLASS:
6324 return gen_rtx_REG (mode, intreg[0]);
6325 case X86_64_SSE_CLASS:
6326 case X86_64_SSESF_CLASS:
6327 case X86_64_SSEDF_CLASS:
6328 if (mode != BLKmode)
6329 return gen_reg_or_parallel (mode, orig_mode,
6330 SSE_REGNO (sse_regno));
6332 case X86_64_X87_CLASS:
6333 case X86_64_COMPLEX_X87_CLASS:
6334 return gen_rtx_REG (mode, FIRST_STACK_REG);
6335 case X86_64_NO_CLASS:
6336 /* Zero sized array, struct or class. */
6341 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6342 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6343 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6345 && regclass[0] == X86_64_SSE_CLASS
6346 && regclass[1] == X86_64_SSEUP_CLASS
6347 && regclass[2] == X86_64_SSEUP_CLASS
6348 && regclass[3] == X86_64_SSEUP_CLASS
6350 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6353 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6354 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6355 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6356 && regclass[1] == X86_64_INTEGER_CLASS
6357 && (mode == CDImode || mode == TImode || mode == TFmode)
6358 && intreg[0] + 1 == intreg[1])
6359 return gen_rtx_REG (mode, intreg[0]);
6361 /* Otherwise figure out the entries of the PARALLEL. */
6362 for (i = 0; i < n; i++)
6366 switch (regclass[i])
6368 case X86_64_NO_CLASS:
6370 case X86_64_INTEGER_CLASS:
6371 case X86_64_INTEGERSI_CLASS:
6372 /* Merge TImodes on aligned occasions here too. */
6373 if (i * 8 + 8 > bytes)
6374 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6375 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6379 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6380 if (tmpmode == BLKmode)
6382 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6383 gen_rtx_REG (tmpmode, *intreg),
6387 case X86_64_SSESF_CLASS:
6388 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6389 gen_rtx_REG (SFmode,
6390 SSE_REGNO (sse_regno)),
6394 case X86_64_SSEDF_CLASS:
6395 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6396 gen_rtx_REG (DFmode,
6397 SSE_REGNO (sse_regno)),
6401 case X86_64_SSE_CLASS:
6409 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6419 && regclass[1] == X86_64_SSEUP_CLASS
6420 && regclass[2] == X86_64_SSEUP_CLASS
6421 && regclass[3] == X86_64_SSEUP_CLASS);
6428 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6429 gen_rtx_REG (tmpmode,
6430 SSE_REGNO (sse_regno)),
6439 /* Empty aligned struct, union or class. */
6443 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6444 for (i = 0; i < nexps; i++)
6445 XVECEXP (ret, 0, i) = exp [i];
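/* Editor's sketch of the container built above for the struct example:
   with ORIG_MODE == BLKmode the result is roughly

     (parallel [(expr_list (reg:SI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. an SImode piece at byte offset 0 in a GPR and a DFmode piece at
   byte offset 8 in an SSE register.  */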
6449 /* Update the data in CUM to advance over an argument of mode MODE
6450 and data type TYPE. (TYPE is null for libcalls where that information
6451 may not be available.) */
6454 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6455 const_tree type, HOST_WIDE_INT bytes,
6456 HOST_WIDE_INT words)
6472 cum->words += words;
6473 cum->nregs -= words;
6474 cum->regno += words;
6476 if (cum->nregs <= 0)
6484 /* OImode shouldn't be used directly. */
6488 if (cum->float_in_sse < 2)
6491 if (cum->float_in_sse < 1)
6508 if (!type || !AGGREGATE_TYPE_P (type))
6510 cum->sse_words += words;
6511 cum->sse_nregs -= 1;
6512 cum->sse_regno += 1;
6513 if (cum->sse_nregs <= 0)
6527 if (!type || !AGGREGATE_TYPE_P (type))
6529 cum->mmx_words += words;
6530 cum->mmx_nregs -= 1;
6531 cum->mmx_regno += 1;
6532 if (cum->mmx_nregs <= 0)
6543 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6544 const_tree type, HOST_WIDE_INT words, bool named)
6546 int int_nregs, sse_nregs;
6548 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6549 if (!named && VALID_AVX256_REG_MODE (mode))
6552 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6553 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6555 cum->nregs -= int_nregs;
6556 cum->sse_nregs -= sse_nregs;
6557 cum->regno += int_nregs;
6558 cum->sse_regno += sse_nregs;
6562 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6563 cum->words = (cum->words + align - 1) & ~(align - 1);
6564 cum->words += words;
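/* Editor's example of the rounding above: with a 16-byte argument boundary
   ALIGN is 2 words, so a cum->words of 3 becomes (3 + 2 - 1) & ~(2 - 1) == 4
   and the argument starts on the next 16-byte stack slot before WORDS is
   added.  */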
6569 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6570 HOST_WIDE_INT words)
6572 /* Otherwise, this should be passed indirectly. */
6573 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6575 cum->words += words;
6583 /* Update the data in CUM to advance over an argument of mode MODE and
6584 data type TYPE. (TYPE is null for libcalls where that information
6585 may not be available.) */
6588 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6589 const_tree type, bool named)
6591 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6592 HOST_WIDE_INT bytes, words;
6594 if (mode == BLKmode)
6595 bytes = int_size_in_bytes (type);
6597 bytes = GET_MODE_SIZE (mode);
6598 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6601 mode = type_natural_mode (type, NULL);
6603 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6604 function_arg_advance_ms_64 (cum, bytes, words);
6605 else if (TARGET_64BIT)
6606 function_arg_advance_64 (cum, mode, type, words, named);
6608 function_arg_advance_32 (cum, mode, type, bytes, words);
6611 /* Define where to put the arguments to a function.
6612 Value is zero to push the argument on the stack,
6613 or a hard register in which to store the argument.
6615 MODE is the argument's machine mode.
6616 TYPE is the data type of the argument (as a tree).
6617 This is null for libcalls where that information may
6619 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6620 the preceding args and about the function being called.
6621 NAMED is nonzero if this argument is a named parameter
6622 (otherwise it is an extra parameter matching an ellipsis). */
6625 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6626 enum machine_mode orig_mode, const_tree type,
6627 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6629 static bool warnedsse, warnedmmx;
6631 /* Avoid the AL settings for the Unix64 ABI. */
6632 if (mode == VOIDmode)
6648 if (words <= cum->nregs)
6650 int regno = cum->regno;
6652 /* Fastcall allocates the first two DWORD (SImode) or
5653 smaller arguments to ECX and EDX if the argument isn't an aggregate type. */
6659 || (type && AGGREGATE_TYPE_P (type)))
6662 /* ECX not EAX is the first allocated register. */
6663 if (regno == AX_REG)
6666 return gen_rtx_REG (mode, regno);
6671 if (cum->float_in_sse < 2)
6674 if (cum->float_in_sse < 1)
6678 /* In 32bit, we pass TImode in xmm registers. */
6685 if (!type || !AGGREGATE_TYPE_P (type))
6687 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6690 warning (0, "SSE vector argument without SSE enabled "
6694 return gen_reg_or_parallel (mode, orig_mode,
6695 cum->sse_regno + FIRST_SSE_REG);
6700 /* OImode shouldn't be used directly. */
6709 if (!type || !AGGREGATE_TYPE_P (type))
6712 return gen_reg_or_parallel (mode, orig_mode,
6713 cum->sse_regno + FIRST_SSE_REG);
6723 if (!type || !AGGREGATE_TYPE_P (type))
6725 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6728 warning (0, "MMX vector argument without MMX enabled "
6732 return gen_reg_or_parallel (mode, orig_mode,
6733 cum->mmx_regno + FIRST_MMX_REG);
6742 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6743 enum machine_mode orig_mode, const_tree type, bool named)
6745 /* Handle a hidden AL argument containing the number of registers
6746 for varargs x86-64 functions. */
6747 if (mode == VOIDmode)
6748 return GEN_INT (cum->maybe_vaarg
6749 ? (cum->sse_nregs < 0
6750 ? X86_64_SSE_REGPARM_MAX
6765 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6771 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6773 &x86_64_int_parameter_registers [cum->regno],
6778 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6779 enum machine_mode orig_mode, bool named,
6780 HOST_WIDE_INT bytes)
6784 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6785 We use the value -2 to specify that the current function call uses the MS ABI. */
6786 if (mode == VOIDmode)
6787 return GEN_INT (-2);
6789 /* If we've run out of registers, it goes on the stack. */
6790 if (cum->nregs == 0)
6793 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6795 /* Only floating point modes are passed in anything but integer regs. */
6796 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6799 regno = cum->regno + FIRST_SSE_REG;
6804 /* Unnamed floating parameters are passed in both the
6805 SSE and integer registers. */
6806 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6807 t2 = gen_rtx_REG (mode, regno);
6808 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6809 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6810 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6813 /* Handle aggregate types passed in registers. */
6814 if (orig_mode == BLKmode)
6816 if (bytes > 0 && bytes <= 8)
6817 mode = (bytes > 4 ? DImode : SImode);
6818 if (mode == BLKmode)
6822 return gen_reg_or_parallel (mode, orig_mode, regno);
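/* Editor's note: unlike the SysV scheme, the MS ABI allocates registers
   strictly by parameter position, which the cum->regno-based indexing above
   implements; for "void f (int i, double d)" the int goes in %ecx (slot 0)
   and the double in %xmm1 (slot 1), leaving the slot-1 integer register
   %rdx unused.  */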
6825 /* Return where to put the arguments to a function.
6826 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6828 MODE is the argument's machine mode. TYPE is the data type of the
6829 argument. It is null for libcalls where that information may not be
6830 available. CUM gives information about the preceding args and about
6831 the function being called. NAMED is nonzero if this argument is a
6832 named parameter (otherwise it is an extra parameter matching an
6836 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6837 const_tree type, bool named)
6839 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6840 enum machine_mode mode = omode;
6841 HOST_WIDE_INT bytes, words;
6844 if (mode == BLKmode)
6845 bytes = int_size_in_bytes (type);
6847 bytes = GET_MODE_SIZE (mode);
6848 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6850 /* To simplify the code below, represent vector types with a vector mode
6851 even if MMX/SSE are not active. */
6852 if (type && TREE_CODE (type) == VECTOR_TYPE)
6853 mode = type_natural_mode (type, cum);
6855 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6856 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6857 else if (TARGET_64BIT)
6858 arg = function_arg_64 (cum, mode, omode, type, named);
6860 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6862 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6864 /* This argument uses 256bit AVX modes. */
6866 cfun->machine->callee_pass_avx256_p = true;
6868 cfun->machine->caller_pass_avx256_p = true;
6874 /* A C expression that indicates when an argument must be passed by
6875 reference. If nonzero for an argument, a copy of that argument is
6876 made in memory and a pointer to the argument is passed instead of
6877 the argument itself. The pointer is passed in whatever way is
6878 appropriate for passing a pointer to that type. */
6881 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6882 enum machine_mode mode ATTRIBUTE_UNUSED,
6883 const_tree type, bool named ATTRIBUTE_UNUSED)
6885 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6887 /* See Windows x64 Software Convention. */
6888 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6890 int msize = (int) GET_MODE_SIZE (mode);
6893 /* Arrays are passed by reference. */
6894 if (TREE_CODE (type) == ARRAY_TYPE)
6897 if (AGGREGATE_TYPE_P (type))
6899 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6900 are passed by reference. */
6901 msize = int_size_in_bytes (type);
6905 /* __m128 is passed by reference. */
6907 case 1: case 2: case 4: case 8:
6913 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6919 /* Return true when TYPE should be 128bit aligned for the 32bit argument
6920 passing ABI. XXX: This function is obsolete and is only used for
6921 checking psABI compatibility with previous versions of GCC. */
6924 ix86_compat_aligned_value_p (const_tree type)
6926 enum machine_mode mode = TYPE_MODE (type);
6927 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6931 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6933 if (TYPE_ALIGN (type) < 128)
6936 if (AGGREGATE_TYPE_P (type))
6938 /* Walk the aggregates recursively. */
6939 switch (TREE_CODE (type))
6943 case QUAL_UNION_TYPE:
6947 /* Walk all the structure fields. */
6948 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6950 if (TREE_CODE (field) == FIELD_DECL
6951 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6958 /* Just for use if some languages pass arrays by value. */
6959 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6970 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6971 XXX: This function is obsolete and is only used for checking psABI
6972 compatibility with previous versions of GCC. */
6975 ix86_compat_function_arg_boundary (enum machine_mode mode,
6976 const_tree type, unsigned int align)
6978 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6979 natural boundaries. */
6980 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6982 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6983 make an exception for SSE modes since these require 128bit alignment.
6986 The handling here differs from field_alignment. ICC aligns MMX
6987 arguments to 4 byte boundaries, while structure fields are aligned
6988 to 8 byte boundaries. */
6991 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6992 align = PARM_BOUNDARY;
6996 if (!ix86_compat_aligned_value_p (type))
6997 align = PARM_BOUNDARY;
7000 if (align > BIGGEST_ALIGNMENT)
7001 align = BIGGEST_ALIGNMENT;
7005 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing ABI. */
7009 ix86_contains_aligned_value_p (const_tree type)
7011 enum machine_mode mode = TYPE_MODE (type);
7013 if (mode == XFmode || mode == XCmode)
7016 if (TYPE_ALIGN (type) < 128)
7019 if (AGGREGATE_TYPE_P (type))
7021 /* Walk the aggregates recursively. */
7022 switch (TREE_CODE (type))
7026 case QUAL_UNION_TYPE:
7030 /* Walk all the structure fields. */
7031 for (field = TYPE_FIELDS (type);
7033 field = DECL_CHAIN (field))
7035 if (TREE_CODE (field) == FIELD_DECL
7036 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7043 /* Just for use if some languages pass arrays by value. */
7044 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7053 return TYPE_ALIGN (type) >= 128;
7058 /* Gives the alignment boundary, in bits, of an argument with the
7059 specified mode and type. */
7062 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7067 /* Since the main variant type is used for the call, we convert TYPE
7068 to its main variant. */
7069 type = TYPE_MAIN_VARIANT (type);
7070 align = TYPE_ALIGN (type);
7073 align = GET_MODE_ALIGNMENT (mode);
7074 if (align < PARM_BOUNDARY)
7075 align = PARM_BOUNDARY;
7079 unsigned int saved_align = align;
7083 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7086 if (mode == XFmode || mode == XCmode)
7087 align = PARM_BOUNDARY;
7089 else if (!ix86_contains_aligned_value_p (type))
7090 align = PARM_BOUNDARY;
7093 align = PARM_BOUNDARY;
7098 && align != ix86_compat_function_arg_boundary (mode, type,
7102 inform (input_location,
7103 "The ABI for passing parameters with %d-byte"
7104 " alignment has changed in GCC 4.6",
7105 align / BITS_PER_UNIT);
7112 /* Return true if N is a possible register number of function value. */
7115 ix86_function_value_regno_p (const unsigned int regno)
7122 case FIRST_FLOAT_REG:
7123 /* TODO: The function should depend on current function ABI but
7124 builtins.c would need updating then. Therefore we use the default ABI. */
7126 if (TARGET_64BIT && ix86_abi == MS_ABI)
7128 return TARGET_FLOAT_RETURNS_IN_80387;
7134 if (TARGET_MACHO || TARGET_64BIT)
7142 /* Define how to find the value returned by a function.
7143 VALTYPE is the data type of the value (as a tree).
7144 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7145 otherwise, FUNC is 0. */
7148 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7149 const_tree fntype, const_tree fn)
7153 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7154 we normally prevent this case when mmx is not available. However
7155 some ABIs may require the result to be returned like DImode. */
7156 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7157 regno = FIRST_MMX_REG;
7159 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7160 we prevent this case when sse is not available. However some ABIs
7161 may require the result to be returned like integer TImode. */
7162 else if (mode == TImode
7163 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7164 regno = FIRST_SSE_REG;
7166 /* 32-byte vector modes in %ymm0. */
7167 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7168 regno = FIRST_SSE_REG;
7170 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7171 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7172 regno = FIRST_FLOAT_REG;
7174 /* Most things go in %eax. */
7177 /* Override FP return register with %xmm0 for local functions when
7178 SSE math is enabled or for functions with sseregparm attribute. */
7179 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7181 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7182 if ((sse_level >= 1 && mode == SFmode)
7183 || (sse_level == 2 && mode == DFmode))
7184 regno = FIRST_SSE_REG;
7187 /* OImode shouldn't be used directly. */
7188 gcc_assert (mode != OImode);
7190 return gen_rtx_REG (orig_mode, regno);
7194 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7199 /* Handle libcalls, which don't provide a type node. */
7200 if (valtype == NULL)
7214 regno = FIRST_SSE_REG;
7218 regno = FIRST_FLOAT_REG;
7226 return gen_rtx_REG (mode, regno);
7228 else if (POINTER_TYPE_P (valtype))
7230 /* Pointers are always returned in Pmode. */
7234 ret = construct_container (mode, orig_mode, valtype, 1,
7235 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7236 x86_64_int_return_registers, 0);
7238 /* For zero sized structures, construct_container returns NULL, but we
7239 need to keep the rest of the compiler happy by returning a meaningful value. */
7241 ret = gen_rtx_REG (orig_mode, AX_REG);
7247 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7249 unsigned int regno = AX_REG;
7253 switch (GET_MODE_SIZE (mode))
7256 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7257 && !COMPLEX_MODE_P (mode))
7258 regno = FIRST_SSE_REG;
7262 if (mode == SFmode || mode == DFmode)
7263 regno = FIRST_SSE_REG;
7269 return gen_rtx_REG (orig_mode, regno);
7273 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7274 enum machine_mode orig_mode, enum machine_mode mode)
7276 const_tree fn, fntype;
7279 if (fntype_or_decl && DECL_P (fntype_or_decl))
7280 fn = fntype_or_decl;
7281 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7283 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7284 return function_value_ms_64 (orig_mode, mode);
7285 else if (TARGET_64BIT)
7286 return function_value_64 (orig_mode, mode, valtype);
7288 return function_value_32 (orig_mode, mode, fntype, fn);
7292 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7293 bool outgoing ATTRIBUTE_UNUSED)
7295 enum machine_mode mode, orig_mode;
7297 orig_mode = TYPE_MODE (valtype);
7298 mode = type_natural_mode (valtype, NULL);
7299 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7302 /* Pointer function arguments and return values are promoted to Pmode. */
7304 static enum machine_mode
7305 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7306 int *punsignedp, const_tree fntype,
7309 if (type != NULL_TREE && POINTER_TYPE_P (type))
7311 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7314 return default_promote_function_mode (type, mode, punsignedp, fntype,
7319 ix86_libcall_value (enum machine_mode mode)
7321 return ix86_function_value_1 (NULL, NULL, mode, mode);
7324 /* Return true iff type is returned in memory. */
7326 static bool ATTRIBUTE_UNUSED
7327 return_in_memory_32 (const_tree type, enum machine_mode mode)
7331 if (mode == BLKmode)
7334 size = int_size_in_bytes (type);
7336 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7339 if (VECTOR_MODE_P (mode) || mode == TImode)
7341 /* User-created vectors small enough to fit in EAX. */
7345 /* MMX/3dNow values are returned in MM0,
7346 except when it doesn't exist or the ABI prescribes otherwise. */
7348 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7350 /* SSE values are returned in XMM0, except when it doesn't exist. */
7354 /* AVX values are returned in YMM0, except when it doesn't exist. */
7365 /* OImode shouldn't be used directly. */
7366 gcc_assert (mode != OImode);
7371 static bool ATTRIBUTE_UNUSED
7372 return_in_memory_64 (const_tree type, enum machine_mode mode)
7374 int needed_intregs, needed_sseregs;
7375 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7378 static bool ATTRIBUTE_UNUSED
7379 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7381 HOST_WIDE_INT size = int_size_in_bytes (type);
7383 /* __m128 is returned in xmm0. */
7384 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7385 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7388 /* Otherwise, the size must be exactly in [1248]. */
7389 return size != 1 && size != 2 && size != 4 && size != 8;
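/* Editor's examples of the test above: a 12-byte struct is returned in
   memory through a hidden pointer, an 8-byte struct comes back in %rax,
   and a 16-byte vector such as __m128 is returned in %xmm0.  */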
7393 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7395 #ifdef SUBTARGET_RETURN_IN_MEMORY
7396 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7398 const enum machine_mode mode = type_natural_mode (type, NULL);
7402 if (ix86_function_type_abi (fntype) == MS_ABI)
7403 return return_in_memory_ms_64 (type, mode);
7405 return return_in_memory_64 (type, mode);
7408 return return_in_memory_32 (type, mode);
7412 /* When returning SSE vector types, we have a choice of either
7413 (1) being abi incompatible with a -march switch, or
7414 (2) generating an error.
7415 Given no good solution, I think the safest thing is one warning.
7416 The user won't be able to use -Werror, but....
7418 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7419 called in response to actually generating a caller or callee that
7420 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7421 via aggregate_value_p for general type probing from tree-ssa. */
7424 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7426 static bool warnedsse, warnedmmx;
7428 if (!TARGET_64BIT && type)
7430 /* Look at the return type of the function, not the function type. */
7431 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7433 if (!TARGET_SSE && !warnedsse)
7436 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7439 warning (0, "SSE vector return without SSE enabled "
7444 if (!TARGET_MMX && !warnedmmx)
7446 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7449 warning (0, "MMX vector return without MMX enabled "
7459 /* Create the va_list data type. */
7461 /* Returns the calling convention specific va_list data type.
7462 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7465 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7467 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7469 /* For i386 we use a plain pointer to the argument area. */
7470 if (!TARGET_64BIT || abi == MS_ABI)
7471 return build_pointer_type (char_type_node);
7473 record = lang_hooks.types.make_type (RECORD_TYPE);
7474 type_decl = build_decl (BUILTINS_LOCATION,
7475 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7477 f_gpr = build_decl (BUILTINS_LOCATION,
7478 FIELD_DECL, get_identifier ("gp_offset"),
7479 unsigned_type_node);
7480 f_fpr = build_decl (BUILTINS_LOCATION,
7481 FIELD_DECL, get_identifier ("fp_offset"),
7482 unsigned_type_node);
7483 f_ovf = build_decl (BUILTINS_LOCATION,
7484 FIELD_DECL, get_identifier ("overflow_arg_area"),
7486 f_sav = build_decl (BUILTINS_LOCATION,
7487 FIELD_DECL, get_identifier ("reg_save_area"),
7490 va_list_gpr_counter_field = f_gpr;
7491 va_list_fpr_counter_field = f_fpr;
7493 DECL_FIELD_CONTEXT (f_gpr) = record;
7494 DECL_FIELD_CONTEXT (f_fpr) = record;
7495 DECL_FIELD_CONTEXT (f_ovf) = record;
7496 DECL_FIELD_CONTEXT (f_sav) = record;
7498 TYPE_STUB_DECL (record) = type_decl;
7499 TYPE_NAME (record) = type_decl;
7500 TYPE_FIELDS (record) = f_gpr;
7501 DECL_CHAIN (f_gpr) = f_fpr;
7502 DECL_CHAIN (f_fpr) = f_ovf;
7503 DECL_CHAIN (f_ovf) = f_sav;
7505 layout_type (record);
7507 /* The correct type is an array type of one element. */
7508 return build_array_type (record, build_index_type (size_zero_node));
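/* Editor's note: the record built above is the C-level SysV x86-64 va_list,
   an array of one element of

     struct __va_list_tag {
       unsigned int gp_offset;        (offset into reg_save_area, 0..48)
       unsigned int fp_offset;        (offset into reg_save_area, 48..176)
       void *overflow_arg_area;       (stack-passed arguments)
       void *reg_save_area;           (GPR followed by SSE save block)
     };  */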
7511 /* Set up the builtin va_list data type and, for 64-bit, the additional
7512 calling convention specific va_list data types. */
7515 ix86_build_builtin_va_list (void)
7517 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7519 /* Initialize abi specific va_list builtin types. */
7523 if (ix86_abi == MS_ABI)
7525 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7526 if (TREE_CODE (t) != RECORD_TYPE)
7527 t = build_variant_type_copy (t);
7528 sysv_va_list_type_node = t;
7533 if (TREE_CODE (t) != RECORD_TYPE)
7534 t = build_variant_type_copy (t);
7535 sysv_va_list_type_node = t;
7537 if (ix86_abi != MS_ABI)
7539 t = ix86_build_builtin_va_list_abi (MS_ABI);
7540 if (TREE_CODE (t) != RECORD_TYPE)
7541 t = build_variant_type_copy (t);
7542 ms_va_list_type_node = t;
7547 if (TREE_CODE (t) != RECORD_TYPE)
7548 t = build_variant_type_copy (t);
7549 ms_va_list_type_node = t;
7556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7559 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7565 /* GPR size of varargs save area. */
7566 if (cfun->va_list_gpr_size)
7567 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7569 ix86_varargs_gpr_size = 0;
7571 /* FPR size of varargs save area. We don't need it if we don't pass
7572 anything in SSE registers. */
7573 if (TARGET_SSE && cfun->va_list_fpr_size)
7574 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7576 ix86_varargs_fpr_size = 0;
7578 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7581 save_area = frame_pointer_rtx;
7582 set = get_varargs_alias_set ();
7584 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7585 if (max > X86_64_REGPARM_MAX)
7586 max = X86_64_REGPARM_MAX;
7588 for (i = cum->regno; i < max; i++)
7590 mem = gen_rtx_MEM (Pmode,
7591 plus_constant (save_area, i * UNITS_PER_WORD));
7592 MEM_NOTRAP_P (mem) = 1;
7593 set_mem_alias_set (mem, set);
7594 emit_move_insn (mem, gen_rtx_REG (Pmode,
7595 x86_64_int_parameter_registers[i]));
7598 if (ix86_varargs_fpr_size)
7600 enum machine_mode smode;
7603 /* Now emit code to save SSE registers. The AX parameter contains the number
7604 of SSE parameter registers used to call this function, though all we
7605 actually check here is the zero/non-zero status. */
7607 label = gen_label_rtx ();
7608 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7609 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7612 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7613 we used movdqa (i.e. TImode) instead? Perhaps even better would
7614 be if we could determine the real mode of the data, via a hook
7615 into pass_stdarg. Ignore all that for now. */
7617 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7618 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7620 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7621 if (max > X86_64_SSE_REGPARM_MAX)
7622 max = X86_64_SSE_REGPARM_MAX;
7624 for (i = cum->sse_regno; i < max; ++i)
7626 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7627 mem = gen_rtx_MEM (smode, mem);
7628 MEM_NOTRAP_P (mem) = 1;
7629 set_mem_alias_set (mem, set);
7630 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7632 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
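/* Editor's note on the layout just emitted: the prologue dumps up to six
   GPRs at offsets 0, 8, ..., 40 (48 bytes) followed by up to eight SSE
   registers in 16-byte slots, giving the 48 + 8*16 == 176-byte SysV
   register save area that gp_offset and fp_offset index into.  */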
7640 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7642 alias_set_type set = get_varargs_alias_set ();
7645 /* Reset to zero, as there might be a sysv vaarg used before. */
7647 ix86_varargs_gpr_size = 0;
7648 ix86_varargs_fpr_size = 0;
7650 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7654 mem = gen_rtx_MEM (Pmode,
7655 plus_constant (virtual_incoming_args_rtx,
7656 i * UNITS_PER_WORD));
7657 MEM_NOTRAP_P (mem) = 1;
7658 set_mem_alias_set (mem, set);
7660 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7661 emit_move_insn (mem, reg);
7666 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7667 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7670 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7671 CUMULATIVE_ARGS next_cum;
7674 /* This argument doesn't appear to be used anymore. Which is good,
7675 because the old code here didn't suppress rtl generation. */
7676 gcc_assert (!no_rtl);
7681 fntype = TREE_TYPE (current_function_decl);
7683 /* For varargs, we do not want to skip the dummy va_dcl argument.
7684 For stdargs, we do want to skip the last named argument. */
7686 if (stdarg_p (fntype))
7687 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7690 if (cum->call_abi == MS_ABI)
7691 setup_incoming_varargs_ms_64 (&next_cum);
7693 setup_incoming_varargs_64 (&next_cum);
7696 /* Check whether TYPE is the char * kind of va_list. */
7699 is_va_list_char_pointer (tree type)
7703 /* For 32-bit it is always true. */
7706 canonic = ix86_canonical_va_list_type (type);
7707 return (canonic == ms_va_list_type_node
7708 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7711 /* Implement va_start. */
7714 ix86_va_start (tree valist, rtx nextarg)
7716 HOST_WIDE_INT words, n_gpr, n_fpr;
7717 tree f_gpr, f_fpr, f_ovf, f_sav;
7718 tree gpr, fpr, ovf, sav, t;
7722 if (flag_split_stack
7723 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7725 unsigned int scratch_regno;
7727 /* When we are splitting the stack, we can't refer to the stack
7728 arguments using internal_arg_pointer, because they may be on
7729 the old stack. The split stack prologue will arrange to
7730 leave a pointer to the old stack arguments in a scratch
7731 register, which we here copy to a pseudo-register. The split
7732 stack prologue can't set the pseudo-register directly because
7733 it (the prologue) runs before any registers have been saved. */
7735 scratch_regno = split_stack_prologue_scratch_regno ();
7736 if (scratch_regno != INVALID_REGNUM)
7740 reg = gen_reg_rtx (Pmode);
7741 cfun->machine->split_stack_varargs_pointer = reg;
7744 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7748 push_topmost_sequence ();
7749 emit_insn_after (seq, entry_of_function ());
7750 pop_topmost_sequence ();
7754 /* Only the 64-bit target needs something special. */
7755 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7757 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7758 std_expand_builtin_va_start (valist, nextarg);
7763 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7764 next = expand_binop (ptr_mode, add_optab,
7765 cfun->machine->split_stack_varargs_pointer,
7766 crtl->args.arg_offset_rtx,
7767 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7768 convert_move (va_r, next, 0);
7773 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7774 f_fpr = DECL_CHAIN (f_gpr);
7775 f_ovf = DECL_CHAIN (f_fpr);
7776 f_sav = DECL_CHAIN (f_ovf);
7778 valist = build_simple_mem_ref (valist);
7779 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7780 /* The following should be folded into the MEM_REF offset. */
7781 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7783 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7785 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7787 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7790 /* Count number of gp and fp argument registers used. */
7791 words = crtl->args.info.words;
7792 n_gpr = crtl->args.info.regno;
7793 n_fpr = crtl->args.info.sse_regno;
7795 if (cfun->va_list_gpr_size)
7797 type = TREE_TYPE (gpr);
7798 t = build2 (MODIFY_EXPR, type,
7799 gpr, build_int_cst (type, n_gpr * 8));
7800 TREE_SIDE_EFFECTS (t) = 1;
7801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7804 if (TARGET_SSE && cfun->va_list_fpr_size)
7806 type = TREE_TYPE (fpr);
7807 t = build2 (MODIFY_EXPR, type, fpr,
7808 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7809 TREE_SIDE_EFFECTS (t) = 1;
7810 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7813 /* Find the overflow area. */
7814 type = TREE_TYPE (ovf);
7815 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7816 ovf_rtx = crtl->args.internal_arg_pointer;
7818 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7819 t = make_tree (type, ovf_rtx);
7821 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7822 t = build2 (MODIFY_EXPR, type, ovf, t);
7823 TREE_SIDE_EFFECTS (t) = 1;
7824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7826 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7828 /* Find the register save area.
7829 The function prologue saves it right above the stack frame. */
7830 type = TREE_TYPE (sav);
7831 t = make_tree (type, frame_pointer_rtx);
7832 if (!ix86_varargs_gpr_size)
7833 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7834 t = build2 (MODIFY_EXPR, type, sav, t);
7835 TREE_SIDE_EFFECTS (t) = 1;
7836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
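/* Editor's worked example: for "void f (int a, double b, ...)" one GPR and
   one SSE register are named, so va_start stores gp_offset = 1*8 == 8 and
   fp_offset = 1*16 + 8*6 == 64, pointing just past the named arguments'
   slots in the register save area.  */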
7840 /* Implement va_arg. */
7843 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7846 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7847 tree f_gpr, f_fpr, f_ovf, f_sav;
7848 tree gpr, fpr, ovf, sav, t;
7850 tree lab_false, lab_over = NULL_TREE;
7855 enum machine_mode nat_mode;
7856 unsigned int arg_boundary;
7858 /* Only the 64-bit target needs something special. */
7859 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7860 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7862 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7863 f_fpr = DECL_CHAIN (f_gpr);
7864 f_ovf = DECL_CHAIN (f_fpr);
7865 f_sav = DECL_CHAIN (f_ovf);
7867 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7868 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7869 valist = build_va_arg_indirect_ref (valist);
7870 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7871 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7872 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7874 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7876 type = build_pointer_type (type);
7877 size = int_size_in_bytes (type);
7878 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7880 nat_mode = type_natural_mode (type, NULL);
7889 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7890 if (!TARGET_64BIT_MS_ABI)
7897 container = construct_container (nat_mode, TYPE_MODE (type),
7898 type, 0, X86_64_REGPARM_MAX,
7899 X86_64_SSE_REGPARM_MAX, intreg,
7904 /* Pull the value out of the saved registers. */
7906 addr = create_tmp_var (ptr_type_node, "addr");
7910 int needed_intregs, needed_sseregs;
7912 tree int_addr, sse_addr;
7914 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7915 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7917 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7919 need_temp = (!REG_P (container)
7920 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7921 || TYPE_ALIGN (type) > 128));
7923 /* In case we are passing a structure, verify that it is a consecutive block
7924 in the register save area. If not, we need to do moves. */
7925 if (!need_temp && !REG_P (container))
7927 /* Verify that all registers are strictly consecutive. */
7928 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7932 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7934 rtx slot = XVECEXP (container, 0, i);
7935 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7936 || INTVAL (XEXP (slot, 1)) != i * 16)
7944 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7946 rtx slot = XVECEXP (container, 0, i);
7947 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7948 || INTVAL (XEXP (slot, 1)) != i * 8)
7960 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7961 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7964 /* First ensure that we fit completely in registers. */
7967 t = build_int_cst (TREE_TYPE (gpr),
7968 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7969 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7970 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7971 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7972 gimplify_and_add (t, pre_p);
7976 t = build_int_cst (TREE_TYPE (fpr),
7977 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7978 + X86_64_REGPARM_MAX * 8);
7979 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7980 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7981 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7982 gimplify_and_add (t, pre_p);
7985 /* Compute index to start of area used for integer regs. */
7988 /* int_addr = gpr + sav; */
7989 t = fold_build_pointer_plus (sav, gpr);
7990 gimplify_assign (int_addr, t, pre_p);
7994 /* sse_addr = fpr + sav; */
7995 t = fold_build_pointer_plus (sav, fpr);
7996 gimplify_assign (sse_addr, t, pre_p);
8000 int i, prev_size = 0;
8001 tree temp = create_tmp_var (type, "va_arg_tmp");
8004 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8005 gimplify_assign (addr, t, pre_p);
8007 for (i = 0; i < XVECLEN (container, 0); i++)
8009 rtx slot = XVECEXP (container, 0, i);
8010 rtx reg = XEXP (slot, 0);
8011 enum machine_mode mode = GET_MODE (reg);
8017 tree dest_addr, dest;
8018 int cur_size = GET_MODE_SIZE (mode);
8020 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8021 prev_size = INTVAL (XEXP (slot, 1));
8022 if (prev_size + cur_size > size)
8024 cur_size = size - prev_size;
8025 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8026 if (mode == BLKmode)
8029 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8030 if (mode == GET_MODE (reg))
8031 addr_type = build_pointer_type (piece_type);
8033 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8035 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8038 if (SSE_REGNO_P (REGNO (reg)))
8040 src_addr = sse_addr;
8041 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8045 src_addr = int_addr;
8046 src_offset = REGNO (reg) * 8;
8048 src_addr = fold_convert (addr_type, src_addr);
8049 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8051 dest_addr = fold_convert (daddr_type, addr);
8052 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8053 if (cur_size == GET_MODE_SIZE (mode))
8055 src = build_va_arg_indirect_ref (src_addr);
8056 dest = build_va_arg_indirect_ref (dest_addr);
8058 gimplify_assign (dest, src, pre_p);
8063 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8064 3, dest_addr, src_addr,
8065 size_int (cur_size));
8066 gimplify_and_add (copy, pre_p);
8068 prev_size += cur_size;
8074 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8075 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8076 gimplify_assign (gpr, t, pre_p);
8081 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8082 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8083 gimplify_assign (fpr, t, pre_p);
8086 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8088 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8091 /* ... otherwise out of the overflow area. */
8093 /* When we align a parameter on the stack for the caller, if its
8094 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8095 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8096 here with the caller. */
8097 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8098 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8099 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8101 /* Care for on-stack alignment if needed. */
8102 if (arg_boundary <= 64 || size == 0)
8106 HOST_WIDE_INT align = arg_boundary / 8;
8107 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8108 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8109 build_int_cst (TREE_TYPE (t), -align));
8112 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8113 gimplify_assign (addr, t, pre_p);
8115 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8116 gimplify_assign (unshare_expr (ovf), t, pre_p);
8119 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8121 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8122 addr = fold_convert (ptrtype, addr);
8125 addr = build_va_arg_indirect_ref (addr);
8126 return build_va_arg_indirect_ref (addr);
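/* Illustrative sketch (not part of GCC): the System V x86-64 va_list
   record that the gimplification above manipulates.  Field names follow
   the psABI; the bounds mirror the GE_EXPR guards emitted before
   lab_false, assuming X86_64_REGPARM_MAX = 6 and
   X86_64_SSE_REGPARM_MAX = 8.  */
#include <stdbool.h>

typedef struct
{
  unsigned int gp_offset;   /* offset of the next GP reg in the save area */
  unsigned int fp_offset;   /* offset of the next SSE reg in the save area */
  void *overflow_arg_area;  /* next stack-passed argument */
  void *reg_save_area;      /* base of the register save area */
} sysv_va_list;

/* Take the register path only if enough GP and SSE slots remain;
   otherwise fall through to the overflow area (lab_false above).  */
static bool
fits_in_save_area (const sysv_va_list *ap, int needed_int, int needed_sse)
{
  return ap->gp_offset <= (unsigned int) (6 - needed_int) * 8
	 && ap->fp_offset <= (unsigned int) (48 + (8 - needed_sse) * 16);
}

int
main (void)
{
  sysv_va_list ap = { 16, 48, 0, 0 };  /* two GP args consumed, no SSE */
  return fits_in_save_area (&ap, 1, 1) ? 0 : 1;
}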
8129 /* Return true if OPNUM's MEM should be matched
8130 in movabs* patterns. */
8133 ix86_check_movabs (rtx insn, int opnum)
8137 set = PATTERN (insn);
8138 if (GET_CODE (set) == PARALLEL)
8139 set = XVECEXP (set, 0, 0);
8140 gcc_assert (GET_CODE (set) == SET);
8141 mem = XEXP (set, opnum);
8142 while (GET_CODE (mem) == SUBREG)
8143 mem = SUBREG_REG (mem);
8144 gcc_assert (MEM_P (mem));
8145 return volatile_ok || !MEM_VOLATILE_P (mem);
8148 /* Initialize the table of extra 80387 mathematical constants. */
8151 init_ext_80387_constants (void)
8153 static const char * cst[5] =
8155 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8156 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8157 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8158 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8159 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8163 for (i = 0; i < 5; i++)
8165 real_from_string (&ext_80387_constants_table[i], cst[i]);
8166 /* Ensure each constant is rounded to XFmode precision. */
8167 real_convert (&ext_80387_constants_table[i],
8168 XFmode, &ext_80387_constants_table[i]);
8171 ext_80387_constants_init = 1;
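/* Standalone check (illustrative, not part of GCC): the five decimal
   strings above are high-precision renderings of the values pushed by
   the x87 fldlg2/fldln2/fldl2e/fldl2t/fldpi instructions.  Compile with
   -lm and compare the printed digits against the table.  */
#include <math.h>
#include <stdio.h>

int
main (void)
{
  printf ("log10(2) = %.19Lf\n", log10l (2.0L));       /* fldlg2 */
  printf ("ln(2)    = %.19Lf\n", logl (2.0L));         /* fldln2 */
  printf ("log2(e)  = %.19Lf\n", 1.0L / logl (2.0L));  /* fldl2e */
  printf ("log2(10) = %.19Lf\n", log2l (10.0L));       /* fldl2t */
  printf ("pi       = %.19Lf\n", acosl (-1.0L));       /* fldpi */
  return 0;
}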
8174 /* Return non-zero if the constant is something that
8175 can be loaded with a special instruction. */
8178 standard_80387_constant_p (rtx x)
8180 enum machine_mode mode = GET_MODE (x);
8184 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8187 if (x == CONST0_RTX (mode))
8189 if (x == CONST1_RTX (mode))
8192 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8194 /* For XFmode constants, try to find a special 80387 instruction when
8195 optimizing for size or on those CPUs that benefit from them. */
8197 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8201 if (! ext_80387_constants_init)
8202 init_ext_80387_constants ();
8204 for (i = 0; i < 5; i++)
8205 if (real_identical (&r, &ext_80387_constants_table[i]))
8209 /* A load of the constant -0.0 or -1.0 will be split into an
8210 fldz;fchs or fld1;fchs sequence. */
8211 if (real_isnegzero (&r))
8213 if (real_identical (&r, &dconstm1))
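/* Illustrative sketch (not part of GCC) of the classification performed
   above, restricted to a few recognizable values: 0.0 and 1.0 load
   directly, their negations split into a load plus fchs, and table
   constants such as pi get their own instruction.  */
#include <math.h>
#include <stdio.h>

static const char *
x87_load_insn (long double x)
{
  if (x == 0.0L)
    return signbit (x) ? "fldz; fchs" : "fldz";
  if (x == 1.0L)
    return "fld1";
  if (x == -1.0L)
    return "fld1; fchs";
  if (x == acosl (-1.0L))   /* nearest long double to pi */
    return "fldpi";
  return "(load from memory)";
}

int
main (void)
{
  printf ("%s\n", x87_load_insn (1.0L));    /* fld1 */
  printf ("%s\n", x87_load_insn (-0.0L));   /* fldz; fchs */
  printf ("%s\n", x87_load_insn (0.5L));    /* (load from memory) */
  return 0;
}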
8219 /* Return the opcode of the special instruction to be used to load
8220 the constant X. */
8223 standard_80387_constant_opcode (rtx x)
8225 switch (standard_80387_constant_p (x))
8249 /* Return the CONST_DOUBLE representing the 80387 constant that is
8250 loaded by the specified special instruction. The argument IDX
8251 matches the return value from standard_80387_constant_p. */
8254 standard_80387_constant_rtx (int idx)
8258 if (! ext_80387_constants_init)
8259 init_ext_80387_constants ();
8275 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8279 /* Return 1 if X is all 0s and 2 if X is all 1s
8280 in a supported SSE/AVX vector mode. */
8283 standard_sse_constant_p (rtx x)
8285 enum machine_mode mode = GET_MODE (x);
8287 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8289 if (vector_all_ones_operand (x, mode))
8311 /* Return the opcode of the special instruction to be used to load
8312 the constant X. */
8315 standard_sse_constant_opcode (rtx insn, rtx x)
8317 switch (standard_sse_constant_p (x))
8320 switch (get_attr_mode (insn))
8323 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8324 return "%vpxor\t%0, %d0";
8326 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8327 return "%vxorpd\t%0, %d0";
8329 return "%vxorps\t%0, %d0";
8332 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8333 return "vpxor\t%x0, %x0, %x0";
8335 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8336 return "vxorpd\t%x0, %x0, %x0";
8338 return "vxorps\t%x0, %x0, %x0";
8346 return "vpcmpeqd\t%0, %0, %0";
8348 return "pcmpeqd\t%0, %0";
8356 /* Returns true if OP contains a symbol reference */
8359 symbolic_reference_mentioned_p (rtx op)
8364 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8367 fmt = GET_RTX_FORMAT (GET_CODE (op));
8368 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8374 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8375 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8379 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8386 /* Return true if it is appropriate to emit `ret' instructions in the
8387 body of a function. Do this only if the epilogue is simple, needing a
8388 couple of insns. Prior to reloading, we can't tell how many registers
8389 must be saved, so return false then. Return false if there is no frame
8390 marker to de-allocate. */
8393 ix86_can_use_return_insn_p (void)
8395 struct ix86_frame frame;
8397 if (! reload_completed || frame_pointer_needed)
8400 /* Don't allow more than 32k pop, since that's all we can do
8401 with one instruction. */
8402 if (crtl->args.pops_args && crtl->args.size >= 32768)
8405 ix86_compute_frame_layout (&frame);
8406 return (frame.stack_pointer_offset == UNITS_PER_WORD
8407 && (frame.nregs + frame.nsseregs) == 0);
8410 /* Value should be nonzero if functions must have frame pointers.
8411 Zero means the frame pointer need not be set up (and parms may
8412 be accessed via the stack pointer) in functions that seem suitable. */
8415 ix86_frame_pointer_required (void)
8417 /* If we accessed previous frames, then the generated code expects
8418 to be able to access the saved ebp value in our frame. */
8419 if (cfun->machine->accesses_prev_frame)
8422 /* Several x86 OSes need a frame pointer for other reasons,
8423 usually pertaining to setjmp. */
8424 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8427 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
8428 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8431 /* On Win64 SEH, very large frames need a frame pointer, as the maximum
8432 stack allocation is 4GB. */
8433 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8436 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8437 turns off the frame pointer by default. Turn it back on now if
8438 we've not got a leaf function. */
8439 if (TARGET_OMIT_LEAF_FRAME_POINTER
8440 && (!current_function_is_leaf
8441 || ix86_current_function_calls_tls_descriptor))
8444 if (crtl->profile && !flag_fentry)
8450 /* Record that the current function accesses previous call frames. */
8453 ix86_setup_frame_addresses (void)
8455 cfun->machine->accesses_prev_frame = 1;
8458 #ifndef USE_HIDDEN_LINKONCE
8459 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8460 # define USE_HIDDEN_LINKONCE 1
8462 # define USE_HIDDEN_LINKONCE 0
8466 static int pic_labels_used;
8468 /* Fills in the label name that should be used for a pc thunk for
8469 the given register. */
8472 get_pc_thunk_name (char name[32], unsigned int regno)
8474 gcc_assert (!TARGET_64BIT);
8476 if (USE_HIDDEN_LINKONCE)
8477 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8479 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8483 /* This function generates code for -fpic that loads %ebx with
8484 the return address of the caller and then returns. */
8487 ix86_code_end (void)
8492 for (regno = AX_REG; regno <= SP_REG; regno++)
8497 if (!(pic_labels_used & (1 << regno)))
8500 get_pc_thunk_name (name, regno);
8502 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8503 get_identifier (name),
8504 build_function_type_list (void_type_node, NULL_TREE));
8505 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8506 NULL_TREE, void_type_node);
8507 TREE_PUBLIC (decl) = 1;
8508 TREE_STATIC (decl) = 1;
8513 switch_to_section (darwin_sections[text_coal_section]);
8514 fputs ("\t.weak_definition\t", asm_out_file);
8515 assemble_name (asm_out_file, name);
8516 fputs ("\n\t.private_extern\t", asm_out_file);
8517 assemble_name (asm_out_file, name);
8518 putc ('\n', asm_out_file);
8519 ASM_OUTPUT_LABEL (asm_out_file, name);
8520 DECL_WEAK (decl) = 1;
8524 if (USE_HIDDEN_LINKONCE)
8526 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8528 targetm.asm_out.unique_section (decl, 0);
8529 switch_to_section (get_named_section (decl, NULL, 0));
8531 targetm.asm_out.globalize_label (asm_out_file, name);
8532 fputs ("\t.hidden\t", asm_out_file);
8533 assemble_name (asm_out_file, name);
8534 putc ('\n', asm_out_file);
8535 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8539 switch_to_section (text_section);
8540 ASM_OUTPUT_LABEL (asm_out_file, name);
8543 DECL_INITIAL (decl) = make_node (BLOCK);
8544 current_function_decl = decl;
8545 init_function_start (decl);
8546 first_function_block_is_cold = false;
8547 /* Make sure unwind info is emitted for the thunk if needed. */
8548 final_start_function (emit_barrier (), asm_out_file, 1);
8550 /* Pad stack IP move with 4 instructions (two NOPs count
8551 as one instruction). */
8552 if (TARGET_PAD_SHORT_FUNCTION)
8557 fputs ("\tnop\n", asm_out_file);
8560 xops[0] = gen_rtx_REG (Pmode, regno);
8561 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8562 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8563 fputs ("\tret\n", asm_out_file);
8564 final_end_function ();
8565 init_insn_lengths ();
8566 free_after_compilation (cfun);
8568 current_function_decl = NULL;
8571 if (flag_split_stack)
8572 file_end_indicate_split_stack ();
8575 /* Emit code for the SET_GOT patterns. */
8578 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8584 if (TARGET_VXWORKS_RTP && flag_pic)
8586 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8587 xops[2] = gen_rtx_MEM (Pmode,
8588 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8589 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8591 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8592 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8593 an unadorned address. */
8594 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8595 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8596 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8600 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8604 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8606 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8609 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8610 is what will be referenced by the Mach-O PIC subsystem. */
8612 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8615 targetm.asm_out.internal_label (asm_out_file, "L",
8616 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8621 get_pc_thunk_name (name, REGNO (dest));
8622 pic_labels_used |= 1 << REGNO (dest);
8624 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8625 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8626 output_asm_insn ("call\t%X2", xops);
8627 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8628 is what will be referenced by the Mach-O PIC subsystem. */
8631 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8633 targetm.asm_out.internal_label (asm_out_file, "L",
8634 CODE_LABEL_NUMBER (label));
8639 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
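/* Illustrative sketch (not part of GCC): what the two routines above
   cooperate to produce in 32-bit PIC output.  Compiling a function that
   touches a global with -m32 -fpic -S typically begins with

       call   __x86.get_pc_thunk.bx
       addl   $_GLOBAL_OFFSET_TABLE_, %ebx

   while ix86_code_end emits the thunk body itself, which is just

   __x86.get_pc_thunk.bx:
       movl   (%esp), %ebx
       ret

   leaving %ebx holding the GOT address.  Compile-only sketch; COUNTER is
   a hypothetical external global.  */
extern int counter;

int
bump (void)
{
  return ++counter;   /* the access to COUNTER is relative to %ebx/GOT */
}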
8644 /* Generate a "push" pattern for input ARG. */
8649 struct machine_function *m = cfun->machine;
8651 if (m->fs.cfa_reg == stack_pointer_rtx)
8652 m->fs.cfa_offset += UNITS_PER_WORD;
8653 m->fs.sp_offset += UNITS_PER_WORD;
8655 return gen_rtx_SET (VOIDmode,
8657 gen_rtx_PRE_DEC (Pmode,
8658 stack_pointer_rtx)),
8662 /* Generate a "pop" pattern for input ARG. */
8667 return gen_rtx_SET (VOIDmode,
8670 gen_rtx_POST_INC (Pmode,
8671 stack_pointer_rtx)));
8674 /* Return >= 0 if there is an unused call-clobbered register available
8675 for the entire function. */
8678 ix86_select_alt_pic_regnum (void)
8680 if (current_function_is_leaf
8682 && !ix86_current_function_calls_tls_descriptor)
8685 /* Can't use the same register for both PIC and DRAP. */
8687 drap = REGNO (crtl->drap_reg);
8690 for (i = 2; i >= 0; --i)
8691 if (i != drap && !df_regs_ever_live_p (i))
8695 return INVALID_REGNUM;
8698 /* Return TRUE if we need to save REGNO. */
8701 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8703 if (pic_offset_table_rtx
8704 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8705 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8707 || crtl->calls_eh_return
8708 || crtl->uses_const_pool))
8709 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8711 if (crtl->calls_eh_return && maybe_eh_return)
8716 unsigned test = EH_RETURN_DATA_REGNO (i);
8717 if (test == INVALID_REGNUM)
8724 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8727 return (df_regs_ever_live_p (regno)
8728 && !call_used_regs[regno]
8729 && !fixed_regs[regno]
8730 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8733 /* Return the number of saved general purpose registers. */
8736 ix86_nsaved_regs (void)
8741 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8742 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8747 /* Return the number of saved SSE registers. */
8750 ix86_nsaved_sseregs (void)
8755 if (!TARGET_64BIT_MS_ABI)
8757 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8758 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8763 /* Given FROM and TO register numbers, say whether this elimination is
8764 allowed. If stack alignment is needed, we can only replace argument
8765 pointer with hard frame pointer, or replace frame pointer with stack
8766 pointer. Otherwise, frame pointer elimination is automatically
8767 handled and all other eliminations are valid. */
8770 ix86_can_eliminate (const int from, const int to)
8772 if (stack_realign_fp)
8773 return ((from == ARG_POINTER_REGNUM
8774 && to == HARD_FRAME_POINTER_REGNUM)
8775 || (from == FRAME_POINTER_REGNUM
8776 && to == STACK_POINTER_REGNUM));
8778 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8781 /* Return the offset between two registers, one to be eliminated, and the other
8782 its replacement, at the start of a routine. */
8785 ix86_initial_elimination_offset (int from, int to)
8787 struct ix86_frame frame;
8788 ix86_compute_frame_layout (&frame);
8790 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8791 return frame.hard_frame_pointer_offset;
8792 else if (from == FRAME_POINTER_REGNUM
8793 && to == HARD_FRAME_POINTER_REGNUM)
8794 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8797 gcc_assert (to == STACK_POINTER_REGNUM);
8799 if (from == ARG_POINTER_REGNUM)
8800 return frame.stack_pointer_offset;
8802 gcc_assert (from == FRAME_POINTER_REGNUM);
8803 return frame.stack_pointer_offset - frame.frame_pointer_offset;
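/* Illustrative sketch (not part of GCC): how the offsets returned above
   relate.  All three anchors are measured from the CFA downward, so each
   elimination is just a subtraction of two of them.  The numbers below
   are made up for a toy frame.  */
#include <stdio.h>

struct toy_frame
{
  long hard_frame_pointer_offset;  /* where the saved %ebp/%rbp sits */
  long frame_pointer_offset;       /* the virtual frame pointer */
  long stack_pointer_offset;       /* the final stack pointer */
};

int
main (void)
{
  struct toy_frame f = { 16, 48, 112 };

  printf ("argp -> hard fp: %ld\n", f.hard_frame_pointer_offset);
  printf ("fp   -> hard fp: %ld\n",
	  f.hard_frame_pointer_offset - f.frame_pointer_offset);
  printf ("argp -> sp:      %ld\n", f.stack_pointer_offset);
  printf ("fp   -> sp:      %ld\n",
	  f.stack_pointer_offset - f.frame_pointer_offset);
  return 0;
}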
8807 /* In a dynamically-aligned function, we can't know the offset from
8808 stack pointer to frame pointer, so we must ensure that setjmp
8809 eliminates fp against the hard fp (%ebp) rather than trying to
8810 index from %esp up to the top of the frame across a gap that is
8811 of unknown (at compile-time) size. */
8813 ix86_builtin_setjmp_frame_value (void)
8815 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8818 /* When using -fsplit-stack, the allocation routines set a field in
8819 the TCB to the bottom of the stack plus this much space, measured
8820 in bytes. */
8822 #define SPLIT_STACK_AVAILABLE 256
8824 /* Fill the ix86_frame structure with information about the frame of the
function currently being compiled. */
8827 ix86_compute_frame_layout (struct ix86_frame *frame)
8829 unsigned int stack_alignment_needed;
8830 HOST_WIDE_INT offset;
8831 unsigned int preferred_alignment;
8832 HOST_WIDE_INT size = get_frame_size ();
8833 HOST_WIDE_INT to_allocate;
8835 frame->nregs = ix86_nsaved_regs ();
8836 frame->nsseregs = ix86_nsaved_sseregs ();
8838 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8839 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8841 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
8842 except within function prologues and in leaf functions. */
8843 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8844 && (!current_function_is_leaf || cfun->calls_alloca != 0
8845 || ix86_current_function_calls_tls_descriptor))
8847 preferred_alignment = 16;
8848 stack_alignment_needed = 16;
8849 crtl->preferred_stack_boundary = 128;
8850 crtl->stack_alignment_needed = 128;
8853 gcc_assert (!size || stack_alignment_needed);
8854 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8855 gcc_assert (preferred_alignment <= stack_alignment_needed);
8857 /* For SEH we have to limit the amount of code movement into the prologue.
8858 At present we do this via a BLOCKAGE, at which point there's very little
8859 scheduling that can be done, which means that there's very little point
8860 in doing anything except PUSHs. */
8862 cfun->machine->use_fast_prologue_epilogue = false;
8864 /* During reload iteration the number of registers saved can change.
8865 Recompute the value as needed. Do not recompute when the number of
8866 registers didn't change, as reload does multiple calls to the function
8867 and does not expect the decision to change within a single iteration. */
8868 else if (!optimize_function_for_size_p (cfun)
8869 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8871 int count = frame->nregs;
8872 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8874 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8876 /* The fast prologue uses move instead of push to save registers. This
8877 is significantly longer, but also executes faster, as modern hardware
8878 can execute the moves in parallel but can't do that for push/pop.
8880 Be careful about choosing which prologue to emit: when the function
8881 takes many instructions to execute, we may as well use the slow version;
8882 likewise when the function is known to be outside a hot spot (this is
8883 known with feedback only). Weight the size of the function by the number
8884 of registers to save, as it is cheap to use one or two push instructions
8885 but very slow to use many of them. */
8887 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8888 if (node->frequency < NODE_FREQUENCY_NORMAL
8889 || (flag_branch_probabilities
8890 && node->frequency < NODE_FREQUENCY_HOT))
8891 cfun->machine->use_fast_prologue_epilogue = false;
8893 cfun->machine->use_fast_prologue_epilogue
8894 = !expensive_function_p (count);
8897 frame->save_regs_using_mov
8898 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8899 /* If static stack checking is enabled and done with probes,
8900 the registers need to be saved before allocating the frame. */
8901 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8903 /* Skip return address. */
8904 offset = UNITS_PER_WORD;
8906 /* Skip pushed static chain. */
8907 if (ix86_static_chain_on_stack)
8908 offset += UNITS_PER_WORD;
8910 /* Skip saved base pointer. */
8911 if (frame_pointer_needed)
8912 offset += UNITS_PER_WORD;
8913 frame->hfp_save_offset = offset;
8915 /* The traditional frame pointer location is at the top of the frame. */
8916 frame->hard_frame_pointer_offset = offset;
8918 /* Register save area */
8919 offset += frame->nregs * UNITS_PER_WORD;
8920 frame->reg_save_offset = offset;
8922 /* On SEH target, registers are pushed just before the frame pointer
8923 location. */
8925 frame->hard_frame_pointer_offset = offset;
8927 /* Align and set SSE register save area. */
8928 if (frame->nsseregs)
8930 /* The only ABI that has saved SSE registers (Win64) also has a
8931 16-byte aligned default stack, and thus we don't need to be
8932 within the re-aligned local stack frame to save them. */
8933 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8934 offset = (offset + 16 - 1) & -16;
8935 offset += frame->nsseregs * 16;
8937 frame->sse_reg_save_offset = offset;
8939 /* The re-aligned stack starts here. Values before this point are not
8940 directly comparable with values below this point. In order to make
8941 sure that no value happens to be the same before and after, force
8942 the alignment computation below to add a non-zero value. */
8943 if (stack_realign_fp)
8944 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8947 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8948 offset += frame->va_arg_size;
8950 /* Align start of frame for local function. */
8951 if (stack_realign_fp
8952 || offset != frame->sse_reg_save_offset
8954 || !current_function_is_leaf
8955 || cfun->calls_alloca
8956 || ix86_current_function_calls_tls_descriptor)
8957 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8959 /* Frame pointer points here. */
8960 frame->frame_pointer_offset = offset;
8964 /* Add the outgoing arguments area. This can be skipped if we eliminated
8965 all the function calls as dead code.
8966 Skipping is however impossible when the function calls alloca, as the
8967 alloca expander assumes that the last crtl->outgoing_args_size bytes
8968 of the stack frame are unused. */
8969 if (ACCUMULATE_OUTGOING_ARGS
8970 && (!current_function_is_leaf || cfun->calls_alloca
8971 || ix86_current_function_calls_tls_descriptor))
8973 offset += crtl->outgoing_args_size;
8974 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8977 frame->outgoing_arguments_size = 0;
8979 /* Align the stack boundary. Only needed if we're calling another function
8980 or using alloca. */
8981 if (!current_function_is_leaf || cfun->calls_alloca
8982 || ix86_current_function_calls_tls_descriptor)
8983 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8985 /* We've reached end of stack frame. */
8986 frame->stack_pointer_offset = offset;
8988 /* Size prologue needs to allocate. */
8989 to_allocate = offset - frame->sse_reg_save_offset;
8991 if ((!to_allocate && frame->nregs <= 1)
8992 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8993 frame->save_regs_using_mov = false;
8995 if (ix86_using_red_zone ()
8996 && current_function_sp_is_unchanging
8997 && current_function_is_leaf
8998 && !ix86_current_function_calls_tls_descriptor)
9000 frame->red_zone_size = to_allocate;
9001 if (frame->save_regs_using_mov)
9002 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9003 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9004 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9007 frame->red_zone_size = 0;
9008 frame->stack_pointer_offset -= frame->red_zone_size;
9010 /* The SEH frame pointer location is near the bottom of the frame.
9011 This is enforced by the fact that the difference between the
9012 stack pointer and the frame pointer is limited to 240 bytes in
9013 the unwind data structure. */
9018 /* If we can leave the frame pointer where it is, do so; this also
9019 provides the establisher frame for __builtin_frame_address (0). */
9020 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9021 if (diff <= SEH_MAX_FRAME_SIZE
9022 && (diff > 240 || (diff & 15) != 0)
9023 && !crtl->accesses_prior_frames)
9025 /* Ideally we'd determine what portion of the local stack frame
9026 (within the constraint of the lowest 240) is most heavily used.
9027 But without that complication, simply bias the frame pointer
9028 by 128 bytes so as to maximize the amount of the local stack
9029 frame that is addressable with 8-bit offsets. */
9030 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
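/* Illustrative sketch (not part of GCC): the rounding idiom used
   throughout the layout code above.  For a power-of-two ALIGN,
   (offset + align - 1) & -align rounds OFFSET up to the next multiple
   of ALIGN, because -align in two's complement is the mask with the
   low log2(align) bits cleared.  */
#include <assert.h>

static long
align_up (long offset, long align)   /* ALIGN must be a power of two */
{
  return (offset + align - 1) & -align;
}

int
main (void)
{
  assert (align_up (0, 16) == 0);
  assert (align_up (1, 16) == 16);
  assert (align_up (16, 16) == 16);
  assert (align_up (17, 16) == 32);
  return 0;
}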
9035 /* This is semi-inlined memory_address_length, but simplified
9036 since we know that we're always dealing with reg+offset, and
9037 to avoid having to create and discard all that rtl. */
9040 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9046 /* EBP and R13 cannot be encoded without an offset. */
9047 len = (regno == BP_REG || regno == R13_REG);
9049 else if (IN_RANGE (offset, -128, 127))
9052 /* ESP and R12 must be encoded with a SIB byte. */
9053 if (regno == SP_REG || regno == R12_REG)
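/* Quick sketch (not part of GCC) of the x86 address-size rules encoded
   above: a zero displacement is free except for rBP/r13 (whose mod=00
   encoding means something else), disp8 covers [-128, 127], anything
   larger needs disp32, and rSP/r12 always pay one extra SIB byte.  */
#include <stdio.h>

static int
extra_addr_bytes (int is_bp_or_r13, int is_sp_or_r12, long offset)
{
  int len;

  if (offset == 0)
    len = is_bp_or_r13;           /* rBP/r13 cannot omit the disp */
  else if (offset >= -128 && offset <= 127)
    len = 1;                      /* disp8 */
  else
    len = 4;                      /* disp32 */

  if (is_sp_or_r12)
    len++;                        /* SIB byte */
  return len;
}

int
main (void)
{
  printf ("[esp+8]   -> %d extra bytes\n", extra_addr_bytes (0, 1, 8));
  printf ("[ebp]     -> %d extra bytes\n", extra_addr_bytes (1, 0, 0));
  printf ("[eax+512] -> %d extra bytes\n", extra_addr_bytes (0, 0, 512));
  return 0;
}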
9059 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9060 The valid base registers are taken from CFUN->MACHINE->FS. */
9063 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9065 const struct machine_function *m = cfun->machine;
9066 rtx base_reg = NULL;
9067 HOST_WIDE_INT base_offset = 0;
9069 if (m->use_fast_prologue_epilogue)
9071 /* Choose the base register most likely to allow the most scheduling
9072 opportunities. Generally FP is valid throughout the function,
9073 while DRAP must be reloaded within the epilogue. But choose either
9074 over the SP, due to the SP's larger encoding size. */
9078 base_reg = hard_frame_pointer_rtx;
9079 base_offset = m->fs.fp_offset - cfa_offset;
9081 else if (m->fs.drap_valid)
9083 base_reg = crtl->drap_reg;
9084 base_offset = 0 - cfa_offset;
9086 else if (m->fs.sp_valid)
9088 base_reg = stack_pointer_rtx;
9089 base_offset = m->fs.sp_offset - cfa_offset;
9094 HOST_WIDE_INT toffset;
9097 /* Choose the base register with the smallest address encoding.
9098 With a tie, choose FP > DRAP > SP. */
9101 base_reg = stack_pointer_rtx;
9102 base_offset = m->fs.sp_offset - cfa_offset;
9103 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9105 if (m->fs.drap_valid)
9107 toffset = 0 - cfa_offset;
9108 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9111 base_reg = crtl->drap_reg;
9112 base_offset = toffset;
9118 toffset = m->fs.fp_offset - cfa_offset;
9119 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9122 base_reg = hard_frame_pointer_rtx;
9123 base_offset = toffset;
9128 gcc_assert (base_reg != NULL);
9130 return plus_constant (base_reg, base_offset);
9133 /* Emit code to save registers in the prologue. */
9136 ix86_emit_save_regs (void)
9141 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9142 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9144 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9145 RTX_FRAME_RELATED_P (insn) = 1;
9149 /* Emit a single register save at CFA - CFA_OFFSET. */
9152 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9153 HOST_WIDE_INT cfa_offset)
9155 struct machine_function *m = cfun->machine;
9156 rtx reg = gen_rtx_REG (mode, regno);
9157 rtx mem, addr, base, insn;
9159 addr = choose_baseaddr (cfa_offset);
9160 mem = gen_frame_mem (mode, addr);
9162 /* For SSE saves, we need to indicate the 128-bit alignment. */
9163 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9165 insn = emit_move_insn (mem, reg);
9166 RTX_FRAME_RELATED_P (insn) = 1;
9169 if (GET_CODE (base) == PLUS)
9170 base = XEXP (base, 0);
9171 gcc_checking_assert (REG_P (base));
9173 /* When saving registers into a re-aligned local stack frame, avoid
9174 any tricky guessing by dwarf2out. */
9175 if (m->fs.realigned)
9177 gcc_checking_assert (stack_realign_drap);
9179 if (regno == REGNO (crtl->drap_reg))
9181 /* A bit of a hack. We force the DRAP register to be saved in
9182 the re-aligned stack frame, which provides us with a copy
9183 of the CFA that will last past the prologue. Install it. */
9184 gcc_checking_assert (cfun->machine->fs.fp_valid);
9185 addr = plus_constant (hard_frame_pointer_rtx,
9186 cfun->machine->fs.fp_offset - cfa_offset);
9187 mem = gen_rtx_MEM (mode, addr);
9188 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9192 /* The frame pointer is a stable reference within the
9193 aligned frame. Use it. */
9194 gcc_checking_assert (cfun->machine->fs.fp_valid);
9195 addr = plus_constant (hard_frame_pointer_rtx,
9196 cfun->machine->fs.fp_offset - cfa_offset);
9197 mem = gen_rtx_MEM (mode, addr);
9198 add_reg_note (insn, REG_CFA_EXPRESSION,
9199 gen_rtx_SET (VOIDmode, mem, reg));
9203 /* The memory may not be relative to the current CFA register,
9204 which means that we may need to generate a new pattern for
9205 use by the unwind info. */
9206 else if (base != m->fs.cfa_reg)
9208 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9209 mem = gen_rtx_MEM (mode, addr);
9210 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9214 /* Emit code to save registers using MOV insns.
9215 First register is stored at CFA - CFA_OFFSET. */
9217 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9221 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9222 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9224 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9225 cfa_offset -= UNITS_PER_WORD;
9229 /* Emit code to save SSE registers using MOV insns.
9230 First register is stored at CFA - CFA_OFFSET. */
9232 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9236 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9237 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9239 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9244 static GTY(()) rtx queued_cfa_restores;
9246 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9247 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9248 Don't add the note if the previously saved value will be left untouched
9249 within the stack red zone until return, as unwinders can find the same
9250 value in the register and on the stack. */
9253 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9255 if (!crtl->shrink_wrapped
9256 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9261 add_reg_note (insn, REG_CFA_RESTORE, reg);
9262 RTX_FRAME_RELATED_P (insn) = 1;
9266 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9269 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9272 ix86_add_queued_cfa_restore_notes (rtx insn)
9275 if (!queued_cfa_restores)
9277 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9279 XEXP (last, 1) = REG_NOTES (insn);
9280 REG_NOTES (insn) = queued_cfa_restores;
9281 queued_cfa_restores = NULL_RTX;
9282 RTX_FRAME_RELATED_P (insn) = 1;
9285 /* Expand prologue or epilogue stack adjustment.
9286 The pattern exists to put a dependency on all ebp-based memory accesses.
9287 STYLE should be negative if instructions should be marked as frame related,
9288 zero if the %r11 register is live and cannot be freely used, and positive
9289 otherwise. */
9292 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9293 int style, bool set_cfa)
9295 struct machine_function *m = cfun->machine;
9297 bool add_frame_related_expr = false;
9300 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9301 else if (x86_64_immediate_operand (offset, DImode))
9302 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9306 /* r11 is used by indirect sibcall return as well, set before the
9307 epilogue and used after the epilogue. */
9309 tmp = gen_rtx_REG (DImode, R11_REG);
9312 gcc_assert (src != hard_frame_pointer_rtx
9313 && dest != hard_frame_pointer_rtx);
9314 tmp = hard_frame_pointer_rtx;
9316 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9318 add_frame_related_expr = true;
9320 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9323 insn = emit_insn (insn);
9325 ix86_add_queued_cfa_restore_notes (insn);
9331 gcc_assert (m->fs.cfa_reg == src);
9332 m->fs.cfa_offset += INTVAL (offset);
9333 m->fs.cfa_reg = dest;
9335 r = gen_rtx_PLUS (Pmode, src, offset);
9336 r = gen_rtx_SET (VOIDmode, dest, r);
9337 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9338 RTX_FRAME_RELATED_P (insn) = 1;
9342 RTX_FRAME_RELATED_P (insn) = 1;
9343 if (add_frame_related_expr)
9345 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9346 r = gen_rtx_SET (VOIDmode, dest, r);
9347 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9351 if (dest == stack_pointer_rtx)
9353 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9354 bool valid = m->fs.sp_valid;
9356 if (src == hard_frame_pointer_rtx)
9358 valid = m->fs.fp_valid;
9359 ooffset = m->fs.fp_offset;
9361 else if (src == crtl->drap_reg)
9363 valid = m->fs.drap_valid;
9368 /* Else there are two possibilities: SP itself, which we set
9369 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9370 taken care of by hand along the eh_return path. */
9371 gcc_checking_assert (src == stack_pointer_rtx
9372 || offset == const0_rtx);
9375 m->fs.sp_offset = ooffset - INTVAL (offset);
9376 m->fs.sp_valid = valid;
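/* Illustrative sketch (not part of GCC): why a scratch register appears
   above.  x86-64 add/sub accept at most a sign-extended 32-bit immediate
   (the x86_64_immediate_operand test), so a larger adjustment must be
   materialized in a register first:

       addq    $-104857600, %rsp        # -100 MiB fits in imm32
   vs.
       movabsq $-8589934592, %r11       # -8 GiB does not
       addq    %r11, %rsp                                          */
#include <stdint.h>
#include <stdio.h>

static int
fits_imm32 (int64_t v)
{
  return v >= INT32_MIN && v <= INT32_MAX;
}

int
main (void)
{
  printf ("%d\n", fits_imm32 (-104857600LL));    /* 1: single add insn */
  printf ("%d\n", fits_imm32 (-8589934592LL));   /* 0: needs movabs + add */
  return 0;
}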
9380 /* Find an available register to be used as the dynamic realign argument
9381 pointer register. Such a register will be written in the prologue and
9382 used at the beginning of the body, so it must not be
9383 1. a parameter passing register.
9384 2. the GOT pointer.
9385 We reuse the static-chain register if it is available. Otherwise, we
9386 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9387 longer encoding.
9389 Return: the regno of the chosen register. */
9392 find_drap_reg (void)
9394 tree decl = cfun->decl;
9398 /* Use R13 for a nested function or a function that needs a static chain.
9399 Since a function with a tail call may use any caller-saved
9400 registers in the epilogue, DRAP must not use a caller-saved
9401 register in such a case. */
9402 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9409 /* Use DI for a nested function or a function that needs a static chain.
9410 Since a function with a tail call may use any caller-saved
9411 registers in the epilogue, DRAP must not use a caller-saved
9412 register in such a case. */
9413 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9416 /* Reuse the static chain register if it isn't used for parameter
9417 passing. */
9418 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9420 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9421 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9428 /* Return minimum incoming stack alignment. */
9431 ix86_minimum_incoming_stack_boundary (bool sibcall)
9433 unsigned int incoming_stack_boundary;
9435 /* Prefer the one specified at command line. */
9436 if (ix86_user_incoming_stack_boundary)
9437 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9438 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9439 when -mstackrealign is used, this isn't a sibcall check, and the
9440 estimated stack alignment is 128 bits. */
9443 && ix86_force_align_arg_pointer
9444 && crtl->stack_alignment_estimated == 128)
9445 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9447 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9449 /* Incoming stack alignment can be changed on individual functions
9450 via force_align_arg_pointer attribute. We use the smallest
9451 incoming stack boundary. */
9452 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9453 && lookup_attribute (ix86_force_align_arg_pointer_string,
9454 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9455 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9457 /* The incoming stack frame has to be aligned at least at
9458 parm_stack_boundary. */
9459 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9460 incoming_stack_boundary = crtl->parm_stack_boundary;
9462 /* The stack at the entry of main is aligned by the runtime. We use the
9463 smallest incoming stack boundary. */
9464 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9465 && DECL_NAME (current_function_decl)
9466 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9467 && DECL_FILE_SCOPE_P (current_function_decl))
9468 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9470 return incoming_stack_boundary;
9473 /* Update incoming stack boundary and estimated stack alignment. */
9476 ix86_update_stack_boundary (void)
9478 ix86_incoming_stack_boundary
9479 = ix86_minimum_incoming_stack_boundary (false);
9481 /* x86_64 varargs need 16-byte stack alignment for the register save
9482 area. */
9485 && crtl->stack_alignment_estimated < 128)
9486 crtl->stack_alignment_estimated = 128;
9489 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9490 needed or an rtx for DRAP otherwise. */
9493 ix86_get_drap_rtx (void)
9495 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9496 crtl->need_drap = true;
9498 if (stack_realign_drap)
9500 /* Assign DRAP to vDRAP and return vDRAP. */
9501 unsigned int regno = find_drap_reg ();
9506 arg_ptr = gen_rtx_REG (Pmode, regno);
9507 crtl->drap_reg = arg_ptr;
9510 drap_vreg = copy_to_reg (arg_ptr);
9514 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9517 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9518 RTX_FRAME_RELATED_P (insn) = 1;
9526 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9529 ix86_internal_arg_pointer (void)
9531 return virtual_incoming_args_rtx;
9534 struct scratch_reg {
9539 /* Return a short-lived scratch register for use on function entry.
9540 In 32-bit mode, it is valid only after the registers are saved
9541 in the prologue. This register must be released by means of
9542 release_scratch_register_on_entry once it is dead. */
9545 get_scratch_register_on_entry (struct scratch_reg *sr)
9553 /* We always use R11 in 64-bit mode. */
9558 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9560 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9561 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9562 int regparm = ix86_function_regparm (fntype, decl);
9564 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9566 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9567 for the static chain register. */
9568 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9569 && drap_regno != AX_REG)
9571 else if (regparm < 2 && drap_regno != DX_REG)
9573 /* ecx is the static chain register. */
9574 else if (regparm < 3 && !fastcall_p && !static_chain_p
9575 && drap_regno != CX_REG)
9577 else if (ix86_save_reg (BX_REG, true))
9579 /* esi is the static chain register. */
9580 else if (!(regparm == 3 && static_chain_p)
9581 && ix86_save_reg (SI_REG, true))
9583 else if (ix86_save_reg (DI_REG, true))
9587 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9592 sr->reg = gen_rtx_REG (Pmode, regno);
9595 rtx insn = emit_insn (gen_push (sr->reg));
9596 RTX_FRAME_RELATED_P (insn) = 1;
9600 /* Release a scratch register obtained from the preceding function. */
9603 release_scratch_register_on_entry (struct scratch_reg *sr)
9607 rtx x, insn = emit_insn (gen_pop (sr->reg));
9609 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9610 RTX_FRAME_RELATED_P (insn) = 1;
9611 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9612 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9613 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9617 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9619 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9622 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9624 /* We skip the probe for the first interval + a small dope of 4 words and
9625 probe that many bytes past the specified size to maintain a protection
9626 area at the bottom of the stack. */
9627 const int dope = 4 * UNITS_PER_WORD;
9628 rtx size_rtx = GEN_INT (size), last;
9630 /* See if we have a constant small number of probes to generate. If so,
9631 that's the easy case. The run-time loop is made up of 11 insns in the
9632 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9633 for n intervals. */
9634 if (size <= 5 * PROBE_INTERVAL)
9636 HOST_WIDE_INT i, adjust;
9637 bool first_probe = true;
9639 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9640 values of N from 1 until it exceeds SIZE. If only one probe is
9641 needed, this will not generate any code. Then adjust and probe
9642 to PROBE_INTERVAL + SIZE. */
9643 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9647 adjust = 2 * PROBE_INTERVAL + dope;
9648 first_probe = false;
9651 adjust = PROBE_INTERVAL;
9653 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9654 plus_constant (stack_pointer_rtx, -adjust)));
9655 emit_stack_probe (stack_pointer_rtx);
9659 adjust = size + PROBE_INTERVAL + dope;
9661 adjust = size + PROBE_INTERVAL - i;
9663 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9664 plus_constant (stack_pointer_rtx, -adjust)));
9665 emit_stack_probe (stack_pointer_rtx);
9667 /* Adjust back to account for the additional first interval. */
9668 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9669 plus_constant (stack_pointer_rtx,
9670 PROBE_INTERVAL + dope)));
9673 /* Otherwise, do the same as above, but in a loop. Note that we must be
9674 extra careful with variables wrapping around because we might be at
9675 the very top (or the very bottom) of the address space and we have
9676 to be able to handle this case properly; in particular, we use an
9677 equality test for the loop condition. */
9680 HOST_WIDE_INT rounded_size;
9681 struct scratch_reg sr;
9683 get_scratch_register_on_entry (&sr);
9686 /* Step 1: round SIZE to the previous multiple of the interval. */
9688 rounded_size = size & -PROBE_INTERVAL;
9691 /* Step 2: compute initial and final value of the loop counter. */
9693 /* SP = SP_0 + PROBE_INTERVAL. */
9694 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9695 plus_constant (stack_pointer_rtx,
9696 - (PROBE_INTERVAL + dope))));
9698 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9699 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9700 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9701 gen_rtx_PLUS (Pmode, sr.reg,
9702 stack_pointer_rtx)));
9707 while (SP != LAST_ADDR)
9709 SP = SP + PROBE_INTERVAL
9710 probe at SP
9713 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9714 values of N from 1 until it is equal to ROUNDED_SIZE. */
9716 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9719 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9720 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9722 if (size != rounded_size)
9724 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9725 plus_constant (stack_pointer_rtx,
9726 rounded_size - size)));
9727 emit_stack_probe (stack_pointer_rtx);
9730 /* Adjust back to account for the additional first interval. */
9731 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9732 plus_constant (stack_pointer_rtx,
9733 PROBE_INTERVAL + dope)));
9735 release_scratch_register_on_entry (&sr);
9738 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9740 /* Even if the stack pointer isn't the CFA register, we need to correctly
9741 describe the adjustments made to it, in particular differentiate the
9742 frame-related ones from the frame-unrelated ones. */
9745 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9746 XVECEXP (expr, 0, 0)
9747 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9748 plus_constant (stack_pointer_rtx, -size));
9749 XVECEXP (expr, 0, 1)
9750 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9751 plus_constant (stack_pointer_rtx,
9752 PROBE_INTERVAL + dope + size));
9753 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9754 RTX_FRAME_RELATED_P (last) = 1;
9756 cfun->machine->fs.sp_offset += size;
9759 /* Make sure nothing is scheduled before we are done. */
9760 emit_insn (gen_blockage ());
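/* Illustrative sketch (not part of GCC): what a "probe" is.  Each guard
   page must be touched in order as the stack grows, which the code above
   does with an "or $0" read-modify-write that faults the page in without
   changing any value.  A simplified model on a heap buffer standing in
   for the stack (the first-interval skip and the dope bytes are ignored
   here); PAGE stands in for PROBE_INTERVAL.  */
#include <stdio.h>
#include <stdlib.h>

#define PAGE 4096

int
main (void)
{
  long size = 3 * PAGE + 100;                  /* bytes to "allocate" */
  unsigned char *base = malloc (size);
  volatile unsigned char *top = base + size;   /* stands in for the old SP */
  long off;

  if (!base)
    return 1;

  /* Touch one byte per interval below TOP, then probe exactly at
     TOP - SIZE, mirroring the probe placement above.  */
  for (off = PAGE; off < size; off += PAGE)
    top[-off] |= 0;
  top[-size] |= 0;

  printf ("probed %ld bytes in steps of %d\n", size, PAGE);
  free (base);
  return 0;
}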
9763 /* Adjust the stack pointer up to REG while probing it. */
9766 output_adjust_stack_and_probe (rtx reg)
9768 static int labelno = 0;
9769 char loop_lab[32], end_lab[32];
9772 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9773 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9775 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9777 /* Jump to END_LAB if SP == LAST_ADDR. */
9778 xops[0] = stack_pointer_rtx;
9780 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9781 fputs ("\tje\t", asm_out_file);
9782 assemble_name_raw (asm_out_file, end_lab);
9783 fputc ('\n', asm_out_file);
9785 /* SP = SP + PROBE_INTERVAL. */
9786 xops[1] = GEN_INT (PROBE_INTERVAL);
9787 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9790 xops[1] = const0_rtx;
9791 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9793 fprintf (asm_out_file, "\tjmp\t");
9794 assemble_name_raw (asm_out_file, loop_lab);
9795 fputc ('\n', asm_out_file);
9797 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9802 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9803 inclusive. These are offsets from the current stack pointer. */
9806 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9808 /* See if we have a constant small number of probes to generate. If so,
9809 that's the easy case. The run-time loop is made up of 7 insns in the
9810 generic case while the compile-time loop is made up of n insns for n
9811 intervals. */
9812 if (size <= 7 * PROBE_INTERVAL)
9816 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9817 it exceeds SIZE. If only one probe is needed, this will not
9818 generate any code. Then probe at FIRST + SIZE. */
9819 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9820 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9822 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9825 /* Otherwise, do the same as above, but in a loop. Note that we must be
9826 extra careful with variables wrapping around because we might be at
9827 the very top (or the very bottom) of the address space and we have
9828 to be able to handle this case properly; in particular, we use an
9829 equality test for the loop condition. */
9832 HOST_WIDE_INT rounded_size, last;
9833 struct scratch_reg sr;
9835 get_scratch_register_on_entry (&sr);
9838 /* Step 1: round SIZE to the previous multiple of the interval. */
9840 rounded_size = size & -PROBE_INTERVAL;
9843 /* Step 2: compute initial and final value of the loop counter. */
9845 /* TEST_OFFSET = FIRST. */
9846 emit_move_insn (sr.reg, GEN_INT (-first));
9848 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9849 last = first + rounded_size;
9854 while (TEST_ADDR != LAST_ADDR)
9856 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9857 probe at TEST_ADDR
9860 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9861 until it is equal to ROUNDED_SIZE. */
9863 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9866 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9867 that SIZE is equal to ROUNDED_SIZE. */
9869 if (size != rounded_size)
9870 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9873 rounded_size - size));
9875 release_scratch_register_on_entry (&sr);
9878 /* Make sure nothing is scheduled before we are done. */
9879 emit_insn (gen_blockage ());
9882 /* Probe a range of stack addresses from REG to END, inclusive. These are
9883 offsets from the current stack pointer. */
9886 output_probe_stack_range (rtx reg, rtx end)
9888 static int labelno = 0;
9889 char loop_lab[32], end_lab[32];
9892 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9893 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9895 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9897 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9900 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9901 fputs ("\tje\t", asm_out_file);
9902 assemble_name_raw (asm_out_file, end_lab);
9903 fputc ('\n', asm_out_file);
9905 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9906 xops[1] = GEN_INT (PROBE_INTERVAL);
9907 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9909 /* Probe at TEST_ADDR. */
9910 xops[0] = stack_pointer_rtx;
9912 xops[2] = const0_rtx;
9913 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9915 fprintf (asm_out_file, "\tjmp\t");
9916 assemble_name_raw (asm_out_file, loop_lab);
9917 fputc ('\n', asm_out_file);
9919 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9924 /* Finalize the stack_realign_needed flag, which guides the prologue and
9925 epilogue so they are generated in the correct form. */
9927 ix86_finalize_stack_realign_flags (void)
9929 /* Check whether stack realignment is really needed after reload, and
9930 store the result in cfun. */
9931 unsigned int incoming_stack_boundary
9932 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9933 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9934 unsigned int stack_realign = (incoming_stack_boundary
9935 < (current_function_is_leaf
9936 ? crtl->max_used_stack_slot_alignment
9937 : crtl->stack_alignment_needed));
9939 if (crtl->stack_realign_finalized)
9941 /* After stack_realign_needed is finalized, we can no longer update it. */
9943 gcc_assert (crtl->stack_realign_needed == stack_realign);
9947 /* If the only reason for frame_pointer_needed is that we conservatively
9948 assumed stack realignment might be needed, but in the end nothing that
9949 needed the stack alignment had been spilled, clear frame_pointer_needed
9950 and say we don't need stack realignment. */
9953 && frame_pointer_needed
9954 && current_function_is_leaf
9955 && flag_omit_frame_pointer
9956 && current_function_sp_is_unchanging
9957 && !ix86_current_function_calls_tls_descriptor
9958 && !crtl->accesses_prior_frames
9959 && !cfun->calls_alloca
9960 && !crtl->calls_eh_return
9961 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
9962 && !ix86_frame_pointer_required ()
9963 && get_frame_size () == 0
9964 && ix86_nsaved_sseregs () == 0
9965 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
9967 HARD_REG_SET set_up_by_prologue, prologue_used;
9970 CLEAR_HARD_REG_SET (prologue_used);
9971 CLEAR_HARD_REG_SET (set_up_by_prologue);
9972 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
9973 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
9974 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
9975 HARD_FRAME_POINTER_REGNUM);
9979 FOR_BB_INSNS (bb, insn)
9980 if (NONDEBUG_INSN_P (insn)
9981 && requires_stack_frame_p (insn, prologue_used,
9982 set_up_by_prologue))
9984 crtl->stack_realign_needed = stack_realign;
9985 crtl->stack_realign_finalized = true;
9990 frame_pointer_needed = false;
9991 stack_realign = false;
9992 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
9993 crtl->stack_alignment_needed = incoming_stack_boundary;
9994 crtl->stack_alignment_estimated = incoming_stack_boundary;
9995 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
9996 crtl->preferred_stack_boundary = incoming_stack_boundary;
9997 df_finish_pass (true);
9998 df_scan_alloc (NULL);
10000 df_compute_regs_ever_live (true);
10004 crtl->stack_realign_needed = stack_realign;
10005 crtl->stack_realign_finalized = true;
10008 /* Expand the prologue into a bunch of separate insns. */
10011 ix86_expand_prologue (void)
10013 struct machine_function *m = cfun->machine;
10016 struct ix86_frame frame;
10017 HOST_WIDE_INT allocate;
10018 bool int_registers_saved;
10019 bool sse_registers_saved;
10021 ix86_finalize_stack_realign_flags ();
10023 /* DRAP should not coexist with stack_realign_fp */
10024 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10026 memset (&m->fs, 0, sizeof (m->fs));
10028 /* Initialize CFA state for before the prologue. */
10029 m->fs.cfa_reg = stack_pointer_rtx;
10030 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10032 /* Track SP offset to the CFA. We continue tracking this after we've
10033 swapped the CFA register away from SP. In the case of re-alignment
10034 this is fudged; we're interested in offsets within the local frame. */
10035 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10036 m->fs.sp_valid = true;
10038 ix86_compute_frame_layout (&frame);
10040 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10042 /* We should have already generated an error for any use of
10043 ms_hook on a nested function. */
10044 gcc_checking_assert (!ix86_static_chain_on_stack);
10046 /* Check if profiling is active and we shall use the profiling-before-
10047 prologue variant. If so, sorry. */
10048 if (crtl->profile && flag_fentry != 0)
10049 sorry ("ms_hook_prologue attribute isn%'t compatible "
10050 "with -mfentry for 32-bit");
10052 /* In ix86_asm_output_function_label we emitted:
10053 8b ff movl.s %edi,%edi
10054 55 push %ebp
10055 8b ec movl.s %esp,%ebp
10057 This matches the hookable function prologue in Win32 API
10058 functions in Microsoft Windows XP Service Pack 2 and newer.
10059 Wine uses this to enable Windows apps to hook the Win32 API
10060 functions provided by Wine.
10062 What that means is that we've already set up the frame pointer. */
10064 if (frame_pointer_needed
10065 && !(crtl->drap_reg && crtl->stack_realign_needed))
10069 /* We've decided to use the frame pointer already set up.
10070 Describe this to the unwinder by pretending that both
10071 push and mov insns happen right here.
10073 Putting the unwind info here at the end of the ms_hook
10074 is done so that we can make absolutely certain we get
10075 the required byte sequence at the start of the function,
10076 rather than relying on an assembler that can produce
10077 the exact encoding required.
10079 However it does mean (in the unpatched case) that we have
10080 a 1 insn window where the asynchronous unwind info is
10081 incorrect. However, if we placed the unwind info at
10082 its correct location we would have incorrect unwind info
10083 in the patched case. Which is probably all moot since
10084 I don't expect Wine generates dwarf2 unwind info for the
10085 system libraries that use this feature. */
10087 insn = emit_insn (gen_blockage ());
10089 push = gen_push (hard_frame_pointer_rtx);
10090 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10091 stack_pointer_rtx);
10092 RTX_FRAME_RELATED_P (push) = 1;
10093 RTX_FRAME_RELATED_P (mov) = 1;
10095 RTX_FRAME_RELATED_P (insn) = 1;
10096 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10097 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10099 /* Note that gen_push incremented m->fs.cfa_offset, even
10100 though we didn't emit the push insn here. */
10101 m->fs.cfa_reg = hard_frame_pointer_rtx;
10102 m->fs.fp_offset = m->fs.cfa_offset;
10103 m->fs.fp_valid = true;
10107 /* The frame pointer is not needed so pop %ebp again.
10108 This leaves us with a pristine state. */
10109 emit_insn (gen_pop (hard_frame_pointer_rtx));
10113 /* The first insn of a function that accepts its static chain on the
10114 stack is to push the register that would be filled in by a direct
10115 call. This insn will be skipped by the trampoline. */
10116 else if (ix86_static_chain_on_stack)
10118 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10119 emit_insn (gen_blockage ());
10121 /* We don't want to interpret this push insn as a register save,
10122 only as a stack adjustment. The real copy of the register as
10123 a save will be done later, if needed. */
10124 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10125 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10126 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10127 RTX_FRAME_RELATED_P (insn) = 1;
10130 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10131 DRAP is needed and stack realignment is really needed after reload. */
10132 if (stack_realign_drap)
10134 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10136 /* Only need to push parameter pointer reg if it is caller saved. */
10137 if (!call_used_regs[REGNO (crtl->drap_reg)])
10139 /* Push arg pointer reg */
10140 insn = emit_insn (gen_push (crtl->drap_reg));
10141 RTX_FRAME_RELATED_P (insn) = 1;
10144 /* Grab the argument pointer. */
10145 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10146 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10147 RTX_FRAME_RELATED_P (insn) = 1;
10148 m->fs.cfa_reg = crtl->drap_reg;
10149 m->fs.cfa_offset = 0;
10151 /* Align the stack. */
10152 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10154 GEN_INT (-align_bytes)));
10155 RTX_FRAME_RELATED_P (insn) = 1;
10157 /* Replicate the return address on the stack so that the return
10158 address can be reached via the (argp - 1) slot. This is needed
10159 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10160 expand_builtin_return_addr, etc. */
10161 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10162 t = gen_frame_mem (Pmode, t);
10163 insn = emit_insn (gen_push (t));
10164 RTX_FRAME_RELATED_P (insn) = 1;
10166 /* For the purposes of frame and register save area addressing,
10167 we've started over with a new frame. */
10168 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10169 m->fs.realigned = true;
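/* Putting the pieces above together, the realignment sequence comes out
   roughly as follows for 32-bit code with %ecx as the DRAP register,
   16-byte alignment, and no DRAP save (register, alignment and the
   optional save all depend on the function):

       lea    4(%esp), %ecx         # grab the argument pointer
       and    $-16, %esp            # align the stack
       push   -4(%ecx)              # replicate the return address  */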
10172 int_registers_saved = (frame.nregs == 0);
10173 sse_registers_saved = (frame.nsseregs == 0);
10175 if (frame_pointer_needed && !m->fs.fp_valid)
10177 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10178 slower on all targets. Also sdb doesn't like it. */
10179 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10180 RTX_FRAME_RELATED_P (insn) = 1;
10182 /* Push registers now, before setting the frame pointer
10184 if (!int_registers_saved
10186 && !frame.save_regs_using_mov)
10188 ix86_emit_save_regs ();
10189 int_registers_saved = true;
10190 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10193 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10195 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10196 RTX_FRAME_RELATED_P (insn) = 1;
10198 if (m->fs.cfa_reg == stack_pointer_rtx)
10199 m->fs.cfa_reg = hard_frame_pointer_rtx;
10200 m->fs.fp_offset = m->fs.sp_offset;
10201 m->fs.fp_valid = true;
10205 if (!int_registers_saved)
10207 /* If saving registers via PUSH, do so now. */
10208 if (!frame.save_regs_using_mov)
10210 ix86_emit_save_regs ();
10211 int_registers_saved = true;
10212 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10215 /* When using the red zone we may start register saving before allocating
10216 the stack frame, saving one cycle of the prologue. However, avoid
10217 doing this if we have to probe the stack; at least on x86_64 the
10218 stack probe can turn into a call that clobbers a red zone location. */
10219 else if (ix86_using_red_zone ()
10220 && (! TARGET_STACK_PROBE
10221 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10223 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10224 int_registers_saved = true;
10228 if (stack_realign_fp)
10230 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10231 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10233 /* The computation of the size of the re-aligned stack frame means
10234 that we must allocate the size of the register save area before
10235 performing the actual alignment. Otherwise we cannot guarantee
10236 that there's enough storage above the realignment point. */
10237 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10238 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10239 GEN_INT (m->fs.sp_offset
10240 - frame.sse_reg_save_offset),
10243 /* Align the stack. */
10244 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10246 GEN_INT (-align_bytes)));
10248 /* For the purposes of register save area addressing, the stack
10249 pointer is no longer valid. As for the value of sp_offset,
10250 see ix86_compute_frame_layout, which we need to match in order
10251 to pass verification of stack_pointer_offset at the end. */
10252 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10253 m->fs.sp_valid = false;
10256 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10258 if (flag_stack_usage_info)
10260 /* We start to count from ARG_POINTER. */
10261 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10263 /* If it was realigned, take into account the fake frame. */
10264 if (stack_realign_drap)
10266 if (ix86_static_chain_on_stack)
10267 stack_size += UNITS_PER_WORD;
10269 if (!call_used_regs[REGNO (crtl->drap_reg)])
10270 stack_size += UNITS_PER_WORD;
10272 /* This over-estimates by 1 minimal-stack-alignment-unit but
10273 mitigates that by counting in the new return address slot. */
10274 current_function_dynamic_stack_size
10275 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10278 current_function_static_stack_size = stack_size;
10281 /* On SEH targets with a very large frame size, allocate an area to save
10282 SSE registers (as the very large allocation won't be described). */
10284 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10285 && !sse_registers_saved)
10287 HOST_WIDE_INT sse_size =
10288 frame.sse_reg_save_offset - frame.reg_save_offset;
10290 gcc_assert (int_registers_saved);
10292 /* No need to do stack checking as the area will be immediately
10294 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10295 GEN_INT (-sse_size), -1,
10296 m->fs.cfa_reg == stack_pointer_rtx);
10297 allocate -= sse_size;
10298 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10299 sse_registers_saved = true;
10302 /* The stack has already been decremented by the instruction calling us
10303 so probe if the size is non-negative to preserve the protection area. */
10304 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10306 /* We expect the registers to be saved when probes are used. */
10307 gcc_assert (int_registers_saved);
10309 if (STACK_CHECK_MOVING_SP)
10311 ix86_adjust_stack_and_probe (allocate);
10316 HOST_WIDE_INT size = allocate;
10318 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10319 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10321 if (TARGET_STACK_PROBE)
10322 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10324 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
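/* A sketch of the non-moving-sp probes, assuming the default 4096-byte
   probe interval: the emitted code touches one word per page over the
   tested range, e.g.

       orl    $0, -4096(%esp)
       orl    $0, -8192(%esp)
       ...

   so a large allocation cannot skip over the guard page.  */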
10330 else if (!ix86_target_stack_probe ()
10331 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10333 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10334 GEN_INT (-allocate), -1,
10335 m->fs.cfa_reg == stack_pointer_rtx);
10339 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10341 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10343 bool eax_live = false;
10344 bool r10_live = false;
10347 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10348 if (!TARGET_64BIT_MS_ABI)
10349 eax_live = ix86_eax_live_at_start_p ();
10353 emit_insn (gen_push (eax));
10354 allocate -= UNITS_PER_WORD;
10358 r10 = gen_rtx_REG (Pmode, R10_REG);
10359 emit_insn (gen_push (r10));
10360 allocate -= UNITS_PER_WORD;
10363 emit_move_insn (eax, GEN_INT (allocate));
10364 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
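/* On targets that require it (e.g. Windows), the worker above expands
   to a call to a chkstk-style probe routine with the byte count in
   %eax/%rax; the exact routine is target-dependent.  The explicit SUB
   emitted next then performs the actual stack adjustment.  */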
10366 /* Use the fact that AX still contains ALLOCATE. */
10367 adjust_stack_insn = (TARGET_64BIT
10368 ? gen_pro_epilogue_adjust_stack_di_sub
10369 : gen_pro_epilogue_adjust_stack_si_sub);
10371 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10372 stack_pointer_rtx, eax));
10374 /* Note that SEH directives need to continue tracking the stack
10375 pointer even after the frame pointer has been set up. */
10376 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10378 if (m->fs.cfa_reg == stack_pointer_rtx)
10379 m->fs.cfa_offset += allocate;
10381 RTX_FRAME_RELATED_P (insn) = 1;
10382 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10383 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10384 plus_constant (stack_pointer_rtx,
10387 m->fs.sp_offset += allocate;
10389 if (r10_live && eax_live)
10391 t = choose_baseaddr (m->fs.sp_offset - allocate);
10392 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10393 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10394 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10396 else if (eax_live || r10_live)
10398 t = choose_baseaddr (m->fs.sp_offset - allocate);
10399 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10402 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10404 /* If we haven't already set up the frame pointer, do so now. */
10405 if (frame_pointer_needed && !m->fs.fp_valid)
10407 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10408 GEN_INT (frame.stack_pointer_offset
10409 - frame.hard_frame_pointer_offset));
10410 insn = emit_insn (insn);
10411 RTX_FRAME_RELATED_P (insn) = 1;
10412 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10414 if (m->fs.cfa_reg == stack_pointer_rtx)
10415 m->fs.cfa_reg = hard_frame_pointer_rtx;
10416 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10417 m->fs.fp_valid = true;
10420 if (!int_registers_saved)
10421 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10422 if (!sse_registers_saved)
10423 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10425 pic_reg_used = false;
10426 if (pic_offset_table_rtx
10427 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10430 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10432 if (alt_pic_reg_used != INVALID_REGNUM)
10433 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10435 pic_reg_used = true;
10442 if (ix86_cmodel == CM_LARGE_PIC)
10444 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10445 rtx label = gen_label_rtx ();
10446 emit_label (label);
10447 LABEL_PRESERVE_P (label) = 1;
10448 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10449 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10450 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10451 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10452 pic_offset_table_rtx, tmp_reg));
10455 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10459 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10460 RTX_FRAME_RELATED_P (insn) = 1;
10461 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10465 /* In the pic_reg_used case, make sure that the got load isn't deleted
10466 when mcount needs it. Blockage to avoid call movement across mcount
10467 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10469 if (crtl->profile && !flag_fentry && pic_reg_used)
10470 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10472 if (crtl->drap_reg && !crtl->stack_realign_needed)
10474 /* vDRAP is set up, but after reload it turns out stack realignment
10475 isn't necessary; here we emit prologue code to set up DRAP
10476 without the stack realignment adjustment. */
10477 t = choose_baseaddr (0);
10478 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10481 /* Prevent instructions from being scheduled into register save push
10482 sequence when access to the redzone area is done through frame pointer.
10483 The offset between the frame pointer and the stack pointer is calculated
10484 relative to the value of the stack pointer at the end of the function
10485 prologue, and moving instructions that access the redzone area via the frame
10486 pointer inside the push sequence violates this assumption. */
10487 if (frame_pointer_needed && frame.red_zone_size)
10488 emit_insn (gen_memory_blockage ());
10490 /* Emit cld instruction if stringops are used in the function. */
10491 if (TARGET_CLD && ix86_current_function_needs_cld)
10492 emit_insn (gen_cld ());
10494 /* SEH requires that the prologue end within 256 bytes of the start of
10495 the function. Prevent instruction schedules that would extend that.
10496 Further, prevent alloca modifications to the stack pointer from being
10497 combined with prologue modifications. */
10499 emit_insn (gen_prologue_use (stack_pointer_rtx));
10502 /* Emit code to restore REG using a POP insn. */
10505 ix86_emit_restore_reg_using_pop (rtx reg)
10507 struct machine_function *m = cfun->machine;
10508 rtx insn = emit_insn (gen_pop (reg));
10510 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10511 m->fs.sp_offset -= UNITS_PER_WORD;
10513 if (m->fs.cfa_reg == crtl->drap_reg
10514 && REGNO (reg) == REGNO (crtl->drap_reg))
10516 /* Previously we'd represented the CFA as an expression
10517 like *(%ebp - 8). We've just popped that value from
10518 the stack, which means we need to reset the CFA to
10519 the drap register. This will remain until we restore
10520 the stack pointer. */
10521 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10522 RTX_FRAME_RELATED_P (insn) = 1;
10524 /* This means that the DRAP register is valid for addressing too. */
10525 m->fs.drap_valid = true;
10529 if (m->fs.cfa_reg == stack_pointer_rtx)
10531 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10532 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10533 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10534 RTX_FRAME_RELATED_P (insn) = 1;
10536 m->fs.cfa_offset -= UNITS_PER_WORD;
10539 /* When the frame pointer is the CFA, and we pop it, we are
10540 swapping back to the stack pointer as the CFA. This happens
10541 for stack frames that don't allocate other data, so we assume
10542 the stack pointer is now pointing at the return address, i.e.
10543 the function entry state, which makes the offset be 1 word. */
10544 if (reg == hard_frame_pointer_rtx)
10546 m->fs.fp_valid = false;
10547 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10549 m->fs.cfa_reg = stack_pointer_rtx;
10550 m->fs.cfa_offset -= UNITS_PER_WORD;
10552 add_reg_note (insn, REG_CFA_DEF_CFA,
10553 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10554 GEN_INT (m->fs.cfa_offset)));
10555 RTX_FRAME_RELATED_P (insn) = 1;
10560 /* Emit code to restore saved registers using POP insns. */
10563 ix86_emit_restore_regs_using_pop (void)
10565 unsigned int regno;
10567 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10568 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10569 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10572 /* Emit code and notes for the LEAVE instruction. */
10575 ix86_emit_leave (void)
10577 struct machine_function *m = cfun->machine;
10578 rtx insn = emit_insn (ix86_gen_leave ());
10580 ix86_add_queued_cfa_restore_notes (insn);
10582 gcc_assert (m->fs.fp_valid);
10583 m->fs.sp_valid = true;
10584 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10585 m->fs.fp_valid = false;
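/* LEAVE is architecturally equivalent to

       movl   %ebp, %esp
       popl   %ebp

   which is why sp becomes valid again one word below where the frame
   pointer pointed, while the frame pointer itself is now dead.  */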
10587 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10589 m->fs.cfa_reg = stack_pointer_rtx;
10590 m->fs.cfa_offset = m->fs.sp_offset;
10592 add_reg_note (insn, REG_CFA_DEF_CFA,
10593 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10594 RTX_FRAME_RELATED_P (insn) = 1;
10596 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10600 /* Emit code to restore saved registers using MOV insns.
10601 The first register is restored from CFA - CFA_OFFSET. */
10603 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10604 bool maybe_eh_return)
10606 struct machine_function *m = cfun->machine;
10607 unsigned int regno;
10609 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10610 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10612 rtx reg = gen_rtx_REG (Pmode, regno);
10615 mem = choose_baseaddr (cfa_offset);
10616 mem = gen_frame_mem (Pmode, mem);
10617 insn = emit_move_insn (reg, mem);
10619 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10621 /* Previously we'd represented the CFA as an expression
10622 like *(%ebp - 8). We've just popped that value from
10623 the stack, which means we need to reset the CFA to
10624 the drap register. This will remain until we restore
10625 the stack pointer. */
10626 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10627 RTX_FRAME_RELATED_P (insn) = 1;
10629 /* This means that the DRAP register is valid for addressing. */
10630 m->fs.drap_valid = true;
10633 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10635 cfa_offset -= UNITS_PER_WORD;
10639 /* Emit code to restore saved registers using MOV insns.
10640 The first register is restored from CFA - CFA_OFFSET. */
10642 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10643 bool maybe_eh_return)
10645 unsigned int regno;
10647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10648 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10650 rtx reg = gen_rtx_REG (V4SFmode, regno);
10653 mem = choose_baseaddr (cfa_offset);
10654 mem = gen_rtx_MEM (V4SFmode, mem);
10655 set_mem_align (mem, 128);
10656 emit_move_insn (reg, mem);
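/* Since the save slots are 128-bit aligned (see set_mem_align above),
   this move can be an aligned load, e.g. something like
   movaps 0x20(%rsp), %xmm6 -- register and offset illustrative only.  */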
10658 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10664 /* Emit vzeroupper if needed. */
10667 ix86_maybe_emit_epilogue_vzeroupper (void)
10669 if (TARGET_VZEROUPPER
10670 && !TREE_THIS_VOLATILE (cfun->decl)
10671 && !cfun->machine->caller_return_avx256_p)
10672 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
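/* vzeroupper clears the upper 128 bits of all ymm registers, avoiding
   the costly AVX<->SSE transition stalls when the caller runs legacy
   SSE code.  It must be suppressed when the caller expects a 256-bit
   AVX value back, hence the caller_return_avx256_p check above.  */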
10675 /* Restore function stack, frame, and registers. */
10678 ix86_expand_epilogue (int style)
10680 struct machine_function *m = cfun->machine;
10681 struct machine_frame_state frame_state_save = m->fs;
10682 struct ix86_frame frame;
10683 bool restore_regs_via_mov;
10686 ix86_finalize_stack_realign_flags ();
10687 ix86_compute_frame_layout (&frame);
10689 m->fs.sp_valid = (!frame_pointer_needed
10690 || (current_function_sp_is_unchanging
10691 && !stack_realign_fp));
10692 gcc_assert (!m->fs.sp_valid
10693 || m->fs.sp_offset == frame.stack_pointer_offset);
10695 /* The FP must be valid if the frame pointer is present. */
10696 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10697 gcc_assert (!m->fs.fp_valid
10698 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10700 /* We must have *some* valid pointer to the stack frame. */
10701 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10703 /* The DRAP is never valid at this point. */
10704 gcc_assert (!m->fs.drap_valid);
10706 /* See the comment about red zone and frame
10707 pointer usage in ix86_expand_prologue. */
10708 if (frame_pointer_needed && frame.red_zone_size)
10709 emit_insn (gen_memory_blockage ());
10711 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10712 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10714 /* Determine the CFA offset of the end of the red-zone. */
10715 m->fs.red_zone_offset = 0;
10716 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10718 /* The red-zone begins below the return address. */
10719 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10721 /* When the register save area is in the aligned portion of
10722 the stack, determine the maximum runtime displacement that
10723 matches up with the aligned frame. */
10724 if (stack_realign_drap)
10725 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10729 /* Special care must be taken for the normal return case of a function
10730 using eh_return: the eax and edx registers are marked as saved, but
10731 not restored along this path. Adjust the save location to match. */
10732 if (crtl->calls_eh_return && style != 2)
10733 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10735 /* EH_RETURN requires the use of moves to function properly. */
10736 if (crtl->calls_eh_return)
10737 restore_regs_via_mov = true;
10738 /* SEH requires the use of pops to identify the epilogue. */
10739 else if (TARGET_SEH)
10740 restore_regs_via_mov = false;
10741 /* If we're only restoring one register and sp is not valid, then
10742 use a move instruction to restore the register, since it's
10743 less work than reloading sp and popping the register. */
10744 else if (!m->fs.sp_valid && frame.nregs <= 1)
10745 restore_regs_via_mov = true;
10746 else if (TARGET_EPILOGUE_USING_MOVE
10747 && cfun->machine->use_fast_prologue_epilogue
10748 && (frame.nregs > 1
10749 || m->fs.sp_offset != frame.reg_save_offset))
10750 restore_regs_via_mov = true;
10751 else if (frame_pointer_needed
10753 && m->fs.sp_offset != frame.reg_save_offset)
10754 restore_regs_via_mov = true;
10755 else if (frame_pointer_needed
10756 && TARGET_USE_LEAVE
10757 && cfun->machine->use_fast_prologue_epilogue
10758 && frame.nregs == 1)
10759 restore_regs_via_mov = true;
10761 restore_regs_via_mov = false;
10763 if (restore_regs_via_mov || frame.nsseregs)
10765 /* Ensure that the entire register save area is addressable via
10766 the stack pointer, if we will restore via sp. */
10768 && m->fs.sp_offset > 0x7fffffff
10769 && !(m->fs.fp_valid || m->fs.drap_valid)
10770 && (frame.nsseregs + frame.nregs) != 0)
10772 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10773 GEN_INT (m->fs.sp_offset
10774 - frame.sse_reg_save_offset),
10776 m->fs.cfa_reg == stack_pointer_rtx);
10780 /* If there are any SSE registers to restore, then we have to do it
10781 via moves, since there's obviously no pop for SSE regs. */
10782 if (frame.nsseregs)
10783 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10786 if (restore_regs_via_mov)
10791 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10793 /* eh_return epilogues need %ecx added to the stack pointer. */
10796 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10798 /* Stack align doesn't work with eh_return. */
10799 gcc_assert (!stack_realign_drap);
10800 /* Neither do regparm nested functions. */
10801 gcc_assert (!ix86_static_chain_on_stack);
10803 if (frame_pointer_needed)
10805 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10806 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10807 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10809 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10810 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10812 /* Note that we use SA as a temporary CFA, as the return
10813 address is at the proper place relative to it. We
10814 pretend this happens at the FP restore insn because
10815 prior to this insn the FP would be stored at the wrong
10816 offset relative to SA, and after this insn we have no
10817 other reasonable register to use for the CFA. We don't
10818 bother resetting the CFA to the SP for the duration of
10819 the return insn. */
10820 add_reg_note (insn, REG_CFA_DEF_CFA,
10821 plus_constant (sa, UNITS_PER_WORD));
10822 ix86_add_queued_cfa_restore_notes (insn);
10823 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10824 RTX_FRAME_RELATED_P (insn) = 1;
10826 m->fs.cfa_reg = sa;
10827 m->fs.cfa_offset = UNITS_PER_WORD;
10828 m->fs.fp_valid = false;
10830 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10831 const0_rtx, style, false);
10835 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10836 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10837 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10838 ix86_add_queued_cfa_restore_notes (insn);
10840 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10841 if (m->fs.cfa_offset != UNITS_PER_WORD)
10843 m->fs.cfa_offset = UNITS_PER_WORD;
10844 add_reg_note (insn, REG_CFA_DEF_CFA,
10845 plus_constant (stack_pointer_rtx,
10847 RTX_FRAME_RELATED_P (insn) = 1;
10850 m->fs.sp_offset = UNITS_PER_WORD;
10851 m->fs.sp_valid = true;
10856 /* SEH requires that the function end with (1) a stack adjustment
10857 if necessary, (2) a sequence of pops, and (3) a return or
10858 jump instruction. Prevent insns from the function body from
10859 being scheduled into this sequence. */
10862 /* Prevent a catch region from being adjacent to the standard
10863 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10864 several other flags that would be interesting to test are
10866 if (flag_non_call_exceptions)
10867 emit_insn (gen_nops (const1_rtx));
10869 emit_insn (gen_blockage ());
10872 /* The first step is to deallocate the stack frame so that we can
10873 pop the registers. Also do it on SEH targets for a very large
10874 frame, as the emitted instructions aren't allowed by the ABI in
10876 if (!m->fs.sp_valid
10878 && (m->fs.sp_offset - frame.reg_save_offset
10879 >= SEH_MAX_FRAME_SIZE)))
10881 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10882 GEN_INT (m->fs.fp_offset
10883 - frame.reg_save_offset),
10886 else if (m->fs.sp_offset != frame.reg_save_offset)
10888 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10889 GEN_INT (m->fs.sp_offset
10890 - frame.reg_save_offset),
10892 m->fs.cfa_reg == stack_pointer_rtx);
10895 ix86_emit_restore_regs_using_pop ();
10898 /* If we used a frame pointer and haven't already got rid of it,
10900 if (m->fs.fp_valid)
10902 /* If the stack pointer is valid and pointing at the frame
10903 pointer store address, then we only need a pop. */
10904 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10905 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10906 /* Leave results in shorter dependency chains on CPUs that are
10907 able to grok it fast. */
10908 else if (TARGET_USE_LEAVE
10909 || optimize_function_for_size_p (cfun)
10910 || !cfun->machine->use_fast_prologue_epilogue)
10911 ix86_emit_leave ();
10914 pro_epilogue_adjust_stack (stack_pointer_rtx,
10915 hard_frame_pointer_rtx,
10916 const0_rtx, style, !using_drap);
10917 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10923 int param_ptr_offset = UNITS_PER_WORD;
10926 gcc_assert (stack_realign_drap);
10928 if (ix86_static_chain_on_stack)
10929 param_ptr_offset += UNITS_PER_WORD;
10930 if (!call_used_regs[REGNO (crtl->drap_reg)])
10931 param_ptr_offset += UNITS_PER_WORD;
10933 insn = emit_insn (gen_rtx_SET
10934 (VOIDmode, stack_pointer_rtx,
10935 gen_rtx_PLUS (Pmode,
10937 GEN_INT (-param_ptr_offset))));
10938 m->fs.cfa_reg = stack_pointer_rtx;
10939 m->fs.cfa_offset = param_ptr_offset;
10940 m->fs.sp_offset = param_ptr_offset;
10941 m->fs.realigned = false;
10943 add_reg_note (insn, REG_CFA_DEF_CFA,
10944 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10945 GEN_INT (param_ptr_offset)));
10946 RTX_FRAME_RELATED_P (insn) = 1;
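/* Sketched for 32-bit code with %ecx as the DRAP register and a
   param_ptr_offset of one word, the recovery above assembles to
   roughly

       lea    -4(%ecx), %esp

   after which the CFA is again expressed relative to %esp.  */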
10948 if (!call_used_regs[REGNO (crtl->drap_reg)])
10949 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10952 /* At this point the stack pointer must be valid, and we must have
10953 restored all of the registers. We may not have deallocated the
10954 entire stack frame. We've delayed this until now because it may
10955 be possible to merge the local stack deallocation with the
10956 deallocation forced by ix86_static_chain_on_stack. */
10957 gcc_assert (m->fs.sp_valid);
10958 gcc_assert (!m->fs.fp_valid);
10959 gcc_assert (!m->fs.realigned);
10960 if (m->fs.sp_offset != UNITS_PER_WORD)
10962 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10963 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10967 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10969 /* Sibcall epilogues don't want a return instruction. */
10972 m->fs = frame_state_save;
10976 /* Emit vzeroupper if needed. */
10977 ix86_maybe_emit_epilogue_vzeroupper ();
10979 if (crtl->args.pops_args && crtl->args.size)
10981 rtx popc = GEN_INT (crtl->args.pops_args);
10983 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10984 address, do an explicit add, and jump indirectly to the caller. */
10986 if (crtl->args.pops_args >= 65536)
10988 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10991 /* There is no "pascal" calling convention in any 64bit ABI. */
10992 gcc_assert (!TARGET_64BIT);
10994 insn = emit_insn (gen_pop (ecx));
10995 m->fs.cfa_offset -= UNITS_PER_WORD;
10996 m->fs.sp_offset -= UNITS_PER_WORD;
10998 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10999 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11000 add_reg_note (insn, REG_CFA_REGISTER,
11001 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11002 RTX_FRAME_RELATED_P (insn) = 1;
11004 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11006 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11009 emit_jump_insn (gen_simple_return_pop_internal (popc));
11012 emit_jump_insn (gen_simple_return_internal ());
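/* In the common case the pop-internal return above is simply "ret $N",
   which pops the arguments as it returns.  The >= 64K path instead
   produces, roughly:

       popl   %ecx              # return address
       addl   $N, %esp          # pop the arguments
       jmp    *%ecx             # return

   because the 16-bit immediate of RET cannot encode the count.  */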
11014 /* Restore the state back to the state from the prologue,
11015 so that it's correct for the next epilogue. */
11016 m->fs = frame_state_save;
11019 /* Reset from the function's potential modifications. */
11022 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11023 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11025 if (pic_offset_table_rtx)
11026 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11028 /* Mach-O doesn't support labels at the end of objects, so if
11029 it looks like we might want one, insert a NOP. */
11031 rtx insn = get_last_insn ();
11032 rtx deleted_debug_label = NULL_RTX;
11035 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11037 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11038 notes; instead set their CODE_LABEL_NUMBER to -1,
11039 otherwise there would be code generation differences
11040 between -g and -g0. */
11041 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11042 deleted_debug_label = insn;
11043 insn = PREV_INSN (insn);
11048 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11049 fputs ("\tnop\n", file);
11050 else if (deleted_debug_label)
11051 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11052 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11053 CODE_LABEL_NUMBER (insn) = -1;
11059 /* Return a scratch register to use in the split stack prologue. The
11060 split stack prologue is used for -fsplit-stack. It is the first
11061 instructions in the function, even before the regular prologue.
11062 The scratch register can be any caller-saved register which is not
11063 used for parameters or for the static chain. */
11065 static unsigned int
11066 split_stack_prologue_scratch_regno (void)
11075 is_fastcall = (lookup_attribute ("fastcall",
11076 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11078 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11082 if (DECL_STATIC_CHAIN (cfun->decl))
11084 sorry ("-fsplit-stack does not support fastcall with "
11085 "nested function");
11086 return INVALID_REGNUM;
11090 else if (regparm < 3)
11092 if (!DECL_STATIC_CHAIN (cfun->decl))
11098 sorry ("-fsplit-stack does not support 2 register "
11099 " parameters for a nested function");
11100 return INVALID_REGNUM;
11107 /* FIXME: We could make this work by pushing a register
11108 around the addition and comparison. */
11109 sorry ("-fsplit-stack does not support 3 register parameters");
11110 return INVALID_REGNUM;
11115 /* A SYMBOL_REF for the function which allocates new stack space for
11118 static GTY(()) rtx split_stack_fn;
11120 /* A SYMBOL_REF for the more stack function when using the large
11123 static GTY(()) rtx split_stack_fn_large;
11125 /* Handle -fsplit-stack. These are the first instructions in the
11126 function, even before the regular prologue. */
11129 ix86_expand_split_stack_prologue (void)
11131 struct ix86_frame frame;
11132 HOST_WIDE_INT allocate;
11133 unsigned HOST_WIDE_INT args_size;
11134 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11135 rtx scratch_reg = NULL_RTX;
11136 rtx varargs_label = NULL_RTX;
11139 gcc_assert (flag_split_stack && reload_completed);
11141 ix86_finalize_stack_realign_flags ();
11142 ix86_compute_frame_layout (&frame);
11143 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11145 /* This is the label we will branch to if we have enough stack
11146 space. We expect the basic block reordering pass to reverse this
11147 branch if optimizing, so that we branch in the unlikely case. */
11148 label = gen_label_rtx ();
11150 /* We need to compare the stack pointer minus the frame size with
11151 the stack boundary in the TCB. The stack boundary always gives
11152 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11153 can compare directly. Otherwise we need to do an addition. */
11155 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11156 UNSPEC_STACK_CHECK);
11157 limit = gen_rtx_CONST (Pmode, limit);
11158 limit = gen_rtx_MEM (Pmode, limit);
11159 if (allocate < SPLIT_STACK_AVAILABLE)
11160 current = stack_pointer_rtx;
11163 unsigned int scratch_regno;
11166 /* We need a scratch register to hold the stack pointer minus
11167 the required frame size. Since this is the very start of the
11168 function, the scratch register can be any caller-saved
11169 register which is not used for parameters. */
11170 offset = GEN_INT (- allocate);
11171 scratch_regno = split_stack_prologue_scratch_regno ();
11172 if (scratch_regno == INVALID_REGNUM)
11174 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11175 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11177 /* We don't use ix86_gen_add3 in this case because it will
11178 want to split to lea, but when not optimizing the insn
11179 will not be split after this point. */
11180 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11181 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11186 emit_move_insn (scratch_reg, offset);
11187 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11188 stack_pointer_rtx));
11190 current = scratch_reg;
11193 ix86_expand_branch (GEU, current, limit, label);
11194 jump_insn = get_last_insn ();
11195 JUMP_LABEL (jump_insn) = label;
11197 /* Mark the jump as very likely to be taken. */
11198 add_reg_note (jump_insn, REG_BR_PROB,
11199 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
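/* REG_BR_PROB_BASE is 10000, so the note above marks the branch as
   taken with probability 9900/10000, i.e. 99%.  */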
11201 if (split_stack_fn == NULL_RTX)
11202 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11203 fn = split_stack_fn;
11205 /* Get more stack space. We pass in the desired stack space and the
11206 size of the arguments to copy to the new stack. In 32-bit mode
11207 we push the parameters; __morestack will return on a new stack
11208 anyhow. In 64-bit mode we pass the parameters in r10 and
11210 allocate_rtx = GEN_INT (allocate);
11211 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11212 call_fusage = NULL_RTX;
11217 reg10 = gen_rtx_REG (Pmode, R10_REG);
11218 reg11 = gen_rtx_REG (Pmode, R11_REG);
11220 /* If this function uses a static chain, it will be in %r10.
11221 Preserve it across the call to __morestack. */
11222 if (DECL_STATIC_CHAIN (cfun->decl))
11226 rax = gen_rtx_REG (Pmode, AX_REG);
11227 emit_move_insn (rax, reg10);
11228 use_reg (&call_fusage, rax);
11231 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11233 HOST_WIDE_INT argval;
11235 /* When using the large model we need to load the address
11236 into a register, and we've run out of registers. So we
11237 switch to a different calling convention, and we call a
11238 different function: __morestack_large_model. We pass the
11239 argument size in the upper 32 bits of r10 and pass the
11240 frame size in the lower 32 bits. */
11241 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11242 gcc_assert ((args_size & 0xffffffff) == args_size);
11244 if (split_stack_fn_large == NULL_RTX)
11245 split_stack_fn_large =
11246 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11248 if (ix86_cmodel == CM_LARGE_PIC)
11252 label = gen_label_rtx ();
11253 emit_label (label);
11254 LABEL_PRESERVE_P (label) = 1;
11255 emit_insn (gen_set_rip_rex64 (reg10, label));
11256 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11257 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11258 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11260 x = gen_rtx_CONST (Pmode, x);
11261 emit_move_insn (reg11, x);
11262 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11263 x = gen_const_mem (Pmode, x);
11264 emit_move_insn (reg11, x);
11267 emit_move_insn (reg11, split_stack_fn_large);
11271 argval = ((args_size << 16) << 16) + allocate;
11272 emit_move_insn (reg10, GEN_INT (argval));
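/* A worked example of the packing above: with args_size == 0x18 and
   allocate == 0x100 we get argval == 0x0000001800000100, so the callee
   can recover the argument size from the upper half of %r10 and the
   frame size from the lower half.  */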
11276 emit_move_insn (reg10, allocate_rtx);
11277 emit_move_insn (reg11, GEN_INT (args_size));
11278 use_reg (&call_fusage, reg11);
11281 use_reg (&call_fusage, reg10);
11285 emit_insn (gen_push (GEN_INT (args_size)));
11286 emit_insn (gen_push (allocate_rtx));
11288 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11289 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11291 add_function_usage_to (call_insn, call_fusage);
11293 /* In order to make call/return prediction work right, we now need
11294 to execute a return instruction. See
11295 libgcc/config/i386/morestack.S for the details on how this works.
11297 For flow purposes gcc must not see this as a return
11298 instruction--we need control flow to continue at the subsequent
11299 label. Therefore, we use an unspec. */
11300 gcc_assert (crtl->args.pops_args < 65536);
11301 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11303 /* If we are in 64-bit mode and this function uses a static chain,
11304 we saved %r10 in %rax before calling __morestack. */
11305 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11306 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11307 gen_rtx_REG (Pmode, AX_REG));
11309 /* If this function calls va_start, we need to store a pointer to
11310 the arguments on the old stack, because they may not have been
11311 all copied to the new stack. At this point the old stack can be
11312 found at the frame pointer value used by __morestack, because
11313 __morestack has set that up before calling back to us. Here we
11314 store that pointer in a scratch register, and in
11315 ix86_expand_prologue we store the scratch register in a stack
11317 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11319 unsigned int scratch_regno;
11323 scratch_regno = split_stack_prologue_scratch_regno ();
11324 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11325 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11329 return address within this function
11330 return address of caller of this function
11332 So we add three words to get to the stack arguments.
11336 return address within this function
11337 first argument to __morestack
11338 second argument to __morestack
11339 return address of caller of this function
11341 So we add five words to get to the stack arguments.
11343 words = TARGET_64BIT ? 3 : 5;
11344 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11345 gen_rtx_PLUS (Pmode, frame_reg,
11346 GEN_INT (words * UNITS_PER_WORD))));
11348 varargs_label = gen_label_rtx ();
11349 emit_jump_insn (gen_jump (varargs_label));
11350 JUMP_LABEL (get_last_insn ()) = varargs_label;
11355 emit_label (label);
11356 LABEL_NUSES (label) = 1;
11358 /* If this function calls va_start, we now have to set the scratch
11359 register for the case where we do not call __morestack. In this
11360 case we need to set it based on the stack pointer. */
11361 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11363 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11364 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11365 GEN_INT (UNITS_PER_WORD))));
11367 emit_label (varargs_label);
11368 LABEL_NUSES (varargs_label) = 1;
11372 /* We may have to tell the dataflow pass that the split stack prologue
11373 is initializing a scratch register. */
11376 ix86_live_on_entry (bitmap regs)
11378 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11380 gcc_assert (flag_split_stack);
11381 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11385 /* Determine if OP is a suitable SUBREG RTX for an address. */
11388 ix86_address_subreg_operand (rtx op)
11390 enum machine_mode mode;
11395 mode = GET_MODE (op);
11397 if (GET_MODE_CLASS (mode) != MODE_INT)
11400 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11401 failures when the register is one word out of a two word structure. */
11402 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11405 /* simplify_subreg does not handle the stack pointer. */
11406 if (REGNO (op) == STACK_POINTER_REGNUM)
11409 /* Allow only SUBREGs of non-eliminable hard registers. */
11410 return register_no_elim_operand (op, mode);
11413 /* Extract the parts of an RTL expression that is a valid memory address
11414 for an instruction. Return 0 if the structure of the address is
11415 grossly off. Return -1 if the address contains ASHIFT, so it is not
11416 strictly valid, but still used for computing the length of an lea instruction. */
11419 ix86_decompose_address (rtx addr, struct ix86_address *out)
11421 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11422 rtx base_reg, index_reg;
11423 HOST_WIDE_INT scale = 1;
11424 rtx scale_rtx = NULL_RTX;
11427 enum ix86_address_seg seg = SEG_DEFAULT;
11429 /* Allow zero-extended SImode addresses,
11430 they will be emitted with the addr32 prefix. */
11431 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11433 if (GET_CODE (addr) == ZERO_EXTEND
11434 && GET_MODE (XEXP (addr, 0)) == SImode)
11436 addr = XEXP (addr, 0);
11437 if (CONST_INT_P (addr))
11440 else if (GET_CODE (addr) == AND
11441 && const_32bit_mask (XEXP (addr, 1), DImode))
11443 addr = XEXP (addr, 0);
11445 /* Adjust SUBREGs. */
11446 if (GET_CODE (addr) == SUBREG
11447 && GET_MODE (SUBREG_REG (addr)) == SImode)
11449 addr = SUBREG_REG (addr);
11450 if (CONST_INT_P (addr))
11453 else if (GET_MODE (addr) == DImode)
11454 addr = gen_rtx_SUBREG (SImode, addr, 0);
11455 else if (GET_MODE (addr) != VOIDmode)
11460 /* Allow SImode subregs of DImode addresses,
11461 they will be emitted with the addr32 prefix. */
11462 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11464 if (GET_CODE (addr) == SUBREG
11465 && GET_MODE (SUBREG_REG (addr)) == DImode)
11467 addr = SUBREG_REG (addr);
11468 if (CONST_INT_P (addr))
11475 else if (GET_CODE (addr) == SUBREG)
11477 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11482 else if (GET_CODE (addr) == PLUS)
11484 rtx addends[4], op;
11492 addends[n++] = XEXP (op, 1);
11495 while (GET_CODE (op) == PLUS);
11500 for (i = n; i >= 0; --i)
11503 switch (GET_CODE (op))
11508 index = XEXP (op, 0);
11509 scale_rtx = XEXP (op, 1);
11515 index = XEXP (op, 0);
11516 tmp = XEXP (op, 1);
11517 if (!CONST_INT_P (tmp))
11519 scale = INTVAL (tmp);
11520 if ((unsigned HOST_WIDE_INT) scale > 3)
11522 scale = 1 << scale;
11526 if (XINT (op, 1) == UNSPEC_TP
11527 && TARGET_TLS_DIRECT_SEG_REFS
11528 && seg == SEG_DEFAULT)
11529 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11535 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11562 else if (GET_CODE (addr) == MULT)
11564 index = XEXP (addr, 0); /* index*scale */
11565 scale_rtx = XEXP (addr, 1);
11567 else if (GET_CODE (addr) == ASHIFT)
11569 /* We're called for lea too, which implements ashift on occasion. */
11570 index = XEXP (addr, 0);
11571 tmp = XEXP (addr, 1);
11572 if (!CONST_INT_P (tmp))
11574 scale = INTVAL (tmp);
11575 if ((unsigned HOST_WIDE_INT) scale > 3)
11577 scale = 1 << scale;
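/* Example: (ashift %eax 3) denotes %eax * 8 here.  Shift counts 0..3
   map to the encodable scales 1, 2, 4 and 8; anything larger was
   rejected just above.  */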
11580 else if (CONST_INT_P (addr))
11582 if (!x86_64_immediate_operand (addr, VOIDmode))
11585 /* Constant addresses are sign extended to 64bit; we have to
11586 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11588 && val_signbit_known_set_p (SImode, INTVAL (addr)))
11594 disp = addr; /* displacement */
11600 else if (GET_CODE (index) == SUBREG
11601 && ix86_address_subreg_operand (SUBREG_REG (index)))
11607 /* Extract the integral value of scale. */
11610 if (!CONST_INT_P (scale_rtx))
11612 scale = INTVAL (scale_rtx);
11615 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11616 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11618 /* Avoid useless 0 displacement. */
11619 if (disp == const0_rtx && (base || index))
11622 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
11623 if (base_reg && index_reg && scale == 1
11624 && (index_reg == arg_pointer_rtx
11625 || index_reg == frame_pointer_rtx
11626 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11629 tmp = base, base = index, index = tmp;
11630 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11633 /* Special case: %ebp cannot be encoded as a base without a displacement.
11637 && (base_reg == hard_frame_pointer_rtx
11638 || base_reg == frame_pointer_rtx
11639 || base_reg == arg_pointer_rtx
11640 || (REG_P (base_reg)
11641 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11642 || REGNO (base_reg) == R13_REG))))
11645 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11646 Avoid this by transforming to [%esi+0].
11647 Reload calls address legitimization without cfun defined, so we need
11648 to test cfun for being non-NULL. */
11649 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11650 && base_reg && !index_reg && !disp
11651 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11654 /* Special case: encode reg+reg instead of reg*2. */
11655 if (!base && index && scale == 2)
11656 base = index, base_reg = index_reg, scale = 1;
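/* Both special cases here come from ModRM/SIB encoding rules: a base
   of %ebp (or %r13) with mod == 00 actually means "disp32, no base",
   so a zero displacement byte has to be materialized; and an index
   without a base always needs a 4-byte displacement, which is why
   (%eax,%eax) avoids the four displacement bytes that (,%eax,2)
   would require.  */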
11658 /* Special case: scaling cannot be encoded without base or displacement. */
11659 if (!base && !disp && index && scale != 1)
11663 out->index = index;
11665 out->scale = scale;
11671 /* Return the cost of the memory address X.
11672 For i386, it is better to use a complex address than let gcc copy
11673 the address into a reg and make a new pseudo. But not if the address
11674 requires two regs - that would mean more pseudos with longer
11677 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11679 struct ix86_address parts;
11681 int ok = ix86_decompose_address (x, &parts);
11685 if (parts.base && GET_CODE (parts.base) == SUBREG)
11686 parts.base = SUBREG_REG (parts.base);
11687 if (parts.index && GET_CODE (parts.index) == SUBREG)
11688 parts.index = SUBREG_REG (parts.index);
11690 /* Attempt to minimize number of registers in the address. */
11692 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11694 && (!REG_P (parts.index)
11695 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11699 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11701 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11702 && parts.base != parts.index)
11705 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11706 since its predecode logic can't detect the length of instructions
11707 and it degenerates to vector decoding. Increase the cost of such
11708 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11709 to split such addresses or even refuse such addresses at all.
11711 The following addressing modes are affected:
11716 The first and last case may be avoidable by explicitly coding the zero into
11717 the memory address, but I don't have an AMD-K6 machine handy to check this
11721 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11722 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11723 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11729 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11730 this is used to form addresses to local data when -fPIC is in
11734 darwin_local_data_pic (rtx disp)
11736 return (GET_CODE (disp) == UNSPEC
11737 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11740 /* Determine if a given RTX is a valid constant. We already know this
11741 satisfies CONSTANT_P. */
11744 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11746 switch (GET_CODE (x))
11751 if (GET_CODE (x) == PLUS)
11753 if (!CONST_INT_P (XEXP (x, 1)))
11758 if (TARGET_MACHO && darwin_local_data_pic (x))
11761 /* Only some unspecs are valid as "constants". */
11762 if (GET_CODE (x) == UNSPEC)
11763 switch (XINT (x, 1))
11766 case UNSPEC_GOTOFF:
11767 case UNSPEC_PLTOFF:
11768 return TARGET_64BIT;
11770 case UNSPEC_NTPOFF:
11771 x = XVECEXP (x, 0, 0);
11772 return (GET_CODE (x) == SYMBOL_REF
11773 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11774 case UNSPEC_DTPOFF:
11775 x = XVECEXP (x, 0, 0);
11776 return (GET_CODE (x) == SYMBOL_REF
11777 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11782 /* We must have drilled down to a symbol. */
11783 if (GET_CODE (x) == LABEL_REF)
11785 if (GET_CODE (x) != SYMBOL_REF)
11790 /* TLS symbols are never valid. */
11791 if (SYMBOL_REF_TLS_MODEL (x))
11794 /* DLLIMPORT symbols are never valid. */
11795 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11796 && SYMBOL_REF_DLLIMPORT_P (x))
11800 /* mdynamic-no-pic */
11801 if (MACHO_DYNAMIC_NO_PIC_P)
11802 return machopic_symbol_defined_p (x);
11807 if (GET_MODE (x) == TImode
11808 && x != CONST0_RTX (TImode)
11814 if (!standard_sse_constant_p (x))
11821 /* Otherwise we handle everything else in the move patterns. */
11825 /* Determine if it's legal to put X into the constant pool. This
11826 is not possible for the address of thread-local symbols, which
11827 is checked above. */
11830 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11832 /* We can always put integral constants and vectors in memory. */
11833 switch (GET_CODE (x))
11843 return !ix86_legitimate_constant_p (mode, x);
11847 /* Nonzero if the constant value X is a legitimate general operand
11848 when generating PIC code. It is given that flag_pic is on and
11849 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11852 legitimate_pic_operand_p (rtx x)
11856 switch (GET_CODE (x))
11859 inner = XEXP (x, 0);
11860 if (GET_CODE (inner) == PLUS
11861 && CONST_INT_P (XEXP (inner, 1)))
11862 inner = XEXP (inner, 0);
11864 /* Only some unspecs are valid as "constants". */
11865 if (GET_CODE (inner) == UNSPEC)
11866 switch (XINT (inner, 1))
11869 case UNSPEC_GOTOFF:
11870 case UNSPEC_PLTOFF:
11871 return TARGET_64BIT;
11873 x = XVECEXP (inner, 0, 0);
11874 return (GET_CODE (x) == SYMBOL_REF
11875 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11876 case UNSPEC_MACHOPIC_OFFSET:
11877 return legitimate_pic_address_disp_p (x);
11885 return legitimate_pic_address_disp_p (x);
11892 /* Determine if a given CONST RTX is a valid memory displacement
11896 legitimate_pic_address_disp_p (rtx disp)
11900 /* In 64bit mode we can allow direct addresses of symbols and labels
11901 when they are not dynamic symbols. */
11904 rtx op0 = disp, op1;
11906 switch (GET_CODE (disp))
11912 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11914 op0 = XEXP (XEXP (disp, 0), 0);
11915 op1 = XEXP (XEXP (disp, 0), 1);
11916 if (!CONST_INT_P (op1)
11917 || INTVAL (op1) >= 16*1024*1024
11918 || INTVAL (op1) < -16*1024*1024)
11920 if (GET_CODE (op0) == LABEL_REF)
11922 if (GET_CODE (op0) == CONST
11923 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11924 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11926 if (GET_CODE (op0) == UNSPEC
11927 && XINT (op0, 1) == UNSPEC_PCREL)
11929 if (GET_CODE (op0) != SYMBOL_REF)
11934 /* TLS references should always be enclosed in UNSPEC. */
11935 if (SYMBOL_REF_TLS_MODEL (op0))
11937 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11938 && ix86_cmodel != CM_LARGE_PIC)
11946 if (GET_CODE (disp) != CONST)
11948 disp = XEXP (disp, 0);
11952 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
11953 of GOT table references. We should not need these anyway. */
11954 if (GET_CODE (disp) != UNSPEC
11955 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11956 && XINT (disp, 1) != UNSPEC_GOTOFF
11957 && XINT (disp, 1) != UNSPEC_PCREL
11958 && XINT (disp, 1) != UNSPEC_PLTOFF))
11961 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11962 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11968 if (GET_CODE (disp) == PLUS)
11970 if (!CONST_INT_P (XEXP (disp, 1)))
11972 disp = XEXP (disp, 0);
11976 if (TARGET_MACHO && darwin_local_data_pic (disp))
11979 if (GET_CODE (disp) != UNSPEC)
11982 switch (XINT (disp, 1))
11987 /* We need to check for both symbols and labels because VxWorks loads
11988 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11990 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11991 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11992 case UNSPEC_GOTOFF:
11993 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11994 While the ABI also specifies a 32bit relocation, we don't produce it in
11995 the small PIC model at all. */
11996 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11997 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11999 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12001 case UNSPEC_GOTTPOFF:
12002 case UNSPEC_GOTNTPOFF:
12003 case UNSPEC_INDNTPOFF:
12006 disp = XVECEXP (disp, 0, 0);
12007 return (GET_CODE (disp) == SYMBOL_REF
12008 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12009 case UNSPEC_NTPOFF:
12010 disp = XVECEXP (disp, 0, 0);
12011 return (GET_CODE (disp) == SYMBOL_REF
12012 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12013 case UNSPEC_DTPOFF:
12014 disp = XVECEXP (disp, 0, 0);
12015 return (GET_CODE (disp) == SYMBOL_REF
12016 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12022 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12023 replace the input X, or the original X if no replacement is called for.
12024 The output parameter *WIN is 1 if the calling macro should goto WIN,
12025 0 if it should not. */
12028 ix86_legitimize_reload_address (rtx x,
12029 enum machine_mode mode ATTRIBUTE_UNUSED,
12030 int opnum, int type,
12031 int ind_levels ATTRIBUTE_UNUSED)
12033 /* Reload can generate:
12035 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12039 This RTX is rejected from ix86_legitimate_address_p due to
12040 non-strictness of base register 97. Following this rejection,
12041 reload pushes all three components into separate registers,
12042 creating an invalid memory address RTX.
12044 The following code reloads only the invalid part of the
12045 memory address RTX. */
12047 if (GET_CODE (x) == PLUS
12048 && REG_P (XEXP (x, 1))
12049 && GET_CODE (XEXP (x, 0)) == PLUS
12050 && REG_P (XEXP (XEXP (x, 0), 1)))
12053 bool something_reloaded = false;
12055 base = XEXP (XEXP (x, 0), 1);
12056 if (!REG_OK_FOR_BASE_STRICT_P (base))
12058 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12059 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12060 opnum, (enum reload_type)type);
12061 something_reloaded = true;
12064 index = XEXP (x, 1);
12065 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12067 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12068 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12069 opnum, (enum reload_type)type);
12070 something_reloaded = true;
12073 gcc_assert (something_reloaded);
12080 /* Recognizes RTL expressions that are valid memory addresses for an
12081 instruction. The MODE argument is the machine mode for the MEM
12082 expression that wants to use this address.
12084 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12085 convert common non-canonical forms to canonical form so that they will
12089 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12090 rtx addr, bool strict)
12092 struct ix86_address parts;
12093 rtx base, index, disp;
12094 HOST_WIDE_INT scale;
12096 if (ix86_decompose_address (addr, &parts) <= 0)
12097 /* Decomposition failed. */
12101 index = parts.index;
12103 scale = parts.scale;
12105 /* Validate base register. */
12112 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
12113 reg = SUBREG_REG (base);
12115 /* Base is not a register. */
12118 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
12121 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12122 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12123 /* Base is not valid. */
12127 /* Validate index register. */
12134 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
12135 reg = SUBREG_REG (index);
12137 /* Index is not a register. */
12140 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
12143 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12144 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12145 /* Index is not valid. */
12149 /* Index and base should have the same mode. */
12151 && GET_MODE (base) != GET_MODE (index))
12154 /* Validate scale factor. */
12158 /* Scale without index. */
12161 if (scale != 2 && scale != 4 && scale != 8)
12162 /* Scale is not a valid multiplier. */
12166 /* Validate displacement. */
12169 if (GET_CODE (disp) == CONST
12170 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12171 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12172 switch (XINT (XEXP (disp, 0), 1))
12174 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12175 used. While the ABI also specifies 32bit relocations, we don't produce
12176 them at all and use IP-relative addressing instead. */
12178 case UNSPEC_GOTOFF:
12179 gcc_assert (flag_pic);
12181 goto is_legitimate_pic;
12183 /* 64bit address unspec. */
12186 case UNSPEC_GOTPCREL:
12188 gcc_assert (flag_pic);
12189 goto is_legitimate_pic;
12191 case UNSPEC_GOTTPOFF:
12192 case UNSPEC_GOTNTPOFF:
12193 case UNSPEC_INDNTPOFF:
12194 case UNSPEC_NTPOFF:
12195 case UNSPEC_DTPOFF:
12198 case UNSPEC_STACK_CHECK:
12199 gcc_assert (flag_split_stack);
12203 /* Invalid address unspec. */
12207 else if (SYMBOLIC_CONST (disp)
12211 && MACHOPIC_INDIRECT
12212 && !machopic_operand_p (disp)
12218 if (TARGET_64BIT && (index || base))
12220 /* foo@dtpoff(%rX) is ok. */
12221 if (GET_CODE (disp) != CONST
12222 || GET_CODE (XEXP (disp, 0)) != PLUS
12223 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12224 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12225 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12226 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12227 /* Non-constant pic memory reference. */
12230 else if ((!TARGET_MACHO || flag_pic)
12231 && ! legitimate_pic_address_disp_p (disp))
12232 /* Displacement is an invalid pic construct. */
12235 else if (MACHO_DYNAMIC_NO_PIC_P
12236 && !ix86_legitimate_constant_p (Pmode, disp))
12237 /* The displacement must be referenced via a non_lazy_pointer. */
12241 /* This code used to verify that a symbolic pic displacement
12242 includes the pic_offset_table_rtx register.
12244	     While this is a good idea, unfortunately these constructs may
12245	     be created by the "adds using lea" optimization for incorrect
12254	     This code is nonsensical, but results in addressing the
12255	     GOT table with the pic_offset_table_rtx base.  We can't
12256	     just refuse it easily, since it gets matched by the
12257	     "addsi3" pattern, which later gets split to lea when the
12258	     output register differs from the input.  While this
12259	     could be handled by a separate addsi pattern for this case
12260	     that never results in lea, disabling this test seems to be
12261	     the easier and correct fix for the crash.  */
12263 else if (GET_CODE (disp) != LABEL_REF
12264 && !CONST_INT_P (disp)
12265 && (GET_CODE (disp) != CONST
12266 || !ix86_legitimate_constant_p (Pmode, disp))
12267 && (GET_CODE (disp) != SYMBOL_REF
12268 || !ix86_legitimate_constant_p (Pmode, disp)))
12269 /* Displacement is not constant. */
12271 else if (TARGET_64BIT
12272 && !x86_64_immediate_operand (disp, VOIDmode))
12273 /* Displacement is out of range. */
12277 /* Everything looks valid. */
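/* A minimal standalone sketch (not GCC code; names are hypothetical) of
   the address shape validated above: base + index*scale + disp, with the
   scale restricted to the SIB multipliers and any scale other than 1
   requiring an index register.  */
#if 0
#include <stdbool.h>

struct x86_addr_sketch { bool has_base, has_index; int scale; long disp; };

static bool
x86_addr_shape_ok (const struct x86_addr_sketch *a)
{
  /* Scale must be one of the SIB multipliers.  */
  if (a->scale != 1 && a->scale != 2 && a->scale != 4 && a->scale != 8)
    return false;
  /* A scale other than 1 without an index register is not encodable.  */
  if (a->scale != 1 && !a->has_index)
    return false;
  return true;
}
#endif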
12281 /* Determine if a given RTX is a valid constant address. */
12284 constant_address_p (rtx x)
12286 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12289 /* Return a unique alias set for the GOT. */
12291 static alias_set_type
12292 ix86_GOT_alias_set (void)
12294 static alias_set_type set = -1;
12296 set = new_alias_set ();
12300 /* Return a legitimate reference for ORIG (an address) using the
12301 register REG. If REG is 0, a new pseudo is generated.
12303 There are two types of references that must be handled:
12305 1. Global data references must load the address from the GOT, via
12306 the PIC reg. An insn is emitted to do this load, and the reg is
12309 2. Static data references, constant pool addresses, and code labels
12310 compute the address as an offset from the GOT, whose base is in
12311 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12312 differentiate them from global data objects. The returned
12313 address is the PIC reg + an unspec constant.
12315 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12316 reg also appears in the address. */
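/* For illustration, the ia32 assembly the two cases above typically
   produce (registers are arbitrary):

     1. global data:   movl  foo@GOT(%ebx), %eax     # address loaded from GOT
     2. static data:   leal  foo@GOTOFF(%ebx), %eax  # PIC reg + unspec offset  */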
12319 legitimize_pic_address (rtx orig, rtx reg)
12322 rtx new_rtx = orig;
12326 if (TARGET_MACHO && !TARGET_64BIT)
12329 reg = gen_reg_rtx (Pmode);
12330 /* Use the generic Mach-O PIC machinery. */
12331 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12335 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12337 else if (TARGET_64BIT
12338 && ix86_cmodel != CM_SMALL_PIC
12339 && gotoff_operand (addr, Pmode))
12342 /* This symbol may be referenced via a displacement from the PIC
12343 base address (@GOTOFF). */
12345 if (reload_in_progress)
12346 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12347 if (GET_CODE (addr) == CONST)
12348 addr = XEXP (addr, 0);
12349 if (GET_CODE (addr) == PLUS)
12351 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12353 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12356 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12357 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12359 tmpreg = gen_reg_rtx (Pmode);
12362 emit_move_insn (tmpreg, new_rtx);
12366 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12367 tmpreg, 1, OPTAB_DIRECT);
12370 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12372 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12374 /* This symbol may be referenced via a displacement from the PIC
12375 base address (@GOTOFF). */
12377 if (reload_in_progress)
12378 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12379 if (GET_CODE (addr) == CONST)
12380 addr = XEXP (addr, 0);
12381 if (GET_CODE (addr) == PLUS)
12383 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12385 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12388 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12389 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12390 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12394 emit_move_insn (reg, new_rtx);
12398 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12399 /* We can't use @GOTOFF for text labels on VxWorks;
12400 see gotoff_operand. */
12401 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12403 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12405 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12406 return legitimize_dllimport_symbol (addr, true);
12407 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12408 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12409 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12411 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12412 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12416  /* For x64 PE-COFF there is no GOT table.  So we use address directly.  */
12418 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12420 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12421 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12424 reg = gen_reg_rtx (Pmode);
12425 emit_move_insn (reg, new_rtx);
12428 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12430 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12431 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12432 new_rtx = gen_const_mem (Pmode, new_rtx);
12433 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12436 reg = gen_reg_rtx (Pmode);
12437	      /* Use gen_movsi directly; otherwise the address is loaded
12438	         into a register for CSE.  We don't want to CSE these addresses;
12439	         instead we CSE addresses from the GOT table, so skip this.  */
12440 emit_insn (gen_movsi (reg, new_rtx));
12445 /* This symbol must be referenced via a load from the
12446 Global Offset Table (@GOT). */
12448 if (reload_in_progress)
12449 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12450 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12451 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12453 new_rtx = force_reg (Pmode, new_rtx);
12454 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12455 new_rtx = gen_const_mem (Pmode, new_rtx);
12456 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12459 reg = gen_reg_rtx (Pmode);
12460 emit_move_insn (reg, new_rtx);
12466 if (CONST_INT_P (addr)
12467 && !x86_64_immediate_operand (addr, VOIDmode))
12471 emit_move_insn (reg, addr);
12475 new_rtx = force_reg (Pmode, addr);
12477 else if (GET_CODE (addr) == CONST)
12479 addr = XEXP (addr, 0);
12481 /* We must match stuff we generate before. Assume the only
12482 unspecs that can get here are ours. Not that we could do
12483 anything with them anyway.... */
12484 if (GET_CODE (addr) == UNSPEC
12485 || (GET_CODE (addr) == PLUS
12486 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12488 gcc_assert (GET_CODE (addr) == PLUS);
12490 if (GET_CODE (addr) == PLUS)
12492 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12494 /* Check first to see if this is a constant offset from a @GOTOFF
12495 symbol reference. */
12496 if (gotoff_operand (op0, Pmode)
12497 && CONST_INT_P (op1))
12501 if (reload_in_progress)
12502 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12503 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12505 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12506 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12507 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12511 emit_move_insn (reg, new_rtx);
12517 if (INTVAL (op1) < -16*1024*1024
12518 || INTVAL (op1) >= 16*1024*1024)
12520 if (!x86_64_immediate_operand (op1, Pmode))
12521 op1 = force_reg (Pmode, op1);
12522 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12528 base = legitimize_pic_address (XEXP (addr, 0), reg);
12529 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12530 base == reg ? NULL_RTX : reg);
12532 if (CONST_INT_P (new_rtx))
12533 new_rtx = plus_constant (base, INTVAL (new_rtx));
12536 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12538 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12539 new_rtx = XEXP (new_rtx, 1);
12541 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12549 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12552 get_thread_pointer (bool to_reg)
12554 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12556 if (GET_MODE (tp) != Pmode)
12557 tp = convert_to_mode (Pmode, tp, 1);
12560 tp = copy_addr_to_reg (tp);
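/* For reference: the UNSPEC_TP above typically materializes as a read of
   %gs:0 on ia32 and %fs:0 on x86-64, i.e. the thread pointer stored at
   offset 0 of the thread's TLS segment.  */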
12565 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12567 static GTY(()) rtx ix86_tls_symbol;
12570 ix86_tls_get_addr (void)
12572 if (!ix86_tls_symbol)
12575 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12576 ? "___tls_get_addr" : "__tls_get_addr");
12578 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12581 return ix86_tls_symbol;
12584 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12586 static GTY(()) rtx ix86_tls_module_base_symbol;
12589 ix86_tls_module_base (void)
12591 if (!ix86_tls_module_base_symbol)
12593 ix86_tls_module_base_symbol
12594 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12596 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12597 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12600 return ix86_tls_module_base_symbol;
12603 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12604 false if we expect this to be used for a memory address and true if
12605 we expect to load the address into a register. */
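/* Rough shape of the access sequences the cases below emit for ia32 with
   traditional GNU TLS (registers are illustrative; the GNU2 and 64-bit
   variants differ, see each case):

     global-dynamic:  leal x@tlsgd(,%ebx,1), %eax;  call ___tls_get_addr
     local-dynamic:   leal x@tlsldm(%ebx), %eax;    call ___tls_get_addr
                      leal x@dtpoff(%eax), %edx
     initial-exec:    movl x@gotntpoff(%ebx), %eax; movl %gs:(%eax), %eax
     local-exec:      movl %gs:0, %eax;             leal x@ntpoff(%eax), %eax  */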
12608 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12610 rtx dest, base, off;
12611 rtx pic = NULL_RTX, tp = NULL_RTX;
12616 case TLS_MODEL_GLOBAL_DYNAMIC:
12617 dest = gen_reg_rtx (Pmode);
12622 pic = pic_offset_table_rtx;
12625 pic = gen_reg_rtx (Pmode);
12626 emit_insn (gen_set_got (pic));
12630 if (TARGET_GNU2_TLS)
12633 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12635 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12637 tp = get_thread_pointer (true);
12638 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12640 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12644 rtx caddr = ix86_tls_get_addr ();
12648 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12651 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12652 insns = get_insns ();
12655 RTL_CONST_CALL_P (insns) = 1;
12656 emit_libcall_block (insns, dest, rax, x);
12659 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12663 case TLS_MODEL_LOCAL_DYNAMIC:
12664 base = gen_reg_rtx (Pmode);
12669 pic = pic_offset_table_rtx;
12672 pic = gen_reg_rtx (Pmode);
12673 emit_insn (gen_set_got (pic));
12677 if (TARGET_GNU2_TLS)
12679 rtx tmp = ix86_tls_module_base ();
12682 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12684 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12686 tp = get_thread_pointer (true);
12687 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12688 gen_rtx_MINUS (Pmode, tmp, tp));
12692 rtx caddr = ix86_tls_get_addr ();
12696 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12699 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12700 insns = get_insns ();
12703 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12704 share the LD_BASE result with other LD model accesses. */
12705 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12706 UNSPEC_TLS_LD_BASE);
12708 RTL_CONST_CALL_P (insns) = 1;
12709 emit_libcall_block (insns, base, rax, eqv);
12712 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12715 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12716 off = gen_rtx_CONST (Pmode, off);
12718 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12720 if (TARGET_GNU2_TLS)
12722 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12724 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12728 case TLS_MODEL_INITIAL_EXEC:
12731 if (TARGET_SUN_TLS)
12733 /* The Sun linker took the AMD64 TLS spec literally
12734 and can only handle %rax as destination of the
12735 initial executable code sequence. */
12737 dest = gen_reg_rtx (Pmode);
12738 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12743 type = UNSPEC_GOTNTPOFF;
12747 if (reload_in_progress)
12748 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12749 pic = pic_offset_table_rtx;
12750 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12752 else if (!TARGET_ANY_GNU_TLS)
12754 pic = gen_reg_rtx (Pmode);
12755 emit_insn (gen_set_got (pic));
12756 type = UNSPEC_GOTTPOFF;
12761 type = UNSPEC_INDNTPOFF;
12764 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12765 off = gen_rtx_CONST (Pmode, off);
12767 off = gen_rtx_PLUS (Pmode, pic, off);
12768 off = gen_const_mem (Pmode, off);
12769 set_mem_alias_set (off, ix86_GOT_alias_set ());
12771 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12773 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12774 off = force_reg (Pmode, off);
12775 return gen_rtx_PLUS (Pmode, base, off);
12779 base = get_thread_pointer (true);
12780 dest = gen_reg_rtx (Pmode);
12781 emit_insn (gen_subsi3 (dest, base, off));
12785 case TLS_MODEL_LOCAL_EXEC:
12786 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12787 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12788 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12789 off = gen_rtx_CONST (Pmode, off);
12791 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12793 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12794 return gen_rtx_PLUS (Pmode, base, off);
12798 base = get_thread_pointer (true);
12799 dest = gen_reg_rtx (Pmode);
12800 emit_insn (gen_subsi3 (dest, base, off));
12805 gcc_unreachable ();
12811 /* Create or return the unique __imp_DECL dllimport symbol corresponding to DECL.  */
12814 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12815 htab_t dllimport_map;
12818 get_dllimport_decl (tree decl)
12820 struct tree_map *h, in;
12823 const char *prefix;
12824 size_t namelen, prefixlen;
12829 if (!dllimport_map)
12830 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12832 in.hash = htab_hash_pointer (decl);
12833 in.base.from = decl;
12834 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12835 h = (struct tree_map *) *loc;
12839 *loc = h = ggc_alloc_tree_map ();
12841 h->base.from = decl;
12842 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12843 VAR_DECL, NULL, ptr_type_node);
12844 DECL_ARTIFICIAL (to) = 1;
12845 DECL_IGNORED_P (to) = 1;
12846 DECL_EXTERNAL (to) = 1;
12847 TREE_READONLY (to) = 1;
12849 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12850 name = targetm.strip_name_encoding (name);
12851 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12852 ? "*__imp_" : "*__imp__";
12853 namelen = strlen (name);
12854 prefixlen = strlen (prefix);
12855 imp_name = (char *) alloca (namelen + prefixlen + 1);
12856 memcpy (imp_name, prefix, prefixlen);
12857 memcpy (imp_name + prefixlen, name, namelen + 1);
12859 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12860 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12861 SET_SYMBOL_REF_DECL (rtl, to);
12862 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12864 rtl = gen_const_mem (Pmode, rtl);
12865 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12867 SET_DECL_RTL (to, rtl);
12868 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12873 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12874 true if we require the result be a register. */
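/* Illustrative effect: a reference to a dllimport'ed "foo" is rewritten
   into a load through the import-table cell, roughly

     foo   ==>   (mem (symbol_ref "__imp_foo"))

   so the access goes through the pointer the loader fills in.  */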
12877 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12882 gcc_assert (SYMBOL_REF_DECL (symbol));
12883 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12885 x = DECL_RTL (imp_decl);
12887 x = force_reg (Pmode, x);
12891 /* Try machine-dependent ways of modifying an illegitimate address
12892 to be legitimate. If we find one, return the new, valid address.
12893 This macro is used in only one place: `memory_address' in explow.c.
12895 OLDX is the address as it was before break_out_memory_refs was called.
12896 In some cases it is useful to look at this to decide what needs to be done.
12898 It is always safe for this macro to do nothing. It exists to recognize
12899 opportunities to optimize the output.
12901 For the 80386, we handle X+REG by loading X into a register R and
12902 using R+REG. R will go in a general reg and indexing will be used.
12903 However, if REG is a broken-out memory address or multiplication,
12904 nothing needs to be done because REG can certainly go in a general reg.
12906 When -fpic is used, special handling is needed for symbolic references.
12907 See comments by legitimize_pic_address in i386.c for details. */
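/* Standalone sketch (not GCC code) of the first canonicalization done
   below: an address part (x << n) with n in 0..3 is rewritten as
   x * (1 << n) so it can match the SIB index*scale form.  */
#if 0
/* (ashift x n), n < 4   ==>   (mult x (1 << n))  */
static unsigned long
sib_scale_from_shift (unsigned shift_count)
{
  return shift_count < 4 ? 1UL << shift_count : 0;  /* 0: not representable */
}
#endif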
12910 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12911 enum machine_mode mode)
12916 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12918 return legitimize_tls_address (x, (enum tls_model) log, false);
12919 if (GET_CODE (x) == CONST
12920 && GET_CODE (XEXP (x, 0)) == PLUS
12921 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12922 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12924 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12925 (enum tls_model) log, false);
12926 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12929 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12931 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12932 return legitimize_dllimport_symbol (x, true);
12933 if (GET_CODE (x) == CONST
12934 && GET_CODE (XEXP (x, 0)) == PLUS
12935 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12936 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12938 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12939 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12943 if (flag_pic && SYMBOLIC_CONST (x))
12944 return legitimize_pic_address (x, 0);
12947 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12948 return machopic_indirect_data_reference (x, 0);
12951   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
12952 if (GET_CODE (x) == ASHIFT
12953 && CONST_INT_P (XEXP (x, 1))
12954 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12957 log = INTVAL (XEXP (x, 1));
12958 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12959 GEN_INT (1 << log));
12962 if (GET_CODE (x) == PLUS)
12964 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12966 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12967 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12968 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12971 log = INTVAL (XEXP (XEXP (x, 0), 1));
12972 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12973 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12974 GEN_INT (1 << log));
12977 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12978 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12979 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12982 log = INTVAL (XEXP (XEXP (x, 1), 1));
12983 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12984 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12985 GEN_INT (1 << log));
12988 /* Put multiply first if it isn't already. */
12989 if (GET_CODE (XEXP (x, 1)) == MULT)
12991 rtx tmp = XEXP (x, 0);
12992 XEXP (x, 0) = XEXP (x, 1);
12997 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12998 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12999 created by virtual register instantiation, register elimination, and
13000 similar optimizations. */
13001 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13004 x = gen_rtx_PLUS (Pmode,
13005 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13006 XEXP (XEXP (x, 1), 0)),
13007 XEXP (XEXP (x, 1), 1));
13011 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13012 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13013 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13014 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13015 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13016 && CONSTANT_P (XEXP (x, 1)))
13019 rtx other = NULL_RTX;
13021 if (CONST_INT_P (XEXP (x, 1)))
13023 constant = XEXP (x, 1);
13024 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13026 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13028 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13029 other = XEXP (x, 1);
13037 x = gen_rtx_PLUS (Pmode,
13038 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13039 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13040 plus_constant (other, INTVAL (constant)));
13044 if (changed && ix86_legitimate_address_p (mode, x, false))
13047 if (GET_CODE (XEXP (x, 0)) == MULT)
13050 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13053 if (GET_CODE (XEXP (x, 1)) == MULT)
13056 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13060 && REG_P (XEXP (x, 1))
13061 && REG_P (XEXP (x, 0)))
13064 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13067 x = legitimize_pic_address (x, 0);
13070 if (changed && ix86_legitimate_address_p (mode, x, false))
13073 if (REG_P (XEXP (x, 0)))
13075 rtx temp = gen_reg_rtx (Pmode);
13076 rtx val = force_operand (XEXP (x, 1), temp);
13079 if (GET_MODE (val) != Pmode)
13080 val = convert_to_mode (Pmode, val, 1);
13081 emit_move_insn (temp, val);
13084 XEXP (x, 1) = temp;
13088 else if (REG_P (XEXP (x, 1)))
13090 rtx temp = gen_reg_rtx (Pmode);
13091 rtx val = force_operand (XEXP (x, 0), temp);
13094 if (GET_MODE (val) != Pmode)
13095 val = convert_to_mode (Pmode, val, 1);
13096 emit_move_insn (temp, val);
13099 XEXP (x, 0) = temp;
13107 /* Print an integer constant expression in assembler syntax. Addition
13108 and subtraction are the only arithmetic that may appear in these
13109 expressions. FILE is the stdio stream to write to, X is the rtx, and
13110 CODE is the operand print code from the output string. */
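/* Examples of the output produced here (illustrative): a SYMBOL_REF "foo"
   with code 'P' in PIC code may print as "foo@PLT"; a
   (const (unspec [foo] UNSPEC_GOTOFF)) prints as "foo@GOTOFF"; integer
   constants print in plain decimal.  */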
13113 output_pic_addr_const (FILE *file, rtx x, int code)
13117 switch (GET_CODE (x))
13120 gcc_assert (flag_pic);
13125 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13126 output_addr_const (file, x);
13129 const char *name = XSTR (x, 0);
13131 /* Mark the decl as referenced so that cgraph will
13132 output the function. */
13133 if (SYMBOL_REF_DECL (x))
13134 mark_decl_referenced (SYMBOL_REF_DECL (x));
13137 if (MACHOPIC_INDIRECT
13138 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13139 name = machopic_indirection_name (x, /*stub_p=*/true);
13141 assemble_name (file, name);
13143 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13144 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13145 fputs ("@PLT", file);
13152 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13153 assemble_name (asm_out_file, buf);
13157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13161 /* This used to output parentheses around the expression,
13162 but that does not work on the 386 (either ATT or BSD assembler). */
13163 output_pic_addr_const (file, XEXP (x, 0), code);
13167 if (GET_MODE (x) == VOIDmode)
13169 /* We can use %d if the number is <32 bits and positive. */
13170 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13171 fprintf (file, "0x%lx%08lx",
13172 (unsigned long) CONST_DOUBLE_HIGH (x),
13173 (unsigned long) CONST_DOUBLE_LOW (x));
13175 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13178 /* We can't handle floating point constants;
13179 TARGET_PRINT_OPERAND must handle them. */
13180 output_operand_lossage ("floating constant misused");
13184 /* Some assemblers need integer constants to appear first. */
13185 if (CONST_INT_P (XEXP (x, 0)))
13187 output_pic_addr_const (file, XEXP (x, 0), code);
13189 output_pic_addr_const (file, XEXP (x, 1), code);
13193 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13194 output_pic_addr_const (file, XEXP (x, 1), code);
13196 output_pic_addr_const (file, XEXP (x, 0), code);
13202 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13203 output_pic_addr_const (file, XEXP (x, 0), code);
13205 output_pic_addr_const (file, XEXP (x, 1), code);
13207 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13211 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13213 bool f = i386_asm_output_addr_const_extra (file, x);
13218 gcc_assert (XVECLEN (x, 0) == 1);
13219 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13220 switch (XINT (x, 1))
13223 fputs ("@GOT", file);
13225 case UNSPEC_GOTOFF:
13226 fputs ("@GOTOFF", file);
13228 case UNSPEC_PLTOFF:
13229 fputs ("@PLTOFF", file);
13232 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13233 "(%rip)" : "[rip]", file);
13235 case UNSPEC_GOTPCREL:
13236 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13237 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13239 case UNSPEC_GOTTPOFF:
13240 /* FIXME: This might be @TPOFF in Sun ld too. */
13241 fputs ("@gottpoff", file);
13244 fputs ("@tpoff", file);
13246 case UNSPEC_NTPOFF:
13248 fputs ("@tpoff", file);
13250 fputs ("@ntpoff", file);
13252 case UNSPEC_DTPOFF:
13253 fputs ("@dtpoff", file);
13255 case UNSPEC_GOTNTPOFF:
13257 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13258 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13260 fputs ("@gotntpoff", file);
13262 case UNSPEC_INDNTPOFF:
13263 fputs ("@indntpoff", file);
13266 case UNSPEC_MACHOPIC_OFFSET:
13268 machopic_output_function_base_name (file);
13272 output_operand_lossage ("invalid UNSPEC as operand");
13278 output_operand_lossage ("invalid expression as operand");
13282 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13283 We need to emit DTP-relative relocations. */
13285 static void ATTRIBUTE_UNUSED
13286 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13288 fputs (ASM_LONG, file);
13289 output_addr_const (file, x);
13290 fputs ("@dtpoff", file);
13296 fputs (", 0", file);
13299 gcc_unreachable ();
13303 /* Return true if X is a representation of the PIC register. This copes
13304 with calls from ix86_find_base_term, where the register might have
13305 been replaced by a cselib value. */
13308 ix86_pic_register_p (rtx x)
13310 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13311 return (pic_offset_table_rtx
13312 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13314 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13317 /* Helper function for ix86_delegitimize_address.
13318 Attempt to delegitimize TLS local-exec accesses. */
13321 ix86_delegitimize_tls_address (rtx orig_x)
13323 rtx x = orig_x, unspec;
13324 struct ix86_address addr;
13326 if (!TARGET_TLS_DIRECT_SEG_REFS)
13330 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13332 if (ix86_decompose_address (x, &addr) == 0
13333 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13334 || addr.disp == NULL_RTX
13335 || GET_CODE (addr.disp) != CONST)
13337 unspec = XEXP (addr.disp, 0);
13338 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13339 unspec = XEXP (unspec, 0);
13340 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13342 x = XVECEXP (unspec, 0, 0);
13343 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13344 if (unspec != XEXP (addr.disp, 0))
13345 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13348 rtx idx = addr.index;
13349 if (addr.scale != 1)
13350 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13351 x = gen_rtx_PLUS (Pmode, idx, x);
13354 x = gen_rtx_PLUS (Pmode, addr.base, x);
13355 if (MEM_P (orig_x))
13356 x = replace_equiv_address_nv (orig_x, x);
13360 /* In the name of slightly smaller debug output, and to cater to
13361 general assembler lossage, recognize PIC+GOTOFF and turn it back
13362 into a direct symbol reference.
13364 On Darwin, this is necessary to avoid a crash, because Darwin
13365 has a different PIC label for each routine but the DWARF debugging
13366 information is not associated with any particular routine, so it's
13367 necessary to remove references to the PIC label from RTL stored by
13368 the DWARF output code. */
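/* Illustrative inverse mapping performed here:

     (plus %ebx (const (unspec [foo] UNSPEC_GOTOFF)))   ==>   foo

   so debug output refers to the symbol rather than the PIC expression.  */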
13371 ix86_delegitimize_address (rtx x)
13373 rtx orig_x = delegitimize_mem_from_attrs (x);
13374 /* addend is NULL or some rtx if x is something+GOTOFF where
13375 something doesn't include the PIC register. */
13376 rtx addend = NULL_RTX;
13377 /* reg_addend is NULL or a multiple of some register. */
13378 rtx reg_addend = NULL_RTX;
13379 /* const_addend is NULL or a const_int. */
13380 rtx const_addend = NULL_RTX;
13381 /* This is the result, or NULL. */
13382 rtx result = NULL_RTX;
13391 if (GET_CODE (x) == CONST
13392 && GET_CODE (XEXP (x, 0)) == PLUS
13393 && GET_MODE (XEXP (x, 0)) == Pmode
13394 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13396 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13398 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13399 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13400 if (MEM_P (orig_x))
13401 x = replace_equiv_address_nv (orig_x, x);
13404 if (GET_CODE (x) != CONST
13405 || GET_CODE (XEXP (x, 0)) != UNSPEC
13406 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13407 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13408 || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
13409 return ix86_delegitimize_tls_address (orig_x);
13410 x = XVECEXP (XEXP (x, 0), 0, 0);
13411 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13413 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13421 if (GET_CODE (x) != PLUS
13422 || GET_CODE (XEXP (x, 1)) != CONST)
13423 return ix86_delegitimize_tls_address (orig_x);
13425 if (ix86_pic_register_p (XEXP (x, 0)))
13426 /* %ebx + GOT/GOTOFF */
13428 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13430 /* %ebx + %reg * scale + GOT/GOTOFF */
13431 reg_addend = XEXP (x, 0);
13432 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13433 reg_addend = XEXP (reg_addend, 1);
13434 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13435 reg_addend = XEXP (reg_addend, 0);
13438 reg_addend = NULL_RTX;
13439 addend = XEXP (x, 0);
13443 addend = XEXP (x, 0);
13445 x = XEXP (XEXP (x, 1), 0);
13446 if (GET_CODE (x) == PLUS
13447 && CONST_INT_P (XEXP (x, 1)))
13449 const_addend = XEXP (x, 1);
13453 if (GET_CODE (x) == UNSPEC
13454 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13455 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13456 result = XVECEXP (x, 0, 0);
13458 if (TARGET_MACHO && darwin_local_data_pic (x)
13459 && !MEM_P (orig_x))
13460 result = XVECEXP (x, 0, 0);
13463 return ix86_delegitimize_tls_address (orig_x);
13466 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13468 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13471 /* If the rest of original X doesn't involve the PIC register, add
13472 addend and subtract pic_offset_table_rtx. This can happen e.g.
13474 leal (%ebx, %ecx, 4), %ecx
13476 movl foo@GOTOFF(%ecx), %edx
13477 in which case we return (%ecx - %ebx) + foo. */
13478 if (pic_offset_table_rtx)
13479 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13480 pic_offset_table_rtx),
13485 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13487 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13488 if (result == NULL_RTX)
13494 /* If X is a machine specific address (i.e. a symbol or label being
13495 referenced as a displacement from the GOT implemented using an
13496 UNSPEC), then return the base term. Otherwise return X. */
13499 ix86_find_base_term (rtx x)
13505 if (GET_CODE (x) != CONST)
13507 term = XEXP (x, 0);
13508 if (GET_CODE (term) == PLUS
13509 && (CONST_INT_P (XEXP (term, 1))
13510 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13511 term = XEXP (term, 0);
13512 if (GET_CODE (term) != UNSPEC
13513 || (XINT (term, 1) != UNSPEC_GOTPCREL
13514 && XINT (term, 1) != UNSPEC_PCREL))
13517 return XVECEXP (term, 0, 0);
13520 return ix86_delegitimize_address (x);
13524 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13525 int fp, FILE *file)
13527 const char *suffix;
13529 if (mode == CCFPmode || mode == CCFPUmode)
13531 code = ix86_fp_compare_code_to_integer (code);
13535 code = reverse_condition (code);
13586 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13590 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13591 Those same assemblers have the same but opposite lossage on cmov. */
13592 if (mode == CCmode)
13593 suffix = fp ? "nbe" : "a";
13594 else if (mode == CCCmode)
13597 gcc_unreachable ();
13613 gcc_unreachable ();
13617 gcc_assert (mode == CCmode || mode == CCCmode);
13634 gcc_unreachable ();
13638 /* ??? As above. */
13639 gcc_assert (mode == CCmode || mode == CCCmode);
13640 suffix = fp ? "nb" : "ae";
13643 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13647 /* ??? As above. */
13648 if (mode == CCmode)
13650 else if (mode == CCCmode)
13651 suffix = fp ? "nb" : "ae";
13653 gcc_unreachable ();
13656 suffix = fp ? "u" : "p";
13659 suffix = fp ? "nu" : "np";
13662 gcc_unreachable ();
13664 fputs (suffix, file);
13667 /* Print the name of register X to FILE based on its machine mode and number.
13668 If CODE is 'w', pretend the mode is HImode.
13669 If CODE is 'b', pretend the mode is QImode.
13670 If CODE is 'k', pretend the mode is SImode.
13671 If CODE is 'q', pretend the mode is DImode.
13672 If CODE is 'x', pretend the mode is V4SFmode.
13673 If CODE is 't', pretend the mode is V8SFmode.
13674 If CODE is 'h', pretend the reg is the 'high' byte register.
13675 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13676 If CODE is 'd', duplicate the operand for AVX instruction.
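   Examples (AT&T dialect): register number 0 prints as "%eax" for a
   4-byte operand, "%ax" with code 'w', "%al" with 'b', "%ah" with 'h',
   and "%rax" with 'q' on 64-bit targets.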
13680 print_reg (rtx x, int code, FILE *file)
13683 bool duplicated = code == 'd' && TARGET_AVX;
13685 gcc_assert (x == pc_rtx
13686 || (REGNO (x) != ARG_POINTER_REGNUM
13687 && REGNO (x) != FRAME_POINTER_REGNUM
13688 && REGNO (x) != FLAGS_REG
13689 && REGNO (x) != FPSR_REG
13690 && REGNO (x) != FPCR_REG));
13692 if (ASSEMBLER_DIALECT == ASM_ATT)
13697 gcc_assert (TARGET_64BIT);
13698 fputs ("rip", file);
13702 if (code == 'w' || MMX_REG_P (x))
13704 else if (code == 'b')
13706 else if (code == 'k')
13708 else if (code == 'q')
13710 else if (code == 'y')
13712 else if (code == 'h')
13714 else if (code == 'x')
13716 else if (code == 't')
13719 code = GET_MODE_SIZE (GET_MODE (x));
13721  /* Irritatingly, AMD extended registers use a different naming convention
13722     from the normal registers: "r%d[bwd]".  */
13723 if (REX_INT_REG_P (x))
13725 gcc_assert (TARGET_64BIT);
13727 fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
13731 error ("extended registers have no high halves");
13746 error ("unsupported operand size for extended register");
13756 if (STACK_TOP_P (x))
13765 if (! ANY_FP_REG_P (x))
13766 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13771 reg = hi_reg_name[REGNO (x)];
13774 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13776 reg = qi_reg_name[REGNO (x)];
13779 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13781 reg = qi_high_reg_name[REGNO (x)];
13786 gcc_assert (!duplicated);
13788 fputs (hi_reg_name[REGNO (x)] + 1, file);
13793 gcc_unreachable ();
13799 if (ASSEMBLER_DIALECT == ASM_ATT)
13800 fprintf (file, ", %%%s", reg);
13802 fprintf (file, ", %s", reg);
13806 /* Locate some local-dynamic symbol still in use by this function
13807 so that we can print its name in some tls_local_dynamic_base
13811 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13815 if (GET_CODE (x) == SYMBOL_REF
13816 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13818 cfun->machine->some_ld_name = XSTR (x, 0);
13825 static const char *
13826 get_some_local_dynamic_name (void)
13830 if (cfun->machine->some_ld_name)
13831 return cfun->machine->some_ld_name;
13833 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13834 if (NONDEBUG_INSN_P (insn)
13835 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13836 return cfun->machine->some_ld_name;
13841 /* Meaning of CODE:
13842 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13843 C -- print opcode suffix for set/cmov insn.
13844 c -- like C, but print reversed condition
13845 F,f -- likewise, but for floating-point.
13846 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13848 R -- print the prefix for register names.
13849 z -- print the opcode suffix for the size of the current operand.
13850 Z -- likewise, with special suffixes for x87 instructions.
13851 * -- print a star (in certain assembler syntax)
13852 A -- print an absolute memory reference.
13853 E -- print address with DImode register names if TARGET_64BIT.
13854 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13855    s -- print a shift double count, followed by the assembler's argument
13857 b -- print the QImode name of the register for the indicated operand.
13858 %b0 would print %al if operands[0] is reg 0.
13859 w -- likewise, print the HImode name of the register.
13860 k -- likewise, print the SImode name of the register.
13861 q -- likewise, print the DImode name of the register.
13862 x -- likewise, print the V4SFmode name of the register.
13863 t -- likewise, print the V8SFmode name of the register.
13864 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13865 y -- print "st(0)" instead of "st" as a register.
13866 d -- print duplicated register operand for AVX instruction.
13867 D -- print condition for SSE cmp instruction.
13868 P -- if PIC, print an @PLT suffix.
13869 p -- print raw symbol name.
13870 X -- don't print any sort of PIC '@' suffix for a symbol.
13871 & -- print some in-use local-dynamic symbol name.
13872 H -- print a memory address offset by 8; used for sse high-parts
13873 Y -- print condition for XOP pcom* instruction.
13874 + -- print a branch hint as 'cs' or 'ds' prefix
13875 ; -- print a semicolon (after prefixes due to bug in older gas).
13876 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13877 @ -- print a segment register of thread base pointer load
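   Example: a template such as "mov%z0\t{%1, %0|%0, %1}" expands the 'z'
   code to an operand-size suffix in AT&T output ("movl" for SImode) and
   to nothing in Intel output, where the "{att|intel}" braces select the
   dialect-specific operand order.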
13881 ix86_print_operand (FILE *file, rtx x, int code)
13888 if (ASSEMBLER_DIALECT == ASM_ATT)
13894 const char *name = get_some_local_dynamic_name ();
13896 output_operand_lossage ("'%%&' used without any "
13897 "local dynamic TLS references");
13899 assemble_name (file, name);
13904 switch (ASSEMBLER_DIALECT)
13911 /* Intel syntax. For absolute addresses, registers should not
13912 be surrounded by braces. */
13916 ix86_print_operand (file, x, 0);
13923 gcc_unreachable ();
13926 ix86_print_operand (file, x, 0);
13930 /* Wrap address in an UNSPEC to declare special handling. */
13932 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13934 output_address (x);
13938 if (ASSEMBLER_DIALECT == ASM_ATT)
13943 if (ASSEMBLER_DIALECT == ASM_ATT)
13948 if (ASSEMBLER_DIALECT == ASM_ATT)
13953 if (ASSEMBLER_DIALECT == ASM_ATT)
13958 if (ASSEMBLER_DIALECT == ASM_ATT)
13963 if (ASSEMBLER_DIALECT == ASM_ATT)
13968 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13970 /* Opcodes don't get size suffixes if using Intel opcodes. */
13971 if (ASSEMBLER_DIALECT == ASM_INTEL)
13974 switch (GET_MODE_SIZE (GET_MODE (x)))
13993 output_operand_lossage
13994 ("invalid operand size for operand code '%c'", code);
13999 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14001 (0, "non-integer operand used with operand code '%c'", code);
14005 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14006 if (ASSEMBLER_DIALECT == ASM_INTEL)
14009 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14011 switch (GET_MODE_SIZE (GET_MODE (x)))
14014 #ifdef HAVE_AS_IX86_FILDS
14024 #ifdef HAVE_AS_IX86_FILDQ
14027 fputs ("ll", file);
14035 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14037 /* 387 opcodes don't get size suffixes
14038 if the operands are registers. */
14039 if (STACK_REG_P (x))
14042 switch (GET_MODE_SIZE (GET_MODE (x)))
14063 output_operand_lossage
14064 ("invalid operand type used with operand code '%c'", code);
14068 output_operand_lossage
14069 ("invalid operand size for operand code '%c'", code);
14087 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14089 ix86_print_operand (file, x, 0);
14090 fputs (", ", file);
14095      /* A little bit of brain damage here.  The SSE compare instructions
14096	  use completely different names for the comparisons than the
14097	  fp conditional moves do.  */
14100 switch (GET_CODE (x))
14103 fputs ("eq", file);
14106 fputs ("eq_us", file);
14109 fputs ("lt", file);
14112 fputs ("nge", file);
14115 fputs ("le", file);
14118 fputs ("ngt", file);
14121 fputs ("unord", file);
14124 fputs ("neq", file);
14127 fputs ("neq_oq", file);
14130 fputs ("ge", file);
14133 fputs ("nlt", file);
14136 fputs ("gt", file);
14139 fputs ("nle", file);
14142 fputs ("ord", file);
14145 output_operand_lossage ("operand is not a condition code, "
14146 "invalid operand code 'D'");
14152 switch (GET_CODE (x))
14156 fputs ("eq", file);
14160 fputs ("lt", file);
14164 fputs ("le", file);
14167 fputs ("unord", file);
14171 fputs ("neq", file);
14175 fputs ("nlt", file);
14179 fputs ("nle", file);
14182 fputs ("ord", file);
14185 output_operand_lossage ("operand is not a condition code, "
14186 "invalid operand code 'D'");
14192 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14193 if (ASSEMBLER_DIALECT == ASM_ATT)
14195 switch (GET_MODE (x))
14197 case HImode: putc ('w', file); break;
14199 case SFmode: putc ('l', file); break;
14201 case DFmode: putc ('q', file); break;
14202 default: gcc_unreachable ();
14209 if (!COMPARISON_P (x))
14211 output_operand_lossage ("operand is neither a constant nor a "
14212 "condition code, invalid operand code "
14216 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14219 if (!COMPARISON_P (x))
14221 output_operand_lossage ("operand is neither a constant nor a "
14222 "condition code, invalid operand code "
14226 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14227 if (ASSEMBLER_DIALECT == ASM_ATT)
14230 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14233 /* Like above, but reverse condition */
14235 /* Check to see if argument to %c is really a constant
14236 and not a condition code which needs to be reversed. */
14237 if (!COMPARISON_P (x))
14239 output_operand_lossage ("operand is neither a constant nor a "
14240 "condition code, invalid operand "
14244 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14247 if (!COMPARISON_P (x))
14249 output_operand_lossage ("operand is neither a constant nor a "
14250 "condition code, invalid operand "
14254 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14255 if (ASSEMBLER_DIALECT == ASM_ATT)
14258 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14262 if (!offsettable_memref_p (x))
14264 output_operand_lossage ("operand is not an offsettable memory "
14265 "reference, invalid operand "
14269 /* It doesn't actually matter what mode we use here, as we're
14270 only going to use this for printing. */
14271 x = adjust_address_nv (x, DImode, 8);
14279 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14282 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14285 int pred_val = INTVAL (XEXP (x, 0));
14287 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14288 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14290 int taken = pred_val > REG_BR_PROB_BASE / 2;
14291 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14293	      /* Emit hints only when the default branch prediction
14294	         heuristics would fail.  */
14295 if (taken != cputaken)
14297 /* We use 3e (DS) prefix for taken branches and
14298 2e (CS) prefix for not taken branches. */
14300 fputs ("ds ; ", file);
14302 fputs ("cs ; ", file);
14310 switch (GET_CODE (x))
14313 fputs ("neq", file);
14316 fputs ("eq", file);
14320 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14324 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14328 fputs ("le", file);
14332 fputs ("lt", file);
14335 fputs ("unord", file);
14338 fputs ("ord", file);
14341 fputs ("ueq", file);
14344 fputs ("nlt", file);
14347 fputs ("nle", file);
14350 fputs ("ule", file);
14353 fputs ("ult", file);
14356 fputs ("une", file);
14359 output_operand_lossage ("operand is not a condition code, "
14360 "invalid operand code 'Y'");
14366 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14372 if (ASSEMBLER_DIALECT == ASM_ATT)
14375 /* The kernel uses a different segment register for performance
14376 reasons; a system call would not have to trash the userspace
14377 segment register, which would be expensive. */
14378 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14379 fputs ("fs", file);
14381 fputs ("gs", file);
14385 putc (TARGET_AVX2 ? 'i' : 'f', file);
14389 output_operand_lossage ("invalid operand code '%c'", code);
14394 print_reg (x, code, file);
14396 else if (MEM_P (x))
14398 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14399 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14400 && GET_MODE (x) != BLKmode)
14403 switch (GET_MODE_SIZE (GET_MODE (x)))
14405 case 1: size = "BYTE"; break;
14406 case 2: size = "WORD"; break;
14407 case 4: size = "DWORD"; break;
14408 case 8: size = "QWORD"; break;
14409 case 12: size = "TBYTE"; break;
14411 if (GET_MODE (x) == XFmode)
14416 case 32: size = "YMMWORD"; break;
14418 gcc_unreachable ();
14421 /* Check for explicit size override (codes 'b', 'w', 'k',
14425 else if (code == 'w')
14427 else if (code == 'k')
14429 else if (code == 'q')
14431 else if (code == 'x')
14434 fputs (size, file);
14435 fputs (" PTR ", file);
14439 /* Avoid (%rip) for call operands. */
14440 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14441 && !CONST_INT_P (x))
14442 output_addr_const (file, x);
14443 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14444 output_operand_lossage ("invalid constraints for operand");
14446 output_address (x);
14449 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14454 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14455 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14457 if (ASSEMBLER_DIALECT == ASM_ATT)
14459 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14461 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14463 fprintf (file, "0x%08x", (unsigned int) l);
14466 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14471 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14472 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14474 if (ASSEMBLER_DIALECT == ASM_ATT)
14476 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14479 /* These float cases don't actually occur as immediate operands. */
14480 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14484 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14485 fputs (dstr, file);
14490 /* We have patterns that allow zero sets of memory, for instance.
14491 In 64-bit mode, we should probably support all 8-byte vectors,
14492 since we can in fact encode that into an immediate. */
14493 if (GET_CODE (x) == CONST_VECTOR)
14495 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14499 if (code != 'P' && code != 'p')
14501 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14503 if (ASSEMBLER_DIALECT == ASM_ATT)
14506 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14507 || GET_CODE (x) == LABEL_REF)
14509 if (ASSEMBLER_DIALECT == ASM_ATT)
14512 fputs ("OFFSET FLAT:", file);
14515 if (CONST_INT_P (x))
14516 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14517 else if (flag_pic || MACHOPIC_INDIRECT)
14518 output_pic_addr_const (file, x, code);
14520 output_addr_const (file, x);
14525 ix86_print_operand_punct_valid_p (unsigned char code)
14527 return (code == '@' || code == '*' || code == '+'
14528 || code == '&' || code == ';' || code == '~');
14531 /* Print a memory operand whose address is ADDR. */
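/* For reference: AT&T syntax prints disp(base,index,scale), e.g.
   "8(%eax,%ebx,4)", while Intel syntax prints "[eax+ebx*4+8]" preceded
   by a size override such as "DWORD PTR".  */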
14534 ix86_print_operand_address (FILE *file, rtx addr)
14536 struct ix86_address parts;
14537 rtx base, index, disp;
14543 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14545 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14546 gcc_assert (parts.index == NULL_RTX);
14547 parts.index = XVECEXP (addr, 0, 1);
14548 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14549 addr = XVECEXP (addr, 0, 0);
14552 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14554 gcc_assert (TARGET_64BIT);
14555 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14559 ok = ix86_decompose_address (addr, &parts);
14563 if (parts.base && GET_CODE (parts.base) == SUBREG)
14565 rtx tmp = SUBREG_REG (parts.base);
14566 parts.base = simplify_subreg (GET_MODE (parts.base),
14567 tmp, GET_MODE (tmp), 0);
14568 gcc_assert (parts.base != NULL_RTX);
14571 if (parts.index && GET_CODE (parts.index) == SUBREG)
14573 rtx tmp = SUBREG_REG (parts.index);
14574 parts.index = simplify_subreg (GET_MODE (parts.index),
14575 tmp, GET_MODE (tmp), 0);
14576 gcc_assert (parts.index != NULL_RTX);
14580 index = parts.index;
14582 scale = parts.scale;
14590 if (ASSEMBLER_DIALECT == ASM_ATT)
14592 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14595 gcc_unreachable ();
14598  /* Use the one byte shorter RIP-relative addressing for 64bit mode.  */
14599 if (TARGET_64BIT && !base && !index)
14603 if (GET_CODE (disp) == CONST
14604 && GET_CODE (XEXP (disp, 0)) == PLUS
14605 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14606 symbol = XEXP (XEXP (disp, 0), 0);
14608 if (GET_CODE (symbol) == LABEL_REF
14609 || (GET_CODE (symbol) == SYMBOL_REF
14610 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14613 if (!base && !index)
14615 /* Displacement only requires special attention. */
14617 if (CONST_INT_P (disp))
14619 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14620 fputs ("ds:", file);
14621 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14624 output_pic_addr_const (file, disp, 0);
14626 output_addr_const (file, disp);
14630 /* Print SImode register names to force addr32 prefix. */
14631 if (GET_CODE (addr) == SUBREG)
14633 gcc_assert (TARGET_64BIT);
14634 gcc_assert (GET_MODE (addr) == SImode);
14635 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14636 gcc_assert (!code);
14639 else if (GET_CODE (addr) == ZERO_EXTEND
14640 || GET_CODE (addr) == AND)
14642 gcc_assert (TARGET_64BIT);
14643 gcc_assert (GET_MODE (addr) == DImode);
14644 gcc_assert (!code);
14648 if (ASSEMBLER_DIALECT == ASM_ATT)
14653 output_pic_addr_const (file, disp, 0);
14654 else if (GET_CODE (disp) == LABEL_REF)
14655 output_asm_label (disp);
14657 output_addr_const (file, disp);
14662 print_reg (base, code, file);
14666 print_reg (index, vsib ? 0 : code, file);
14667 if (scale != 1 || vsib)
14668 fprintf (file, ",%d", scale);
14674 rtx offset = NULL_RTX;
14678 /* Pull out the offset of a symbol; print any symbol itself. */
14679 if (GET_CODE (disp) == CONST
14680 && GET_CODE (XEXP (disp, 0)) == PLUS
14681 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14683 offset = XEXP (XEXP (disp, 0), 1);
14684 disp = gen_rtx_CONST (VOIDmode,
14685 XEXP (XEXP (disp, 0), 0));
14689 output_pic_addr_const (file, disp, 0);
14690 else if (GET_CODE (disp) == LABEL_REF)
14691 output_asm_label (disp);
14692 else if (CONST_INT_P (disp))
14695 output_addr_const (file, disp);
14701 print_reg (base, code, file);
14704 if (INTVAL (offset) >= 0)
14706 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14710 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14717 print_reg (index, vsib ? 0 : code, file);
14718 if (scale != 1 || vsib)
14719 fprintf (file, "*%d", scale);
14726 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14729 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14733 if (GET_CODE (x) != UNSPEC)
14736 op = XVECEXP (x, 0, 0);
14737 switch (XINT (x, 1))
14739 case UNSPEC_GOTTPOFF:
14740 output_addr_const (file, op);
14741 /* FIXME: This might be @TPOFF in Sun ld. */
14742 fputs ("@gottpoff", file);
14745 output_addr_const (file, op);
14746 fputs ("@tpoff", file);
14748 case UNSPEC_NTPOFF:
14749 output_addr_const (file, op);
14751 fputs ("@tpoff", file);
14753 fputs ("@ntpoff", file);
14755 case UNSPEC_DTPOFF:
14756 output_addr_const (file, op);
14757 fputs ("@dtpoff", file);
14759 case UNSPEC_GOTNTPOFF:
14760 output_addr_const (file, op);
14762 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14763 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14765 fputs ("@gotntpoff", file);
14767 case UNSPEC_INDNTPOFF:
14768 output_addr_const (file, op);
14769 fputs ("@indntpoff", file);
14772 case UNSPEC_MACHOPIC_OFFSET:
14773 output_addr_const (file, op);
14775 machopic_output_function_base_name (file);
14779 case UNSPEC_STACK_CHECK:
14783 gcc_assert (flag_split_stack);
14785 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14786 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14788 gcc_unreachable ();
14791 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14802 /* Split one or more double-mode RTL references into pairs of half-mode
14803 references. The RTL can be REG, offsettable MEM, integer constant, or
14804 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14805 split and "num" is its length. lo_half and hi_half are output arrays
14806 that parallel "operands". */
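/* Usage sketch (illustrative): splitting a DImode move on ia32 into two
   SImode moves; lo_half/hi_half parallel the incoming operands.  */
#if 0
  rtx lo[2], hi[2];
  split_double_mode (DImode, operands, 2, lo, hi);
  emit_move_insn (lo[0], lo[1]);   /* low 32 bits */
  emit_move_insn (hi[0], hi[1]);   /* high 32 bits */
#endif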
14809 split_double_mode (enum machine_mode mode, rtx operands[],
14810 int num, rtx lo_half[], rtx hi_half[])
14812 enum machine_mode half_mode;
14818 half_mode = DImode;
14821 half_mode = SImode;
14824 gcc_unreachable ();
14827 byte = GET_MODE_SIZE (half_mode);
14831 rtx op = operands[num];
14833      /* simplify_subreg refuses to split volatile memory addresses,
14834	  but we still have to handle them.  */
14837 lo_half[num] = adjust_address (op, half_mode, 0);
14838 hi_half[num] = adjust_address (op, half_mode, byte);
14842 lo_half[num] = simplify_gen_subreg (half_mode, op,
14843 GET_MODE (op) == VOIDmode
14844 ? mode : GET_MODE (op), 0);
14845 hi_half[num] = simplify_gen_subreg (half_mode, op,
14846 GET_MODE (op) == VOIDmode
14847 ? mode : GET_MODE (op), byte);
14852 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14853 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14854 is the expression of the binary operation. The output may either be
14855 emitted here, or returned to the caller, like all output_* functions.
14857 There is no guarantee that the operands are the same mode, as they
14858 might be within FLOAT or FLOAT_EXTEND expressions. */
14860 #ifndef SYSV386_COMPAT
14861 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14862 wants to fix the assemblers because that causes incompatibility
14863 with gcc. No-one wants to fix gcc because that causes
14864 incompatibility with assemblers... You can use the option of
14865 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14866 #define SYSV386_COMPAT 1
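/* Example results (illustrative): for a 387 PLUS with operands[0] ==
   operands[1] and a register operands[2], the returned template is
   roughly "fadd\t{%2, %0|%0, %2}"; for SSE SFmode operands under AVX it
   becomes "vaddss\t{%2, %1, %0|%0, %1, %2}".  */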
14870 output_387_binary_op (rtx insn, rtx *operands)
14872 static char buf[40];
14875 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14877 #ifdef ENABLE_CHECKING
14878  /* Even if we do not want to check the inputs, this documents the input
14879     constraints, which helps in understanding the following code.  */
14880 if (STACK_REG_P (operands[0])
14881 && ((REG_P (operands[1])
14882 && REGNO (operands[0]) == REGNO (operands[1])
14883 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14884 || (REG_P (operands[2])
14885 && REGNO (operands[0]) == REGNO (operands[2])
14886 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14887 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14890 gcc_assert (is_sse);
14893 switch (GET_CODE (operands[3]))
14896 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14897 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14905 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14906 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14914 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14915 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14923 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14924 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14932 gcc_unreachable ();
14939 strcpy (buf, ssep);
14940 if (GET_MODE (operands[0]) == SFmode)
14941 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14943 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14947 strcpy (buf, ssep + 1);
14948 if (GET_MODE (operands[0]) == SFmode)
14949 strcat (buf, "ss\t{%2, %0|%0, %2}");
14951 strcat (buf, "sd\t{%2, %0|%0, %2}");
14957 switch (GET_CODE (operands[3]))
14961 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14963 rtx temp = operands[2];
14964 operands[2] = operands[1];
14965 operands[1] = temp;
14968      /* We know operands[0] == operands[1].  */
14970 if (MEM_P (operands[2]))
14976 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14978 if (STACK_TOP_P (operands[0]))
14979 /* How is it that we are storing to a dead operand[2]?
14980 Well, presumably operands[1] is dead too. We can't
14981 store the result to st(0) as st(0) gets popped on this
14982 instruction. Instead store to operands[2] (which I
14983 think has to be st(1)). st(1) will be popped later.
14984 gcc <= 2.8.1 didn't have this check and generated
14985 assembly code that the Unixware assembler rejected. */
14986 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14988 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14992 if (STACK_TOP_P (operands[0]))
14993 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14995 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15000 if (MEM_P (operands[1]))
15006 if (MEM_P (operands[2]))
15012 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15015 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15016 derived assemblers, confusingly reverse the direction of
15017 the operation for fsub{r} and fdiv{r} when the
15018 destination register is not st(0). The Intel assembler
15019 doesn't have this brain damage. Read !SYSV386_COMPAT to
15020 figure out what the hardware really does. */
15021 if (STACK_TOP_P (operands[0]))
15022 p = "{p\t%0, %2|rp\t%2, %0}";
15024 p = "{rp\t%2, %0|p\t%0, %2}";
15026 if (STACK_TOP_P (operands[0]))
15027 /* As above for fmul/fadd, we can't store to st(0). */
15028 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15030 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15035 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15038 if (STACK_TOP_P (operands[0]))
15039 p = "{rp\t%0, %1|p\t%1, %0}";
15041 p = "{p\t%1, %0|rp\t%0, %1}";
15043 if (STACK_TOP_P (operands[0]))
15044 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15046 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15051 if (STACK_TOP_P (operands[0]))
15053 if (STACK_TOP_P (operands[1]))
15054 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15056 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15059 else if (STACK_TOP_P (operands[1]))
15062 p = "{\t%1, %0|r\t%0, %1}";
15064 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15070 p = "{r\t%2, %0|\t%0, %2}";
15072 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15078 gcc_unreachable ();
15085 /* Return needed mode for entity in optimize_mode_switching pass. */
15088 ix86_mode_needed (int entity, rtx insn)
15090 enum attr_i387_cw mode;
/* The mode UNINITIALIZED is used to store the control word after a
function call or ASM pattern. The mode ANY specifies that the
function has no requirements on the control word and makes no
changes in the bits we are interested in. */
15098 || (NONJUMP_INSN_P (insn)
15099 && (asm_noperands (PATTERN (insn)) >= 0
15100 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15101 return I387_CW_UNINITIALIZED;
15103 if (recog_memoized (insn) < 0)
15104 return I387_CW_ANY;
15106 mode = get_attr_i387_cw (insn);
15111 if (mode == I387_CW_TRUNC)
15116 if (mode == I387_CW_FLOOR)
15121 if (mode == I387_CW_CEIL)
15126 if (mode == I387_CW_MASK_PM)
15131 gcc_unreachable ();
15134 return I387_CW_ANY;
/* Output code to initialize control word copies used by trunc?f?i and
rounding patterns. CURRENT_MODE is set to the current control word,
while NEW_MODE is set to the new control word. */
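/* Background for the bit masks used below (x87 control word layout):
   bits 11:10 hold the rounding control -- 00 = to nearest, 01 = down
   toward -oo, 10 = up toward +oo, 11 = toward zero (truncate) -- and
   bit 5 (0x0020) masks the precision exception.  The 0x0c00, 0x0400
   and 0x0800 constants manipulate exactly these bits. */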
15142 emit_i387_cw_initialization (int mode)
15144 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15147 enum ix86_stack_slot slot;
15149 rtx reg = gen_reg_rtx (HImode);
15151 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15152 emit_move_insn (reg, copy_rtx (stored_mode));
15154 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15155 || optimize_function_for_size_p (cfun))
15159 case I387_CW_TRUNC:
15160 /* round toward zero (truncate) */
15161 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15162 slot = SLOT_CW_TRUNC;
15165 case I387_CW_FLOOR:
15166 /* round down toward -oo */
15167 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15168 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15169 slot = SLOT_CW_FLOOR;
15173 /* round up toward +oo */
15174 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15175 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15176 slot = SLOT_CW_CEIL;
15179 case I387_CW_MASK_PM:
15180 /* mask precision exception for nearbyint() */
15181 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15182 slot = SLOT_CW_MASK_PM;
15186 gcc_unreachable ();
15193 case I387_CW_TRUNC:
15194 /* round toward zero (truncate) */
15195 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15196 slot = SLOT_CW_TRUNC;
15199 case I387_CW_FLOOR:
15200 /* round down toward -oo */
15201 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15202 slot = SLOT_CW_FLOOR;
15206 /* round up toward +oo */
15207 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15208 slot = SLOT_CW_CEIL;
15211 case I387_CW_MASK_PM:
15212 /* mask precision exception for nearbyint() */
15213 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15214 slot = SLOT_CW_MASK_PM;
15218 gcc_unreachable ();
15222 gcc_assert (slot < MAX_386_STACK_LOCALS);
15224 new_mode = assign_386_stack_local (HImode, slot);
15225 emit_move_insn (new_mode, reg);
15228 /* Output code for INSN to convert a float to a signed int. OPERANDS
15229 are the insn operands. The output may be [HSD]Imode and the input
15230 operand may be [SDX]Fmode. */
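/* An illustrative emitted sequence for an SImode result with a
   non-default rounding mode (a sketch, not the only possible output):
     fldcw  %3       ; install the truncating control word
     fistpl %0       ; convert to integer and pop
     fldcw  %2       ; restore the original control word  */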
15233 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15235 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15236 int dimode_p = GET_MODE (operands[0]) == DImode;
15237 int round_mode = get_attr_i387_cw (insn);
15239 /* Jump through a hoop or two for DImode, since the hardware has no
15240 non-popping instruction. We used to do this a different way, but
15241 that was somewhat fragile and broke with post-reload splitters. */
15242 if ((dimode_p || fisttp) && !stack_top_dies)
15243 output_asm_insn ("fld\t%y1", operands);
15245 gcc_assert (STACK_TOP_P (operands[1]));
15246 gcc_assert (MEM_P (operands[0]));
15247 gcc_assert (GET_MODE (operands[1]) != TFmode);
15250 output_asm_insn ("fisttp%Z0\t%0", operands);
15253 if (round_mode != I387_CW_ANY)
15254 output_asm_insn ("fldcw\t%3", operands);
15255 if (stack_top_dies || dimode_p)
15256 output_asm_insn ("fistp%Z0\t%0", operands);
15258 output_asm_insn ("fist%Z0\t%0", operands);
15259 if (round_mode != I387_CW_ANY)
15260 output_asm_insn ("fldcw\t%2", operands);
15266 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15267 have the values zero or one, indicates the ffreep insn's operand
15268 from the OPERANDS array. */
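/* For example, for st(2) with an assembler that lacks the mnemonic,
   the fallback below emits ASM_SHORT "0xc2df" -- the little-endian
   encoding of the two opcode bytes 0xdf 0xc2, i.e. "ffreep %st(2)"
   (illustrative). */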
15270 static const char *
15271 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15273 if (TARGET_USE_FFREEP)
15274 #ifdef HAVE_AS_IX86_FFREEP
15275 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15278 static char retval[32];
15279 int regno = REGNO (operands[opno]);
15281 gcc_assert (FP_REGNO_P (regno));
15283 regno -= FIRST_STACK_REG;
15285 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15290 return opno ? "fstp\t%y1" : "fstp\t%y0";
15294 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15295 should be used. UNORDERED_P is true when fucom should be used. */
15298 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15300 int stack_top_dies;
15301 rtx cmp_op0, cmp_op1;
15302 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15306 cmp_op0 = operands[0];
15307 cmp_op1 = operands[1];
15311 cmp_op0 = operands[1];
15312 cmp_op1 = operands[2];
15317 if (GET_MODE (operands[0]) == SFmode)
15319 return "%vucomiss\t{%1, %0|%0, %1}";
15321 return "%vcomiss\t{%1, %0|%0, %1}";
15324 return "%vucomisd\t{%1, %0|%0, %1}";
15326 return "%vcomisd\t{%1, %0|%0, %1}";
15329 gcc_assert (STACK_TOP_P (cmp_op0));
15331 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15333 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15335 if (stack_top_dies)
15337 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15338 return output_387_ffreep (operands, 1);
15341 return "ftst\n\tfnstsw\t%0";
15344 if (STACK_REG_P (cmp_op1)
15346 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15347 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If both the top of the 387 stack and the other operand (also a
stack register) die, then this must be a `fcompp' float compare. */
15355 /* There is no double popping fcomi variant. Fortunately,
15356 eflags is immune from the fstp's cc clobbering. */
15358 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15360 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15361 return output_387_ffreep (operands, 0);
15366 return "fucompp\n\tfnstsw\t%0";
15368 return "fcompp\n\tfnstsw\t%0";
15373 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15375 static const char * const alt[16] =
15377 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15378 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15379 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15380 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15382 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15383 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15387 "fcomi\t{%y1, %0|%0, %y1}",
15388 "fcomip\t{%y1, %0|%0, %y1}",
15389 "fucomi\t{%y1, %0|%0, %y1}",
15390 "fucomip\t{%y1, %0|%0, %y1}",
15401 mask = eflags_p << 3;
15402 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15403 mask |= unordered_p << 1;
15404 mask |= stack_top_dies;
15406 gcc_assert (mask < 16);
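/* Example: an ordered fcomi compare of FP operands where the stack top
   dies gives mask = (1 << 3) | (0 << 2) | (0 << 1) | 1 = 9, selecting
   the "fcomip" template (illustrative). */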
15415 ix86_output_addr_vec_elt (FILE *file, int value)
15417 const char *directive = ASM_LONG;
15421 directive = ASM_QUAD;
15423 gcc_assert (!TARGET_64BIT);
15426 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15430 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15432 const char *directive = ASM_LONG;
15435 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15436 directive = ASM_QUAD;
15438 gcc_assert (!TARGET_64BIT);
15440 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15441 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15442 fprintf (file, "%s%s%d-%s%d\n",
15443 directive, LPREFIX, value, LPREFIX, rel);
15444 else if (HAVE_AS_GOTOFF_IN_DATA)
15445 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15447 else if (TARGET_MACHO)
15449 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15450 machopic_output_function_base_name (file);
15455 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15456 GOT_SYMBOL_NAME, LPREFIX, value);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
for the target. */
15463 ix86_expand_clear (rtx dest)
15467 /* We play register width games, which are only valid after reload. */
15468 gcc_assert (reload_completed);
15470 /* Avoid HImode and its attendant prefix byte. */
15471 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15472 dest = gen_rtx_REG (SImode, REGNO (dest));
15473 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15475 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15476 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15478 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15479 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
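/* The xor form clobbers the flags, hence the CLOBBER above; it
   typically assembles to e.g. "xorl %eax, %eax", which is both shorter
   than "movl $0, %eax" and breaks dependencies (illustrative). */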
15485 /* X is an unchanging MEM. If it is a constant pool reference, return
15486 the constant pool rtx, else NULL. */
15489 maybe_get_pool_constant (rtx x)
15491 x = ix86_delegitimize_address (XEXP (x, 0));
15493 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15494 return get_pool_constant (x);
15500 ix86_expand_move (enum machine_mode mode, rtx operands[])
15503 enum tls_model model;
15508 if (GET_CODE (op1) == SYMBOL_REF)
15510 model = SYMBOL_REF_TLS_MODEL (op1);
15513 op1 = legitimize_tls_address (op1, model, true);
15514 op1 = force_operand (op1, op0);
15517 if (GET_MODE (op1) != mode)
15518 op1 = convert_to_mode (mode, op1, 1);
15520 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15521 && SYMBOL_REF_DLLIMPORT_P (op1))
15522 op1 = legitimize_dllimport_symbol (op1, false);
15524 else if (GET_CODE (op1) == CONST
15525 && GET_CODE (XEXP (op1, 0)) == PLUS
15526 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15528 rtx addend = XEXP (XEXP (op1, 0), 1);
15529 rtx symbol = XEXP (XEXP (op1, 0), 0);
15532 model = SYMBOL_REF_TLS_MODEL (symbol);
15534 tmp = legitimize_tls_address (symbol, model, true);
15535 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15536 && SYMBOL_REF_DLLIMPORT_P (symbol))
15537 tmp = legitimize_dllimport_symbol (symbol, true);
15541 tmp = force_operand (tmp, NULL);
15542 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15543 op0, 1, OPTAB_DIRECT);
15546 if (GET_MODE (tmp) != mode)
15547 op1 = convert_to_mode (mode, tmp, 1);
15551 if ((flag_pic || MACHOPIC_INDIRECT)
15552 && symbolic_operand (op1, mode))
15554 if (TARGET_MACHO && !TARGET_64BIT)
15557 /* dynamic-no-pic */
15558 if (MACHOPIC_INDIRECT)
15560 rtx temp = ((reload_in_progress
15561 || ((op0 && REG_P (op0))
15563 ? op0 : gen_reg_rtx (Pmode));
15564 op1 = machopic_indirect_data_reference (op1, temp);
15566 op1 = machopic_legitimize_pic_address (op1, mode,
15567 temp == op1 ? 0 : temp);
15569 if (op0 != op1 && GET_CODE (op0) != MEM)
15571 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15575 if (GET_CODE (op0) == MEM)
15576 op1 = force_reg (Pmode, op1);
15580 if (GET_CODE (temp) != REG)
15581 temp = gen_reg_rtx (Pmode);
15582 temp = legitimize_pic_address (op1, temp);
15587 /* dynamic-no-pic */
15593 op1 = force_reg (mode, op1);
15594 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15596 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15597 op1 = legitimize_pic_address (op1, reg);
15600 if (GET_MODE (op1) != mode)
15601 op1 = convert_to_mode (mode, op1, 1);
15608 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15609 || !push_operand (op0, mode))
15611 op1 = force_reg (mode, op1);
15613 if (push_operand (op0, mode)
15614 && ! general_no_elim_operand (op1, mode))
15615 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64bit compilation into a register
to get them CSEed. */
15619 if (can_create_pseudo_p ()
15620 && (mode == DImode) && TARGET_64BIT
15621 && immediate_operand (op1, mode)
15622 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15623 && !register_operand (op0, mode)
15625 op1 = copy_to_mode_reg (mode, op1);
15627 if (can_create_pseudo_p ()
15628 && FLOAT_MODE_P (mode)
15629 && GET_CODE (op1) == CONST_DOUBLE)
15631 /* If we are loading a floating point constant to a register,
15632 force the value to memory now, since we'll get better code
15633 out the back end. */
15635 op1 = validize_mem (force_const_mem (mode, op1));
15636 if (!register_operand (op0, mode))
15638 rtx temp = gen_reg_rtx (mode);
15639 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15640 emit_move_insn (op0, temp);
15646 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15650 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15652 rtx op0 = operands[0], op1 = operands[1];
15653 unsigned int align = GET_MODE_ALIGNMENT (mode);
/* Force constants other than zero into memory. We do not know how
the instructions used to build constants modify the upper 64 bits
of the register; once we have that information we may be able
to handle some of them more efficiently. */
15659 if (can_create_pseudo_p ()
15660 && register_operand (op0, mode)
15661 && (CONSTANT_P (op1)
15662 || (GET_CODE (op1) == SUBREG
15663 && CONSTANT_P (SUBREG_REG (op1))))
15664 && !standard_sse_constant_p (op1))
15665 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since an attribute
can make operands unaligned. */
15669 if (can_create_pseudo_p ()
15670 && SSE_REG_MODE_P (mode)
15671 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15672 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15676 /* ix86_expand_vector_move_misalign() does not like constants ... */
15677 if (CONSTANT_P (op1)
15678 || (GET_CODE (op1) == SUBREG
15679 && CONSTANT_P (SUBREG_REG (op1))))
15680 op1 = validize_mem (force_const_mem (mode, op1));
15682 /* ... nor both arguments in memory. */
15683 if (!register_operand (op0, mode)
15684 && !register_operand (op1, mode))
15685 op1 = force_reg (mode, op1);
15687 tmp[0] = op0; tmp[1] = op1;
15688 ix86_expand_vector_move_misalign (mode, tmp);
15692 /* Make operand1 a register if it isn't already. */
15693 if (can_create_pseudo_p ()
15694 && !register_operand (op0, mode)
15695 && !register_operand (op1, mode))
15697 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15701 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15704 /* Split 32-byte AVX unaligned load and store if needed. */
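/* An illustrative shape of the split (a sketch, AT&T syntax):
     load:   vmovupd      (mem), %xmm0
             vinsertf128  $1, 16(mem), %ymm0, %ymm0
     store:  vextractf128 $0, %ymm0, (mem)
             vextractf128 $1, %ymm0, 16(mem)  */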
15707 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15710 rtx (*extract) (rtx, rtx, rtx);
15711 rtx (*move_unaligned) (rtx, rtx);
15712 enum machine_mode mode;
15714 switch (GET_MODE (op0))
15717 gcc_unreachable ();
15719 extract = gen_avx_vextractf128v32qi;
15720 move_unaligned = gen_avx_movdqu256;
15724 extract = gen_avx_vextractf128v8sf;
15725 move_unaligned = gen_avx_movups256;
15729 extract = gen_avx_vextractf128v4df;
15730 move_unaligned = gen_avx_movupd256;
15735 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15737 rtx r = gen_reg_rtx (mode);
15738 m = adjust_address (op1, mode, 0);
15739 emit_move_insn (r, m);
15740 m = adjust_address (op1, mode, 16);
15741 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15742 emit_move_insn (op0, r);
15744 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15746 m = adjust_address (op0, mode, 0);
15747 emit_insn (extract (m, op1, const0_rtx));
15748 m = adjust_address (op0, mode, 16);
15749 emit_insn (extract (m, op1, const1_rtx));
15752 emit_insn (move_unaligned (op0, op1));
15755 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15756 straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }  */
15808 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15817 switch (GET_MODE_CLASS (mode))
15819 case MODE_VECTOR_INT:
15821 switch (GET_MODE_SIZE (mode))
15824 /* If we're optimizing for size, movups is the smallest. */
15825 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15827 op0 = gen_lowpart (V4SFmode, op0);
15828 op1 = gen_lowpart (V4SFmode, op1);
15829 emit_insn (gen_sse_movups (op0, op1));
15832 op0 = gen_lowpart (V16QImode, op0);
15833 op1 = gen_lowpart (V16QImode, op1);
15834 emit_insn (gen_sse2_movdqu (op0, op1));
15837 op0 = gen_lowpart (V32QImode, op0);
15838 op1 = gen_lowpart (V32QImode, op1);
15839 ix86_avx256_split_vector_move_misalign (op0, op1);
15842 gcc_unreachable ();
15845 case MODE_VECTOR_FLOAT:
15846 op0 = gen_lowpart (mode, op0);
15847 op1 = gen_lowpart (mode, op1);
15852 emit_insn (gen_sse_movups (op0, op1));
15855 ix86_avx256_split_vector_move_misalign (op0, op1);
15858 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15860 op0 = gen_lowpart (V4SFmode, op0);
15861 op1 = gen_lowpart (V4SFmode, op1);
15862 emit_insn (gen_sse_movups (op0, op1));
15865 emit_insn (gen_sse2_movupd (op0, op1));
15868 ix86_avx256_split_vector_move_misalign (op0, op1);
15871 gcc_unreachable ();
15876 gcc_unreachable ();
15884 /* If we're optimizing for size, movups is the smallest. */
15885 if (optimize_insn_for_size_p ()
15886 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15888 op0 = gen_lowpart (V4SFmode, op0);
15889 op1 = gen_lowpart (V4SFmode, op1);
15890 emit_insn (gen_sse_movups (op0, op1));
/* ??? If we have typed data, then it would appear that using
movdqu is the only way to get unaligned data loaded with
integer type. */
15897 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15899 op0 = gen_lowpart (V16QImode, op0);
15900 op1 = gen_lowpart (V16QImode, op1);
15901 emit_insn (gen_sse2_movdqu (op0, op1));
15905 if (TARGET_SSE2 && mode == V2DFmode)
15909 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15911 op0 = gen_lowpart (V2DFmode, op0);
15912 op1 = gen_lowpart (V2DFmode, op1);
15913 emit_insn (gen_sse2_movupd (op0, op1));
15917 /* When SSE registers are split into halves, we can avoid
15918 writing to the top half twice. */
15919 if (TARGET_SSE_SPLIT_REGS)
15921 emit_clobber (op0);
15926 /* ??? Not sure about the best option for the Intel chips.
15927 The following would seem to satisfy; the register is
15928 entirely cleared, breaking the dependency chain. We
15929 then store to the upper half, with a dependency depth
15930 of one. A rumor has it that Intel recommends two movsd
15931 followed by an unpacklpd, but this is unconfirmed. And
15932 given that the dependency depth of the unpacklpd would
15933 still be one, I'm not sure why this would be better. */
15934 zero = CONST0_RTX (V2DFmode);
15937 m = adjust_address (op1, DFmode, 0);
15938 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15939 m = adjust_address (op1, DFmode, 8);
15940 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15944 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15946 op0 = gen_lowpart (V4SFmode, op0);
15947 op1 = gen_lowpart (V4SFmode, op1);
15948 emit_insn (gen_sse_movups (op0, op1));
15952 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15953 emit_move_insn (op0, CONST0_RTX (mode));
15955 emit_clobber (op0);
15957 if (mode != V4SFmode)
15958 op0 = gen_lowpart (V4SFmode, op0);
15959 m = adjust_address (op1, V2SFmode, 0);
15960 emit_insn (gen_sse_loadlps (op0, op0, m));
15961 m = adjust_address (op1, V2SFmode, 8);
15962 emit_insn (gen_sse_loadhps (op0, op0, m));
15965 else if (MEM_P (op0))
15967 /* If we're optimizing for size, movups is the smallest. */
15968 if (optimize_insn_for_size_p ()
15969 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15971 op0 = gen_lowpart (V4SFmode, op0);
15972 op1 = gen_lowpart (V4SFmode, op1);
15973 emit_insn (gen_sse_movups (op0, op1));
/* ??? Similar to above, only less clear because of "typeless"
stores. */
15979 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15980 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15982 op0 = gen_lowpart (V16QImode, op0);
15983 op1 = gen_lowpart (V16QImode, op1);
15984 emit_insn (gen_sse2_movdqu (op0, op1));
15988 if (TARGET_SSE2 && mode == V2DFmode)
15990 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15992 op0 = gen_lowpart (V2DFmode, op0);
15993 op1 = gen_lowpart (V2DFmode, op1);
15994 emit_insn (gen_sse2_movupd (op0, op1));
15998 m = adjust_address (op0, DFmode, 0);
15999 emit_insn (gen_sse2_storelpd (m, op1));
16000 m = adjust_address (op0, DFmode, 8);
16001 emit_insn (gen_sse2_storehpd (m, op1));
16006 if (mode != V4SFmode)
16007 op1 = gen_lowpart (V4SFmode, op1);
16009 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
16011 op0 = gen_lowpart (V4SFmode, op0);
16012 emit_insn (gen_sse_movups (op0, op1));
16016 m = adjust_address (op0, V2SFmode, 0);
16017 emit_insn (gen_sse_storelps (m, op1));
16018 m = adjust_address (op0, V2SFmode, 8);
16019 emit_insn (gen_sse_storehps (m, op1));
16024 gcc_unreachable ();
16027 /* Expand a push in MODE. This is some mode for which we do not support
16028 proper push instructions, at least from the registers that we expect
16029 the value to live in. */
16032 ix86_expand_push (enum machine_mode mode, rtx x)
16036 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16037 GEN_INT (-GET_MODE_SIZE (mode)),
16038 stack_pointer_rtx, 1, OPTAB_DIRECT);
16039 if (tmp != stack_pointer_rtx)
16040 emit_move_insn (stack_pointer_rtx, tmp);
16042 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto the stack, it has to be aligned at least
at the function argument boundary. However, since we don't have
the argument type, we can't determine the actual argument
boundary. */
16048 emit_move_insn (tmp, x);
16051 /* Helper function of ix86_fixup_binary_operands to canonicalize
16052 operand order. Returns true if the operands should be swapped. */
16055 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16058 rtx dst = operands[0];
16059 rtx src1 = operands[1];
16060 rtx src2 = operands[2];
16062 /* If the operation is not commutative, we can't do anything. */
16063 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16066 /* Highest priority is that src1 should match dst. */
16067 if (rtx_equal_p (dst, src1))
16069 if (rtx_equal_p (dst, src2))
16072 /* Next highest priority is that immediate constants come second. */
16073 if (immediate_operand (src2, mode))
16075 if (immediate_operand (src1, mode))
16078 /* Lowest priority is that memory references should come second. */
16088 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16089 destination to use for the operation. If different from the true
16090 destination in operands[0], a copy operation will be required. */
16093 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16096 rtx dst = operands[0];
16097 rtx src1 = operands[1];
16098 rtx src2 = operands[2];
16100 /* Canonicalize operand order. */
16101 if (ix86_swap_binary_operands_p (code, mode, operands))
16105 /* It is invalid to swap operands of different modes. */
16106 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16113 /* Both source operands cannot be in memory. */
16114 if (MEM_P (src1) && MEM_P (src2))
16116 /* Optimization: Only read from memory once. */
16117 if (rtx_equal_p (src1, src2))
16119 src2 = force_reg (mode, src2);
16123 src2 = force_reg (mode, src2);
16126 /* If the destination is memory, and we do not have matching source
16127 operands, do things in registers. */
16128 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16129 dst = gen_reg_rtx (mode);
16131 /* Source 1 cannot be a constant. */
16132 if (CONSTANT_P (src1))
16133 src1 = force_reg (mode, src1);
16135 /* Source 1 cannot be a non-matching memory. */
16136 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16137 src1 = force_reg (mode, src1);
16139 /* Improve address combine. */
16141 && GET_MODE_CLASS (mode) == MODE_INT
16143 src2 = force_reg (mode, src2);
16145 operands[1] = src1;
16146 operands[2] = src2;
16150 /* Similarly, but assume that the destination has already been
16151 set up properly. */
16154 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16155 enum machine_mode mode, rtx operands[])
16157 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16158 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator. Make the expansion closer to the
actual machine than just general_operand, which will allow 3 separate
memory references (one output, two input) in a single insn. */
16166 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16169 rtx src1, src2, dst, op, clob;
16171 dst = ix86_fixup_binary_operands (code, mode, operands);
16172 src1 = operands[1];
16173 src2 = operands[2];
16175 /* Emit the instruction. */
16177 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16178 if (reload_in_progress)
16180 /* Reload doesn't know about the flags register, and doesn't know that
16181 it doesn't want to clobber it. We can only do this with PLUS. */
16182 gcc_assert (code == PLUS);
16185 else if (reload_completed
16187 && !rtx_equal_p (dst, src1))
16189 /* This is going to be an LEA; avoid splitting it later. */
16194 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16195 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16198 /* Fix up the destination if needed. */
16199 if (dst != operands[0])
16200 emit_move_insn (operands[0], dst);
16203 /* Return TRUE or FALSE depending on whether the binary operator meets the
16204 appropriate constraints. */
16207 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16210 rtx dst = operands[0];
16211 rtx src1 = operands[1];
16212 rtx src2 = operands[2];
16214 /* Both source operands cannot be in memory. */
16215 if (MEM_P (src1) && MEM_P (src2))
16218 /* Canonicalize operand order for commutative operators. */
16219 if (ix86_swap_binary_operands_p (code, mode, operands))
16226 /* If the destination is memory, we must have a matching source operand. */
16227 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16230 /* Source 1 cannot be a constant. */
16231 if (CONSTANT_P (src1))
16234 /* Source 1 cannot be a non-matching memory. */
16235 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16236 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16237 return (code == AND
16240 || (TARGET_64BIT && mode == DImode))
16241 && satisfies_constraint_L (src2));
/* Attempt to expand a unary operator. Make the expansion closer to the
actual machine than just general_operand, which will allow 2 separate
memory references (one output, one input) in a single insn. */
16251 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16254 int matching_memory;
16255 rtx src, dst, op, clob;
16260 /* If the destination is memory, and we do not have matching source
16261 operands, do things in registers. */
16262 matching_memory = 0;
16265 if (rtx_equal_p (dst, src))
16266 matching_memory = 1;
16268 dst = gen_reg_rtx (mode);
16271 /* When source operand is memory, destination must match. */
16272 if (MEM_P (src) && !matching_memory)
16273 src = force_reg (mode, src);
16275 /* Emit the instruction. */
16277 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16278 if (reload_in_progress || code == NOT)
16280 /* Reload doesn't know about the flags register, and doesn't know that
16281 it doesn't want to clobber it. */
16282 gcc_assert (code == NOT);
16287 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16288 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16291 /* Fix up the destination if needed. */
16292 if (dst != operands[0])
16293 emit_move_insn (operands[0], dst);
16296 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16297 divisor are within the range [0-255]. */
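/* An illustrative shape of the emitted sequence for the SImode case:
     mov     dividend, scratch
     or      divisor, scratch
     test    $-0x100, scratch
     je      .Lqimode          ; both values fit in [0, 255]
     <full 32-bit (u)div sequence>
     jmp     .Lend
   .Lqimode:
     <8-bit divide: quotient in %al, remainder in %ah>
   .Lend:  */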
16300 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16303 rtx end_label, qimode_label;
16304 rtx insn, div, mod;
16305 rtx scratch, tmp0, tmp1, tmp2;
16306 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16307 rtx (*gen_zero_extend) (rtx, rtx);
16308 rtx (*gen_test_ccno_1) (rtx, rtx);
16313 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16314 gen_test_ccno_1 = gen_testsi_ccno_1;
16315 gen_zero_extend = gen_zero_extendqisi2;
16318 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16319 gen_test_ccno_1 = gen_testdi_ccno_1;
16320 gen_zero_extend = gen_zero_extendqidi2;
16323 gcc_unreachable ();
16326 end_label = gen_label_rtx ();
16327 qimode_label = gen_label_rtx ();
16329 scratch = gen_reg_rtx (mode);
/* Use 8bit unsigned divmod if dividend and divisor are within
the range [0-255]. */
16333 emit_move_insn (scratch, operands[2]);
16334 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16335 scratch, 1, OPTAB_DIRECT);
16336 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16337 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16338 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16339 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16340 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16342 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16343 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16344 JUMP_LABEL (insn) = qimode_label;
/* Generate original signed/unsigned divmod. */
16347 div = gen_divmod4_1 (operands[0], operands[1],
16348 operands[2], operands[3]);
16351 /* Branch to the end. */
16352 emit_jump_insn (gen_jump (end_label));
16355 /* Generate 8bit unsigned divide. */
16356 emit_label (qimode_label);
16357 /* Don't use operands[0] for result of 8bit divide since not all
16358 registers support QImode ZERO_EXTRACT. */
16359 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16360 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16361 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16362 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16366 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16367 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16371 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16372 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16375 /* Extract remainder from AH. */
16376 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16377 if (REG_P (operands[1]))
16378 insn = emit_move_insn (operands[1], tmp1);
/* Need a new scratch register since the old one has the result
of the 8bit divide. */
16383 scratch = gen_reg_rtx (mode);
16384 emit_move_insn (scratch, tmp1);
16385 insn = emit_move_insn (operands[1], scratch);
16387 set_unique_reg_note (insn, REG_EQUAL, mod);
16389 /* Zero extend quotient from AL. */
16390 tmp1 = gen_lowpart (QImode, tmp0);
16391 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16392 set_unique_reg_note (insn, REG_EQUAL, div);
16394 emit_label (end_label);
16397 #define LEA_MAX_STALL (3)
16398 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
/* Increase given DISTANCE in half-cycles according to
dependencies between PREV and NEXT instructions.
Add 1 half-cycle if there is no dependency and
go to the next cycle if there is some dependency. */
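/* E.g. with DISTANCE 3 (half-cycles) and a def-use dependency between
   PREV and NEXT, the result is 3 + (3 & 1) + 2 = 6: the count is
   rounded up to a full cycle boundary plus one more cycle. */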
16405 static unsigned int
16406 increase_distance (rtx prev, rtx next, unsigned int distance)
16411 if (!prev || !next)
16412 return distance + (distance & 1) + 2;
16414 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16415 return distance + 1;
16417 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16418 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16419 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16420 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16421 return distance + (distance & 1) + 2;
16423 return distance + 1;
16426 /* Function checks if instruction INSN defines register number
16427 REGNO1 or REGNO2. */
16430 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16435 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16436 if (DF_REF_REG_DEF_P (*def_rec)
16437 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16438 && (regno1 == DF_REF_REGNO (*def_rec)
16439 || regno2 == DF_REF_REGNO (*def_rec)))
16447 /* Function checks if instruction INSN uses register number
16448 REGNO as a part of address expression. */
16451 insn_uses_reg_mem (unsigned int regno, rtx insn)
16455 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16456 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16462 /* Search backward for non-agu definition of register number REGNO1
16463 or register number REGNO2 in basic block starting from instruction
16464 START up to head of basic block or instruction INSN.
16466 Function puts true value into *FOUND var if definition was found
16467 and false otherwise.
16469 Distance in half-cycles between START and found instruction or head
16470 of BB is added to DISTANCE and returned. */
16473 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16474 rtx insn, int distance,
16475 rtx start, bool *found)
16477 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16485 && distance < LEA_SEARCH_THRESHOLD)
16487 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16489 distance = increase_distance (prev, next, distance);
16490 if (insn_defines_reg (regno1, regno2, prev))
16492 if (recog_memoized (prev) < 0
16493 || get_attr_type (prev) != TYPE_LEA)
16502 if (prev == BB_HEAD (bb))
16505 prev = PREV_INSN (prev);
16511 /* Search backward for non-agu definition of register number REGNO1
16512 or register number REGNO2 in INSN's basic block until
16513 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16514 2. Reach neighbour BBs boundary, or
16515 3. Reach agu definition.
16516 Returns the distance between the non-agu definition point and INSN.
16517 If no definition point, returns -1. */
16520 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16523 basic_block bb = BLOCK_FOR_INSN (insn);
16525 bool found = false;
16527 if (insn != BB_HEAD (bb))
16528 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16529 distance, PREV_INSN (insn),
16532 if (!found && distance < LEA_SEARCH_THRESHOLD)
16536 bool simple_loop = false;
16538 FOR_EACH_EDGE (e, ei, bb->preds)
16541 simple_loop = true;
16546 distance = distance_non_agu_define_in_bb (regno1, regno2,
16548 BB_END (bb), &found);
16551 int shortest_dist = -1;
16552 bool found_in_bb = false;
16554 FOR_EACH_EDGE (e, ei, bb->preds)
16557 = distance_non_agu_define_in_bb (regno1, regno2,
16563 if (shortest_dist < 0)
16564 shortest_dist = bb_dist;
16565 else if (bb_dist > 0)
16566 shortest_dist = MIN (bb_dist, shortest_dist);
16572 distance = shortest_dist;
16576 /* get_attr_type may modify recog data. We want to make sure
16577 that recog data is valid for instruction INSN, on which
16578 distance_non_agu_define is called. INSN is unchanged here. */
16579 extract_insn_cached (insn);
16584 return distance >> 1;
/* Return the distance in half-cycles between INSN and the next
insn that uses register number REGNO in a memory address, added
to DISTANCE. Return -1 if REGNO is set.

Put true value into *FOUND if register usage was found and
false otherwise.
Put true value into *REDEFINED if register redefinition was
found and false otherwise. */
16597 distance_agu_use_in_bb (unsigned int regno,
16598 rtx insn, int distance, rtx start,
16599 bool *found, bool *redefined)
16601 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16606 *redefined = false;
16610 && distance < LEA_SEARCH_THRESHOLD)
16612 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16614 distance = increase_distance(prev, next, distance);
16615 if (insn_uses_reg_mem (regno, next))
16617 /* Return DISTANCE if OP0 is used in memory
16618 address in NEXT. */
16623 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16625 /* Return -1 if OP0 is set in NEXT. */
16633 if (next == BB_END (bb))
16636 next = NEXT_INSN (next);
/* Return the distance between INSN and the next insn that uses
register number REGNO0 in a memory address. Return -1 if no such
use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
16647 distance_agu_use (unsigned int regno0, rtx insn)
16649 basic_block bb = BLOCK_FOR_INSN (insn);
16651 bool found = false;
16652 bool redefined = false;
16654 if (insn != BB_END (bb))
16655 distance = distance_agu_use_in_bb (regno0, insn, distance,
16657 &found, &redefined);
16659 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16663 bool simple_loop = false;
16665 FOR_EACH_EDGE (e, ei, bb->succs)
16668 simple_loop = true;
16673 distance = distance_agu_use_in_bb (regno0, insn,
16674 distance, BB_HEAD (bb),
16675 &found, &redefined);
16678 int shortest_dist = -1;
16679 bool found_in_bb = false;
16680 bool redefined_in_bb = false;
16682 FOR_EACH_EDGE (e, ei, bb->succs)
16685 = distance_agu_use_in_bb (regno0, insn,
16686 distance, BB_HEAD (e->dest),
16687 &found_in_bb, &redefined_in_bb);
16690 if (shortest_dist < 0)
16691 shortest_dist = bb_dist;
16692 else if (bb_dist > 0)
16693 shortest_dist = MIN (bb_dist, shortest_dist);
16699 distance = shortest_dist;
16703 if (!found || redefined)
16706 return distance >> 1;
/* Define this macro to tune LEA priority vs ADD; it takes effect when
there is a choice between LEA and ADD.
Negative value: ADD is preferred over LEA
Zero: neutral
Positive value: LEA is preferred over ADD */
16714 #define IX86_LEA_PRIORITY 0
/* Return true if usage of lea INSN has a performance advantage
over a sequence of instructions. The instruction sequence has
SPLIT_COST cycles higher latency than the lea latency. */
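/* A simplified reading of the checks below: with LEA_MAX_STALL of 3,
   a lea whose address inputs were last defined by a non-AGU
   instruction at least 3 cycles earlier no longer risks an AGU stall
   from those definitions, so the lea form is generally preferred;
   SPLIT_COST then biases the comparison against splitting. */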
16721 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16722 unsigned int regno2, int split_cost)
16724 int dist_define, dist_use;
16726 dist_define = distance_non_agu_define (regno1, regno2, insn);
16727 dist_use = distance_agu_use (regno0, insn);
16729 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
/* If there is no non-AGU operand definition, no AGU
operand usage and split cost is 0 then both lea
and non-lea variants have the same priority. Currently
we prefer lea for 64 bit code and non-lea on 32 bit
code. */
16736 if (dist_use < 0 && split_cost == 0)
16737 return TARGET_64BIT || IX86_LEA_PRIORITY;
/* With a longer definition distance, lea is more preferable.
Here we change it to take into account splitting cost and
lea priority. */
16745 dist_define += split_cost + IX86_LEA_PRIORITY;
/* If there is no use in a memory address then we just check
that split cost does not exceed AGU stall. */
16750 return dist_define >= LEA_MAX_STALL;
/* If this insn has both backward non-agu dependence and forward
agu dependence, the one with the shorter distance takes effect. */
16754 return dist_define >= dist_use;
16757 /* Return true if it is legal to clobber flags by INSN and
16758 false otherwise. */
16761 ix86_ok_to_clobber_flags (rtx insn)
16763 basic_block bb = BLOCK_FOR_INSN (insn);
16769 if (NONDEBUG_INSN_P (insn))
16771 for (use = DF_INSN_USES (insn); *use; use++)
16772 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16775 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16779 if (insn == BB_END (bb))
16782 insn = NEXT_INSN (insn);
16785 live = df_get_live_out(bb);
16786 return !REGNO_REG_SET_P (live, FLAGS_REG);
16789 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16790 move and add to avoid AGU stalls. */
16793 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16795 unsigned int regno0 = true_regnum (operands[0]);
16796 unsigned int regno1 = true_regnum (operands[1]);
16797 unsigned int regno2 = true_regnum (operands[2]);
16799 /* Check if we need to optimize. */
16800 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16803 /* Check it is correct to split here. */
16804 if (!ix86_ok_to_clobber_flags(insn))
/* We need to split only adds with a non-destructive
destination operand. */
16809 if (regno0 == regno1 || regno0 == regno2)
16812 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
/* Return true if we should emit a lea instruction instead of a mov
instruction. */
16819 ix86_use_lea_for_mov (rtx insn, rtx operands[])
16821 unsigned int regno0;
16822 unsigned int regno1;
16824 /* Check if we need to optimize. */
16825 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16828 /* Use lea for reg to reg moves only. */
16829 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16832 regno0 = true_regnum (operands[0]);
16833 regno1 = true_regnum (operands[1]);
16835 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
16838 /* Return true if we need to split lea into a sequence of
16839 instructions to avoid AGU stalls. */
16842 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16844 unsigned int regno0 = true_regnum (operands[0]) ;
16845 unsigned int regno1 = INVALID_REGNUM;
16846 unsigned int regno2 = INVALID_REGNUM;
16847 int split_cost = 0;
16848 struct ix86_address parts;
16851 /* FIXME: Handle zero-extended addresses. */
16852 if (GET_CODE (operands[1]) == ZERO_EXTEND
16853 || GET_CODE (operands[1]) == AND)
/* Check if we need to optimize. */
16857 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16860 /* Check it is correct to split here. */
16861 if (!ix86_ok_to_clobber_flags(insn))
16864 ok = ix86_decompose_address (operands[1], &parts);
16867 /* There should be at least two components in the address. */
16868 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16869 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
/* We should not split into add if a non-legitimate PIC
operand is used as the displacement. */
16874 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16878 regno1 = true_regnum (parts.base);
16880 regno2 = true_regnum (parts.index);
/* Compute how many cycles we will add to execution time
if we split the lea into a sequence of instructions. */
16884 if (parts.base || parts.index)
/* Have to use a mov instruction if the non-destructive
destination form is used. */
16888 if (regno1 != regno0 && regno2 != regno0)
16891 /* Have to add index to base if both exist. */
16892 if (parts.base && parts.index)
16895 /* Have to use shift and adds if scale is 2 or greater. */
16896 if (parts.scale > 1)
16898 if (regno0 != regno1)
16900 else if (regno2 == regno0)
16903 split_cost += parts.scale;
/* Have to use an add instruction with immediate if
disp is nonzero. */
16908 if (parts.disp && parts.disp != const0_rtx)
16911 /* Subtract the price of lea. */
16915 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
/* Emit x86 binary operator CODE in mode MODE, where the first operand
matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
16922 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
16927 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
16928 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16930 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
/* Split lea instructions into a sequence of instructions
which are executed on the ALU to avoid AGU stalls.
It is assumed that it is allowed to clobber the flags register
at the lea position. */
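/* E.g. (an illustrative split) "lea 4(%ebx,%ecx,2), %eax" can become:
     mov  %ecx, %eax
     sal  $1,   %eax
     add  %ebx, %eax
     add  $4,   %eax  */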
16939 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16941 unsigned int regno0 = true_regnum (operands[0]) ;
16942 unsigned int regno1 = INVALID_REGNUM;
16943 unsigned int regno2 = INVALID_REGNUM;
16944 struct ix86_address parts;
16948 ok = ix86_decompose_address (operands[1], &parts);
16953 if (GET_MODE (parts.base) != mode)
16954 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
16955 regno1 = true_regnum (parts.base);
16960 if (GET_MODE (parts.index) != mode)
16961 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
16962 regno2 = true_regnum (parts.index);
16965 if (parts.scale > 1)
16967 /* Case r1 = r1 + ... */
16968 if (regno1 == regno0)
/* If we have a case like r1 = r1 + C * r1 then we
should use multiplication, which is very
expensive. Assume the cost model is wrong if we
have such a case here. */
16974 gcc_assert (regno2 != regno0);
16976 for (adds = parts.scale; adds > 0; adds--)
16977 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
16981 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16982 if (regno0 != regno2)
16983 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16985 /* Use shift for scaling. */
16986 ix86_emit_binop (ASHIFT, mode, operands[0],
16987 GEN_INT (exact_log2 (parts.scale)));
16990 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
16992 if (parts.disp && parts.disp != const0_rtx)
16993 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
16996 else if (!parts.base && !parts.index)
16998 gcc_assert(parts.disp);
16999 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
17005 if (regno0 != regno2)
17006 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
17008 else if (!parts.index)
17010 if (regno0 != regno1)
17011 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17015 if (regno0 == regno1)
17017 else if (regno0 == regno2)
17021 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17025 ix86_emit_binop (PLUS, mode, operands[0], tmp);
17028 if (parts.disp && parts.disp != const0_rtx)
17029 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
/* Return true if it is ok to optimize an ADD operation to LEA
operation to avoid flag register consumption. For most processors,
ADD is faster than LEA. For processors like ATOM, if the
destination register of LEA holds an actual address which will be
used soon, LEA is better and otherwise ADD is better. */
17040 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17042 unsigned int regno0 = true_regnum (operands[0]);
17043 unsigned int regno1 = true_regnum (operands[1]);
17044 unsigned int regno2 = true_regnum (operands[2]);
17046 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17047 if (regno0 != regno1 && regno0 != regno2)
17050 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17053 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
/* Return true if the destination reg of SET_BODY is the shift count
of USE_BODY. */
17060 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17066 /* Retrieve destination of SET_BODY. */
17067 switch (GET_CODE (set_body))
17070 set_dest = SET_DEST (set_body);
17071 if (!set_dest || !REG_P (set_dest))
17075 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17076 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17084 /* Retrieve shift count of USE_BODY. */
17085 switch (GET_CODE (use_body))
17088 shift_rtx = XEXP (use_body, 1);
17091 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17092 if (ix86_dep_by_shift_count_body (set_body,
17093 XVECEXP (use_body, 0, i)))
17101 && (GET_CODE (shift_rtx) == ASHIFT
17102 || GET_CODE (shift_rtx) == LSHIFTRT
17103 || GET_CODE (shift_rtx) == ASHIFTRT
17104 || GET_CODE (shift_rtx) == ROTATE
17105 || GET_CODE (shift_rtx) == ROTATERT))
17107 rtx shift_count = XEXP (shift_rtx, 1);
17109 /* Return true if shift count is dest of SET_BODY. */
17110 if (REG_P (shift_count)
17111 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if the destination reg of SET_INSN is the shift count
of USE_INSN. */
17122 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
17124 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17125 PATTERN (use_insn));
17128 /* Return TRUE or FALSE depending on whether the unary operator meets the
17129 appropriate constraints. */
17132 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17133 enum machine_mode mode ATTRIBUTE_UNUSED,
17134 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, source and destination must match. */
17137 if ((MEM_P (operands[0])
17138 || MEM_P (operands[1]))
17139 && ! rtx_equal_p (operands[0], operands[1]))
17144 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17145 are ok, keeping in mind the possible movddup alternative. */
17148 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
17150 if (MEM_P (operands[0]))
17151 return rtx_equal_p (operands[0], operands[1 + high]);
17152 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17153 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17157 /* Post-reload splitter for converting an SF or DFmode value in an
17158 SSE register into an unsigned SImode. */
17161 ix86_split_convert_uns_si_sse (rtx operands[])
17163 enum machine_mode vecmode;
17164 rtx value, large, zero_or_two31, input, two31, x;
17166 large = operands[1];
17167 zero_or_two31 = operands[2];
17168 input = operands[3];
17169 two31 = operands[4];
17170 vecmode = GET_MODE (large);
17171 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17173 /* Load up the value into the low element. We must ensure that the other
17174 elements are valid floats -- zero is the easiest such value. */
17177 if (vecmode == V4SFmode)
17178 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17180 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
17184 input = gen_rtx_REG (vecmode, REGNO (input));
17185 emit_move_insn (value, CONST0_RTX (vecmode));
17186 if (vecmode == V4SFmode)
17187 emit_insn (gen_sse_movss (value, value, input));
17189 emit_insn (gen_sse2_movsd (value, value, input));
17192 emit_move_insn (large, two31);
17193 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17195 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17196 emit_insn (gen_rtx_SET (VOIDmode, large, x));
17198 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17199 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
17201 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17202 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17204 large = gen_rtx_REG (V4SImode, REGNO (large));
17205 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17207 x = gen_rtx_REG (V4SImode, REGNO (value));
17208 if (vecmode == V4SFmode)
17209 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17211 emit_insn (gen_sse2_cvttpd2dq (x, value));
17214 emit_insn (gen_xorv4si3 (value, value, large));
17217 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17218 Expects the 64-bit DImode to be supplied in a pair of integral
17219 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17220 -mfpmath=sse, !optimize_size only. */
17223 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17225 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17226 rtx int_xmm, fp_xmm;
17227 rtx biases, exponents;
17230 int_xmm = gen_reg_rtx (V4SImode);
17231 if (TARGET_INTER_UNIT_MOVES)
17232 emit_insn (gen_movdi_to_sse (int_xmm, input));
17233 else if (TARGET_SSE_SPLIT_REGS)
17235 emit_clobber (int_xmm);
17236 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17240 x = gen_reg_rtx (V2DImode);
17241 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17242 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17245 x = gen_rtx_CONST_VECTOR (V4SImode,
17246 gen_rtvec (4, GEN_INT (0x43300000UL),
17247 GEN_INT (0x45300000UL),
17248 const0_rtx, const0_rtx));
17249 exponents = validize_mem (force_const_mem (V4SImode, x));
17251 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17252 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17254 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17255 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17256 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17257 (0x1.0p84 + double(fp_value_hi_xmm)).
17258 Note these exponents differ by 32. */
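/* Worked example (a sketch): for the 64-bit input 2^32 + 5 the low
   word forms 0x1.0p52 + 5.0 and the high word 0x1.0p84 + 2^32;
   subtracting the biases leaves exactly 5.0 and 4294967296.0, whose
   sum reconstructs the input value. */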
17260 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17262 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17263 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17264 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17265 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17266 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17267 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17268 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17269 biases = validize_mem (force_const_mem (V2DFmode, biases));
17270 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17272 /* Add the upper and lower DFmode values together. */
17274 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17277 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17278 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17279 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
17282 ix86_expand_vector_extract (false, target, fp_xmm, 0);
17285 /* Not used, but eases macroization of patterns. */
17287 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17288 rtx input ATTRIBUTE_UNUSED)
17290 gcc_unreachable ();
17293 /* Convert an unsigned SImode value into a DFmode. Only currently used
17294 for SSE, but applicable anywhere. */
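/* E.g. for input 0x80000000 (2^31): adding -2^31 wraps to 0, the
   signed conversion yields 0.0, and the final addition of 0x1.0p31
   restores 2147483648.0 (a sketch of the bias trick below). */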
17297 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17299 REAL_VALUE_TYPE TWO31r;
17302 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17303 NULL, 1, OPTAB_DIRECT);
17305 fp = gen_reg_rtx (DFmode);
17306 emit_insn (gen_floatsidf2 (fp, x));
17308 real_ldexp (&TWO31r, &dconst1, 31);
17309 x = const_double_from_real_value (TWO31r, DFmode);
17311 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17313 emit_move_insn (target, x);
17316 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17317 32-bit mode; otherwise we have a direct convert instruction. */
17320 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
17322 REAL_VALUE_TYPE TWO32r;
17323 rtx fp_lo, fp_hi, x;
17325 fp_lo = gen_reg_rtx (DFmode);
17326 fp_hi = gen_reg_rtx (DFmode);
17328 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
17330 real_ldexp (&TWO32r, &dconst1, 32);
17331 x = const_double_from_real_value (TWO32r, DFmode);
17332 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
17334 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
17336 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
17339 emit_move_insn (target, x);
17342 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17343 For x86_32, -mfpmath=sse, !optimize_size only. */
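/* A sketch of the approach below: for input 0x12345678, int_lo is
   0x5678 and int_hi is 0x1234; the result is computed as
   (float) int_hi * 0x1.0p16 + (float) int_lo, which sidesteps the
   signedness of cvtsi2ss for values >= 2^31. */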
17345 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
17347 REAL_VALUE_TYPE ONE16r;
17348 rtx fp_hi, fp_lo, int_hi, int_lo, x;
17350 real_ldexp (&ONE16r, &dconst1, 16);
17351 x = const_double_from_real_value (ONE16r, SFmode);
17352 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
17353 NULL, 0, OPTAB_DIRECT);
17354 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
17355 NULL, 0, OPTAB_DIRECT);
17356 fp_hi = gen_reg_rtx (SFmode);
17357 fp_lo = gen_reg_rtx (SFmode);
17358 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
17359 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
17360 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
17362 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
17364 if (!rtx_equal_p (target, fp_hi))
17365 emit_move_insn (target, fp_hi);
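/* Editorial sketch, not part of GCC (hypothetical name): both 16-bit
   halves convert to float exactly, the multiply by 2^16 is exact, and
   the final add rounds once, so the result is correctly rounded.  */
#include <stdint.h>

static float
uns32_to_float (uint32_t x)
{
  float hi = (float) (int32_t) (x >> 16);
  float lo = (float) (int32_t) (x & 0xffff);
  return hi * 0x1.0p16f + lo;
}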
17368 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17369 a vector of unsigned ints VAL to vector of floats TARGET. */
17372 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
17375 REAL_VALUE_TYPE TWO16r;
17376 enum machine_mode intmode = GET_MODE (val);
17377 enum machine_mode fltmode = GET_MODE (target);
17378 rtx (*cvt) (rtx, rtx);
17380 if (intmode == V4SImode)
17381 cvt = gen_floatv4siv4sf2;
17383 cvt = gen_floatv8siv8sf2;
17384 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
17385 tmp[0] = force_reg (intmode, tmp[0]);
17386 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
17388 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
17389 NULL_RTX, 1, OPTAB_DIRECT);
17390 tmp[3] = gen_reg_rtx (fltmode);
17391 emit_insn (cvt (tmp[3], tmp[1]));
17392 tmp[4] = gen_reg_rtx (fltmode);
17393 emit_insn (cvt (tmp[4], tmp[2]));
17394 real_ldexp (&TWO16r, &dconst1, 16);
17395 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
17396 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
17397 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
17399 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
17401 if (tmp[7] != target)
17402 emit_move_insn (target, tmp[7]);
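/* Editorial sketch, not part of GCC: the same expansion written with
   SSE2 intrinsics, for the V4SImode case only (GCC emits the RTL above
   directly; this rendering is purely illustrative).  */
#include <emmintrin.h>

static __m128
cvt_u32x4_to_f32x4 (__m128i v)
{
  __m128i lo = _mm_and_si128 (v, _mm_set1_epi32 (0xffff));
  __m128i hi = _mm_srli_epi32 (v, 16);
  __m128 flo = _mm_cvtepi32_ps (lo);
  __m128 fhi = _mm_cvtepi32_ps (hi);
  return _mm_add_ps (_mm_mul_ps (fhi, _mm_set1_ps (65536.0f)), flo);
}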
17405 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17406 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17407 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17408 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17411 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
17413 REAL_VALUE_TYPE TWO31r;
17414 rtx two31r, tmp[4];
17415 enum machine_mode mode = GET_MODE (val);
17416 enum machine_mode scalarmode = GET_MODE_INNER (mode);
17417 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
17418 rtx (*cmp) (rtx, rtx, rtx, rtx);
17421 for (i = 0; i < 3; i++)
17422 tmp[i] = gen_reg_rtx (mode);
17423 real_ldexp (&TWO31r, &dconst1, 31);
17424 two31r = const_double_from_real_value (TWO31r, scalarmode);
17425 two31r = ix86_build_const_vector (mode, 1, two31r);
17426 two31r = force_reg (mode, two31r);
17429 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
17430 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
17431 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
17432 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
17433 default: gcc_unreachable ();
17435 tmp[3] = gen_rtx_LE (mode, two31r, val);
17436 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
17437 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
17439 if (intmode == V4SImode || TARGET_AVX2)
17440 *xorp = expand_simple_binop (intmode, ASHIFT,
17441 gen_lowpart (intmode, tmp[0]),
17442 GEN_INT (31), NULL_RTX, 0,
17446 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
17447 two31 = ix86_build_const_vector (intmode, 1, two31);
17448 *xorp = expand_simple_binop (intmode, AND,
17449 gen_lowpart (intmode, tmp[0]),
17450 two31, NULL_RTX, 0,
17453 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
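/* Editorial sketch, not part of GCC: the adjustment above in scalar,
   branchy form, valid for v in [0, 2^32).  The RTL computes the same
   thing branchlessly with a compare mask; the name is made up.  */
#include <stdint.h>

static uint32_t
double_to_uns32 (double v)
{
  if (v < 0x1.0p31)
    return (uint32_t) (int32_t) v;	/* signed conversion suffices  */
  /* Subtract 2^31, convert signed, then xor the sign bit back in.  */
  return (uint32_t) (int32_t) (v - 0x1.0p31) ^ 0x80000000u;
}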
17457 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17458 then replicate the value for all elements of the vector register.  */
17462 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17466 enum machine_mode scalar_mode;
17483 n_elt = GET_MODE_NUNITS (mode);
17484 v = rtvec_alloc (n_elt);
17485 scalar_mode = GET_MODE_INNER (mode);
17487 RTVEC_ELT (v, 0) = value;
17489 for (i = 1; i < n_elt; ++i)
17490 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17492 return gen_rtx_CONST_VECTOR (mode, v);
17495 gcc_unreachable ();
17499 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17500 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17501 for an SSE register. If VECT is true, then replicate the mask for
17502 all elements of the vector register. If INVERT is true, then create
17503 a mask excluding the sign bit. */
17506 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17508 enum machine_mode vec_mode, imode;
17509 HOST_WIDE_INT hi, lo;
17514 /* Find the sign bit, sign extended to 2*HWI. */
17522 mode = GET_MODE_INNER (mode);
17524 lo = 0x80000000, hi = lo < 0;
17532 mode = GET_MODE_INNER (mode);
17534 if (HOST_BITS_PER_WIDE_INT >= 64)
17535 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17537 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17542 vec_mode = VOIDmode;
17543 if (HOST_BITS_PER_WIDE_INT >= 64)
17546 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17553 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17557 lo = ~lo, hi = ~hi;
17563 mask = immed_double_const (lo, hi, imode);
17565 vec = gen_rtvec (2, v, mask);
17566 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17567 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17574 gcc_unreachable ();
17578 lo = ~lo, hi = ~hi;
17580 /* Force this value into the low part of a fp vector constant. */
17581 mask = immed_double_const (lo, hi, imode);
17582 mask = gen_lowpart (mode, mask);
17584 if (vec_mode == VOIDmode)
17585 return force_reg (mode, mask);
17587 v = ix86_build_const_vector (vec_mode, vect, mask);
17588 return force_reg (vec_mode, v);
17591 /* Generate code for floating point ABS or NEG. */
17594 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17597 rtx mask, set, dst, src;
17598 bool use_sse = false;
17599 bool vector_mode = VECTOR_MODE_P (mode);
17600 enum machine_mode vmode = mode;
17604 else if (mode == TFmode)
17606 else if (TARGET_SSE_MATH)
17608 use_sse = SSE_FLOAT_MODE_P (mode);
17609 if (mode == SFmode)
17611 else if (mode == DFmode)
17615 /* NEG and ABS performed with SSE use bitwise mask operations.
17616 Create the appropriate mask now. */
17618 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17625 set = gen_rtx_fmt_e (code, mode, src);
17626 set = gen_rtx_SET (VOIDmode, dst, set);
17633 use = gen_rtx_USE (VOIDmode, mask);
17635 par = gen_rtvec (2, set, use);
17638 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17639 par = gen_rtvec (3, set, use, clob);
17641 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
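/* Editorial sketch, not part of GCC: what the masked AND/XOR above
   computes for SFmode (hypothetical helpers; the vector forms apply
   the same masks lane-wise).  */
#include <stdint.h>
#include <string.h>

static float
fabs_bits (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u &= 0x7fffffffu;		/* ABS: AND with the inverted mask  */
  memcpy (&x, &u, sizeof x);
  return x;
}

static float
fneg_bits (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u ^= 0x80000000u;		/* NEG: XOR with the sign-bit mask  */
  memcpy (&x, &u, sizeof x);
  return x;
}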
17647 /* Expand a copysign operation. Special case operand 0 being a constant. */
17650 ix86_expand_copysign (rtx operands[])
17652 enum machine_mode mode, vmode;
17653 rtx dest, op0, op1, mask, nmask;
17655 dest = operands[0];
17659 mode = GET_MODE (dest);
17661 if (mode == SFmode)
17663 else if (mode == DFmode)
17668 if (GET_CODE (op0) == CONST_DOUBLE)
17670 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17672 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17673 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17675 if (mode == SFmode || mode == DFmode)
17677 if (op0 == CONST0_RTX (mode))
17678 op0 = CONST0_RTX (vmode);
17681 rtx v = ix86_build_const_vector (vmode, false, op0);
17683 op0 = force_reg (vmode, v);
17686 else if (op0 != CONST0_RTX (mode))
17687 op0 = force_reg (mode, op0);
17689 mask = ix86_build_signbit_mask (vmode, 0, 0);
17691 if (mode == SFmode)
17692 copysign_insn = gen_copysignsf3_const;
17693 else if (mode == DFmode)
17694 copysign_insn = gen_copysigndf3_const;
17696 copysign_insn = gen_copysigntf3_const;
17698 emit_insn (copysign_insn (dest, op0, op1, mask));
17702 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17704 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17705 mask = ix86_build_signbit_mask (vmode, 0, 0);
17707 if (mode == SFmode)
17708 copysign_insn = gen_copysignsf3_var;
17709 else if (mode == DFmode)
17710 copysign_insn = gen_copysigndf3_var;
17712 copysign_insn = gen_copysigntf3_var;
17714 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17718 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17719 be a constant, and so has already been expanded into a vector constant. */
17722 ix86_split_copysign_const (rtx operands[])
17724 enum machine_mode mode, vmode;
17725 rtx dest, op0, mask, x;
17727 dest = operands[0];
17729 mask = operands[3];
17731 mode = GET_MODE (dest);
17732 vmode = GET_MODE (mask);
17734 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17735 x = gen_rtx_AND (vmode, dest, mask);
17736 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17738 if (op0 != CONST0_RTX (vmode))
17740 x = gen_rtx_IOR (vmode, dest, op0);
17741 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17745 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17746 so we have to do two masks. */
17749 ix86_split_copysign_var (rtx operands[])
17751 enum machine_mode mode, vmode;
17752 rtx dest, scratch, op0, op1, mask, nmask, x;
17754 dest = operands[0];
17755 scratch = operands[1];
17758 nmask = operands[4];
17759 mask = operands[5];
17761 mode = GET_MODE (dest);
17762 vmode = GET_MODE (mask);
17764 if (rtx_equal_p (op0, op1))
17766 /* Shouldn't happen often (it's useless, obviously), but when it does
17767 we'd generate incorrect code if we continue below. */
17768 emit_move_insn (dest, op0);
17772 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17774 gcc_assert (REGNO (op1) == REGNO (scratch));
17776 x = gen_rtx_AND (vmode, scratch, mask);
17777 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17780 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17781 x = gen_rtx_NOT (vmode, dest);
17782 x = gen_rtx_AND (vmode, x, op0);
17783 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17787 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17789 x = gen_rtx_AND (vmode, scratch, mask);
17791 else /* alternative 2,4 */
17793 gcc_assert (REGNO (mask) == REGNO (scratch));
17794 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17795 x = gen_rtx_AND (vmode, scratch, op1);
17797 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17799 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17801 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17802 x = gen_rtx_AND (vmode, dest, nmask);
17804 else /* alternative 3,4 */
17806 gcc_assert (REGNO (nmask) == REGNO (dest));
17808 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17809 x = gen_rtx_AND (vmode, dest, op0);
17811 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17814 x = gen_rtx_IOR (vmode, dest, scratch);
17815 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
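/* Editorial sketch, not part of GCC: the two-mask copysign computed by
   the splitters above, written for scalar DFmode (name made up): AND
   keeps the magnitude of MAG, OR merges in the sign of SGN.  */
#include <stdint.h>
#include <string.h>

static double
copysign_bits (double mag, double sgn)
{
  uint64_t m, s;
  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  m = (m & 0x7fffffffffffffffull)	/* dest & nmask  */
      | (s & 0x8000000000000000ull);	/* sign & mask   */
  memcpy (&mag, &m, sizeof mag);
  return mag;
}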
17818 /* Return TRUE or FALSE depending on whether the first SET in INSN
17819 has source and destination with matching CC modes, and that the
17820 CC mode is at least as constrained as REQ_MODE. */
17823 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17826 enum machine_mode set_mode;
17828 set = PATTERN (insn);
17829 if (GET_CODE (set) == PARALLEL)
17830 set = XVECEXP (set, 0, 0);
17831 gcc_assert (GET_CODE (set) == SET);
17832 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17834 set_mode = GET_MODE (SET_DEST (set));
17838 if (req_mode != CCNOmode
17839 && (req_mode != CCmode
17840 || XEXP (SET_SRC (set), 1) != const0_rtx))
17844 if (req_mode == CCGCmode)
17848 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17852 if (req_mode == CCZmode)
17862 if (set_mode != req_mode)
17867 gcc_unreachable ();
17870 return GET_MODE (SET_SRC (set)) == set_mode;
17873 /* Generate insn patterns to do an integer compare of OPERANDS. */
17876 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17878 enum machine_mode cmpmode;
17881 cmpmode = SELECT_CC_MODE (code, op0, op1);
17882 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17884 /* This is very simple, but making the interface the same as in the
17885 FP case makes the rest of the code easier. */
17886 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17887 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17889 /* Return the test that should be put into the flags user, i.e.
17890 the bcc, scc, or cmov instruction. */
17891 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17894 /* Figure out whether to use ordered or unordered fp comparisons.
17895 Return the appropriate mode to use. */
17898 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17900 /* ??? In order to make all comparisons reversible, we do all comparisons
17901 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17902 all forms of trapping and nontrapping comparisons, we can make inequality
17903 comparisons trapping again, since it results in better code when using
17904 FCOM based compares. */
17905 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17909 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17911 enum machine_mode mode = GET_MODE (op0);
17913 if (SCALAR_FLOAT_MODE_P (mode))
17915 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17916 return ix86_fp_compare_mode (code);
17921 /* Only zero flag is needed. */
17922 case EQ: /* ZF=0 */
17923 case NE: /* ZF!=0 */
17925 /* Codes needing carry flag. */
17926 case GEU: /* CF=0 */
17927 case LTU: /* CF=1 */
17928 /* Detect overflow checks. They need just the carry flag. */
17929 if (GET_CODE (op0) == PLUS
17930 && rtx_equal_p (op1, XEXP (op0, 0)))
17934 case GTU: /* CF=0 & ZF=0 */
17935 case LEU: /* CF=1 | ZF=1 */
17936 /* Detect overflow checks. They need just the carry flag. */
17937 if (GET_CODE (op0) == MINUS
17938 && rtx_equal_p (op1, XEXP (op0, 0)))
17942 /* Codes possibly doable only with sign flag when
17943 comparing against zero. */
17944 case GE: /* SF=OF or SF=0 */
17945 case LT: /* SF<>OF or SF=1 */
17946 if (op1 == const0_rtx)
17949 /* For other cases the Carry flag is not required. */
17951 /* Codes doable only with the sign flag when comparing
17952 against zero, but we miss the jump instruction for them,
17953 so we need to use relational tests against overflow,
17954 which thus needs to be zero. */
17955 case GT: /* ZF=0 & SF=OF */
17956 case LE: /* ZF=1 | SF<>OF */
17957 if (op1 == const0_rtx)
17961 /* The strcmp pattern does (use flags), and combine may ask us for the proper mode. */
17966 gcc_unreachable ();
17970 /* Return the fixed registers used for condition codes. */
17973 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17980 /* If two condition code modes are compatible, return a condition code
17981 mode which is compatible with both. Otherwise, return
17984 static enum machine_mode
17985 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17990 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17993 if ((m1 == CCGCmode && m2 == CCGOCmode)
17994 || (m1 == CCGOCmode && m2 == CCGCmode))
18000 gcc_unreachable ();
18030 /* These are only compatible with themselves, which we already
18037 /* Return a comparison we can do that is equivalent to
18038 swap_condition (code), apart possibly from orderedness.
18039 But never change orderedness if TARGET_IEEE_FP, returning
18040 UNKNOWN in that case if necessary. */
18042 static enum rtx_code
18043 ix86_fp_swap_condition (enum rtx_code code)
18047 case GT: /* GTU - CF=0 & ZF=0 */
18048 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
18049 case GE: /* GEU - CF=0 */
18050 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
18051 case UNLT: /* LTU - CF=1 */
18052 return TARGET_IEEE_FP ? UNKNOWN : GT;
18053 case UNLE: /* LEU - CF=1 | ZF=1 */
18054 return TARGET_IEEE_FP ? UNKNOWN : GE;
18056 return swap_condition (code);
18060 /* Return the cost of comparison CODE using the best strategy for
18061 performance. All the following functions use the number of
18062 instructions as a cost metric. In the future this should compute
18063 bytes for optimize_size and account for per-CPU instruction performance. */
18066 ix86_fp_comparison_cost (enum rtx_code code)
18070 /* The cost of code using bit-twiddling on %ah. */
18087 arith_cost = TARGET_IEEE_FP ? 5 : 4;
18091 arith_cost = TARGET_IEEE_FP ? 6 : 4;
18094 gcc_unreachable ();
18097 switch (ix86_fp_comparison_strategy (code))
18099 case IX86_FPCMP_COMI:
18100 return arith_cost > 4 ? 3 : 2;
18101 case IX86_FPCMP_SAHF:
18102 return arith_cost > 4 ? 4 : 3;
18108 /* Return strategy to use for floating-point. We assume that fcomi is always
18109 preferable where available, since that is also true when looking at size
18110 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18112 enum ix86_fpcmp_strategy
18113 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
18115 /* Do fcomi/sahf based test when profitable. */
18118 return IX86_FPCMP_COMI;
18120 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
18121 return IX86_FPCMP_SAHF;
18123 return IX86_FPCMP_ARITH;
18126 /* Swap, force into registers, or otherwise massage the two operands
18127 to a fp comparison. The operands are updated in place; the new
18128 comparison code is returned. */
18130 static enum rtx_code
18131 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
18133 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
18134 rtx op0 = *pop0, op1 = *pop1;
18135 enum machine_mode op_mode = GET_MODE (op0);
18136 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
18138 /* All of the unordered compare instructions only work on registers.
18139 The same is true of the fcomi compare instructions. The XFmode
18140 compare instructions require registers except when comparing
18141 against zero or when converting operand 1 from fixed point to
18145 && (fpcmp_mode == CCFPUmode
18146 || (op_mode == XFmode
18147 && ! (standard_80387_constant_p (op0) == 1
18148 || standard_80387_constant_p (op1) == 1)
18149 && GET_CODE (op1) != FLOAT)
18150 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
18152 op0 = force_reg (op_mode, op0);
18153 op1 = force_reg (op_mode, op1);
18157 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18158 things around if they appear profitable, otherwise force op0
18159 into a register. */
18161 if (standard_80387_constant_p (op0) == 0
18163 && ! (standard_80387_constant_p (op1) == 0
18166 enum rtx_code new_code = ix86_fp_swap_condition (code);
18167 if (new_code != UNKNOWN)
18170 tmp = op0, op0 = op1, op1 = tmp;
18176 op0 = force_reg (op_mode, op0);
18178 if (CONSTANT_P (op1))
18180 int tmp = standard_80387_constant_p (op1);
18182 op1 = validize_mem (force_const_mem (op_mode, op1));
18186 op1 = force_reg (op_mode, op1);
18189 op1 = force_reg (op_mode, op1);
18193 /* Try to rearrange the comparison to make it cheaper. */
18194 if (ix86_fp_comparison_cost (code)
18195 > ix86_fp_comparison_cost (swap_condition (code))
18196 && (REG_P (op1) || can_create_pseudo_p ()))
18199 tmp = op0, op0 = op1, op1 = tmp;
18200 code = swap_condition (code);
18202 op0 = force_reg (op_mode, op0);
18210 /* Convert comparison codes we use to represent FP comparison to integer
18211 code that will result in proper branch. Return UNKNOWN if no such code
18215 ix86_fp_compare_code_to_integer (enum rtx_code code)
18244 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18247 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18249 enum machine_mode fpcmp_mode, intcmp_mode;
18252 fpcmp_mode = ix86_fp_compare_mode (code);
18253 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18255 /* Do fcomi/sahf based test when profitable. */
18256 switch (ix86_fp_comparison_strategy (code))
18258 case IX86_FPCMP_COMI:
18259 intcmp_mode = fpcmp_mode;
18260 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18261 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18266 case IX86_FPCMP_SAHF:
18267 intcmp_mode = fpcmp_mode;
18268 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18269 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18273 scratch = gen_reg_rtx (HImode);
18274 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
18275 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
18278 case IX86_FPCMP_ARITH:
18279 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18280 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18281 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
18283 scratch = gen_reg_rtx (HImode);
18284 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
18286 /* In the unordered case, we have to check C2 for NaN's, which
18287 doesn't happen to work out to anything nice combination-wise.
18288 So do some bit twiddling on the value we've got in AH to come
18289 up with an appropriate set of condition codes. */
18291 intcmp_mode = CCNOmode;
18296 if (code == GT || !TARGET_IEEE_FP)
18298 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18303 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18304 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18305 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
18306 intcmp_mode = CCmode;
18312 if (code == LT && TARGET_IEEE_FP)
18314 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18315 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
18316 intcmp_mode = CCmode;
18321 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
18327 if (code == GE || !TARGET_IEEE_FP)
18329 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
18334 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18335 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
18341 if (code == LE && TARGET_IEEE_FP)
18343 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18344 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18345 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18346 intcmp_mode = CCmode;
18351 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18357 if (code == EQ && TARGET_IEEE_FP)
18359 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18360 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18361 intcmp_mode = CCmode;
18366 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18372 if (code == NE && TARGET_IEEE_FP)
18374 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18375 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
18381 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18387 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18391 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18396 gcc_unreachable ();
18404 /* Return the test that should be put into the flags user, i.e.
18405 the bcc, scc, or cmov instruction. */
18406 return gen_rtx_fmt_ee (code, VOIDmode,
18407 gen_rtx_REG (intcmp_mode, FLAGS_REG),
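/* Editorial note, not part of GCC: the magic masks above pick FPU status
   word bits as they appear in AH after fnstsw -- C0 = 0x01, C2 = 0x04,
   C3 = 0x40, so 0x45 selects all three.  FCOM sets C3:C2:C0 to 000 for
   ">", 001 for "<", 100 for "==" and 111 for unordered.  A sketch of
   the decoding those bit-twiddling sequences implement:  */
enum fcom_result { FCOM_GT, FCOM_LT, FCOM_EQ, FCOM_UNORDERED };

static enum fcom_result
decode_fcom_ah (unsigned int ah)
{
  switch (ah & 0x45)		/* C3 | C2 | C0 */
    {
    case 0x00: return FCOM_GT;
    case 0x01: return FCOM_LT;
    case 0x40: return FCOM_EQ;
    default:   return FCOM_UNORDERED;	/* 0x45 */
    }
}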
18412 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
18416 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
18417 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
18419 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
18421 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
18422 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18425 ret = ix86_expand_int_compare (code, op0, op1);
18431 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
18433 enum machine_mode mode = GET_MODE (op0);
18445 tmp = ix86_expand_compare (code, op0, op1);
18446 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18447 gen_rtx_LABEL_REF (VOIDmode, label),
18449 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18456 /* Expand DImode branch into multiple compare+branch. */
18458 rtx lo[2], hi[2], label2;
18459 enum rtx_code code1, code2, code3;
18460 enum machine_mode submode;
18462 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18464 tmp = op0, op0 = op1, op1 = tmp;
18465 code = swap_condition (code);
18468 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18469 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18471 submode = mode == DImode ? SImode : DImode;
18473 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18474 avoid two branches. This costs one extra insn, so disable when
18475 optimizing for size. */
18477 if ((code == EQ || code == NE)
18478 && (!optimize_insn_for_size_p ()
18479 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18484 if (hi[1] != const0_rtx)
18485 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18486 NULL_RTX, 0, OPTAB_WIDEN);
18489 if (lo[1] != const0_rtx)
18490 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18491 NULL_RTX, 0, OPTAB_WIDEN);
18493 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18494 NULL_RTX, 0, OPTAB_WIDEN);
18496 ix86_expand_branch (code, tmp, const0_rtx, label);
18500 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18501 op1 is a constant and the low word is zero, then we can just
18502 examine the high word. Similarly for low word -1 and
18503 less-or-equal-than or greater-than. */
18505 if (CONST_INT_P (hi[1]))
18508 case LT: case LTU: case GE: case GEU:
18509 if (lo[1] == const0_rtx)
18511 ix86_expand_branch (code, hi[0], hi[1], label);
18515 case LE: case LEU: case GT: case GTU:
18516 if (lo[1] == constm1_rtx)
18518 ix86_expand_branch (code, hi[0], hi[1], label);
18526 /* Otherwise, we need two or three jumps. */
18528 label2 = gen_label_rtx ();
18531 code2 = swap_condition (code);
18532 code3 = unsigned_condition (code);
18536 case LT: case GT: case LTU: case GTU:
18539 case LE: code1 = LT; code2 = GT; break;
18540 case GE: code1 = GT; code2 = LT; break;
18541 case LEU: code1 = LTU; code2 = GTU; break;
18542 case GEU: code1 = GTU; code2 = LTU; break;
18544 case EQ: code1 = UNKNOWN; code2 = NE; break;
18545 case NE: code2 = UNKNOWN; break;
18548 gcc_unreachable ();
18553 * if (hi(a) < hi(b)) goto true;
18554 * if (hi(a) > hi(b)) goto false;
18555 * if (lo(a) < lo(b)) goto true;
18559 if (code1 != UNKNOWN)
18560 ix86_expand_branch (code1, hi[0], hi[1], label);
18561 if (code2 != UNKNOWN)
18562 ix86_expand_branch (code2, hi[0], hi[1], label2);
18564 ix86_expand_branch (code3, lo[0], lo[1], label);
18566 if (code2 != UNKNOWN)
18567 emit_label (label2);
18572 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
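/* Editorial sketch, not part of GCC: the three-branch scheme from the
   comment above, for a signed doubleword compare split into words
   (hypothetical helper).  High words compare signed, low words always
   unsigned, matching code3 = unsigned_condition (code).  */
#include <stdint.h>

static int
lt_di_split (int32_t hi_a, uint32_t lo_a, int32_t hi_b, uint32_t lo_b)
{
  if (hi_a < hi_b)
    return 1;			/* code1: hi(a) < hi(b) -> true    */
  if (hi_a > hi_b)
    return 0;			/* code2: hi(a) > hi(b) -> false   */
  return lo_a < lo_b;		/* code3: unsigned low-word test   */
}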
18577 /* Split branch based on floating point condition. */
18579 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18580 rtx target1, rtx target2, rtx tmp, rtx pushed)
18585 if (target2 != pc_rtx)
18588 code = reverse_condition_maybe_unordered (code);
18593 condition = ix86_expand_fp_compare (code, op1, op2,
18596 /* Remove pushed operand from stack. */
18598 ix86_free_from_memory (GET_MODE (pushed));
18600 i = emit_jump_insn (gen_rtx_SET
18602 gen_rtx_IF_THEN_ELSE (VOIDmode,
18603 condition, target1, target2)));
18604 if (split_branch_probability >= 0)
18605 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18609 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18613 gcc_assert (GET_MODE (dest) == QImode);
18615 ret = ix86_expand_compare (code, op0, op1);
18616 PUT_MODE (ret, QImode);
18617 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18620 /* Expand comparison setting or clearing carry flag. Return true when
18621 successful and set pop for the operation. */
18623 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18625 enum machine_mode mode =
18626 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18628 /* Do not handle double-mode compares that go through the special path. */
18629 if (mode == (TARGET_64BIT ? TImode : DImode))
18632 if (SCALAR_FLOAT_MODE_P (mode))
18634 rtx compare_op, compare_seq;
18636 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18638 /* Shortcut: the following common codes never translate
18639 into carry flag compares. */
18640 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18641 || code == ORDERED || code == UNORDERED)
18644 /* These comparisons require the zero flag; swap operands so they won't. */
18645 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18646 && !TARGET_IEEE_FP)
18651 code = swap_condition (code);
18654 /* Try to expand the comparison and verify that we end up with
18655 a carry-flag-based comparison. This fails only when we decide
18656 to expand the comparison using arithmetic, which is not a
18657 common scenario. */
18659 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18660 compare_seq = get_insns ();
18663 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18664 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18665 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18667 code = GET_CODE (compare_op);
18669 if (code != LTU && code != GEU)
18672 emit_insn (compare_seq);
18677 if (!INTEGRAL_MODE_P (mode))
18686 /* Convert a==0 into (unsigned)a<1. */
18689 if (op1 != const0_rtx)
18692 code = (code == EQ ? LTU : GEU);
18695 /* Convert a>b into b<a or a>=b-1. */
18698 if (CONST_INT_P (op1))
18700 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18701 /* Bail out on overflow. We can still swap the operands, but that
18702 would force loading the constant into a register. */
18703 if (op1 == const0_rtx
18704 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18706 code = (code == GTU ? GEU : LTU);
18713 code = (code == GTU ? LTU : GEU);
18717 /* Convert a>=0 into (unsigned)a<0x80000000. */
18720 if (mode == DImode || op1 != const0_rtx)
18722 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18723 code = (code == LT ? GEU : LTU);
18727 if (mode == DImode || op1 != constm1_rtx)
18729 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18730 code = (code == LE ? GEU : LTU);
18736 /* Swapping operands may cause a constant to appear as the first operand. */
18737 if (!nonimmediate_operand (op0, VOIDmode))
18739 if (!can_create_pseudo_p ())
18741 op0 = force_reg (mode, op0);
18743 *pop = ix86_expand_compare (code, op0, op1);
18744 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
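/* Editorial sketch, not part of GCC: the rewrites above as C identities,
   assuming the two's-complement conversions typical of x86 targets.  */
#include <assert.h>
#include <stdint.h>

static void
carry_flag_identities (uint32_t x)
{
  assert ((x == 0) == (x < 1));				/* EQ -> LTU     */
  assert (((int32_t) x >= 0) == (x < 0x80000000u));	/* GE 0 -> LTU   */
  assert (((int32_t) x <= -1) == (x >= 0x80000000u));	/* LE -1 -> GEU  */
}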
18749 ix86_expand_int_movcc (rtx operands[])
18751 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18752 rtx compare_seq, compare_op;
18753 enum machine_mode mode = GET_MODE (operands[0]);
18754 bool sign_bit_compare_p = false;
18755 rtx op0 = XEXP (operands[1], 0);
18756 rtx op1 = XEXP (operands[1], 1);
18759 compare_op = ix86_expand_compare (code, op0, op1);
18760 compare_seq = get_insns ();
18763 compare_code = GET_CODE (compare_op);
18765 if ((op1 == const0_rtx && (code == GE || code == LT))
18766 || (op1 == constm1_rtx && (code == GT || code == LE)))
18767 sign_bit_compare_p = true;
18769 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18770 HImode insns, we'd be swallowed in word prefix ops. */
18772 if ((mode != HImode || TARGET_FAST_PREFIX)
18773 && (mode != (TARGET_64BIT ? TImode : DImode))
18774 && CONST_INT_P (operands[2])
18775 && CONST_INT_P (operands[3]))
18777 rtx out = operands[0];
18778 HOST_WIDE_INT ct = INTVAL (operands[2]);
18779 HOST_WIDE_INT cf = INTVAL (operands[3]);
18780 HOST_WIDE_INT diff;
18783 /* Sign bit compares are better done using shifts than we do by using sbb. */
18785 if (sign_bit_compare_p
18786 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18788 /* Detect overlap between destination and compare sources. */
18791 if (!sign_bit_compare_p)
18794 bool fpcmp = false;
18796 compare_code = GET_CODE (compare_op);
18798 flags = XEXP (compare_op, 0);
18800 if (GET_MODE (flags) == CCFPmode
18801 || GET_MODE (flags) == CCFPUmode)
18805 = ix86_fp_compare_code_to_integer (compare_code);
18808 /* To simplify the rest of the code, restrict to the GEU case. */
18809 if (compare_code == LTU)
18811 HOST_WIDE_INT tmp = ct;
18814 compare_code = reverse_condition (compare_code);
18815 code = reverse_condition (code);
18820 PUT_CODE (compare_op,
18821 reverse_condition_maybe_unordered
18822 (GET_CODE (compare_op)));
18824 PUT_CODE (compare_op,
18825 reverse_condition (GET_CODE (compare_op)));
18829 if (reg_overlap_mentioned_p (out, op0)
18830 || reg_overlap_mentioned_p (out, op1))
18831 tmp = gen_reg_rtx (mode);
18833 if (mode == DImode)
18834 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18836 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18837 flags, compare_op));
18841 if (code == GT || code == GE)
18842 code = reverse_condition (code);
18845 HOST_WIDE_INT tmp = ct;
18850 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18863 tmp = expand_simple_binop (mode, PLUS,
18865 copy_rtx (tmp), 1, OPTAB_DIRECT);
18876 tmp = expand_simple_binop (mode, IOR,
18878 copy_rtx (tmp), 1, OPTAB_DIRECT);
18880 else if (diff == -1 && ct)
18890 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18892 tmp = expand_simple_binop (mode, PLUS,
18893 copy_rtx (tmp), GEN_INT (cf),
18894 copy_rtx (tmp), 1, OPTAB_DIRECT);
18902 * andl cf - ct, dest
18912 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18915 tmp = expand_simple_binop (mode, AND,
18917 gen_int_mode (cf - ct, mode),
18918 copy_rtx (tmp), 1, OPTAB_DIRECT);
18920 tmp = expand_simple_binop (mode, PLUS,
18921 copy_rtx (tmp), GEN_INT (ct),
18922 copy_rtx (tmp), 1, OPTAB_DIRECT);
18925 if (!rtx_equal_p (tmp, out))
18926 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18933 enum machine_mode cmp_mode = GET_MODE (op0);
18936 tmp = ct, ct = cf, cf = tmp;
18939 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18941 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18943 /* We may be reversing an unordered compare to a normal compare, which
18944 is not valid in general (we may convert a non-trapping condition
18945 to a trapping one); however, on i386 we currently emit all
18946 comparisons unordered. */
18947 compare_code = reverse_condition_maybe_unordered (compare_code);
18948 code = reverse_condition_maybe_unordered (code);
18952 compare_code = reverse_condition (compare_code);
18953 code = reverse_condition (code);
18957 compare_code = UNKNOWN;
18958 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18959 && CONST_INT_P (op1))
18961 if (op1 == const0_rtx
18962 && (code == LT || code == GE))
18963 compare_code = code;
18964 else if (op1 == constm1_rtx)
18968 else if (code == GT)
18973 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18974 if (compare_code != UNKNOWN
18975 && GET_MODE (op0) == GET_MODE (out)
18976 && (cf == -1 || ct == -1))
18978 /* If the lea code below could be used, only optimize
18979 if it results in a 2-insn sequence. */
18981 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18982 || diff == 3 || diff == 5 || diff == 9)
18983 || (compare_code == LT && ct == -1)
18984 || (compare_code == GE && cf == -1))
18987 * notl op1 (if necessary)
18995 code = reverse_condition (code);
18998 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19000 out = expand_simple_binop (mode, IOR,
19002 out, 1, OPTAB_DIRECT);
19003 if (out != operands[0])
19004 emit_move_insn (operands[0], out);
19011 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
19012 || diff == 3 || diff == 5 || diff == 9)
19013 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
19015 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
19021 * lea cf(dest*(ct-cf)),dest
19025 * This also catches the degenerate setcc-only case.
19031 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19034 /* On x86_64 the lea instruction operates on Pmode, so we need
19035 to do the arithmetic in the proper mode to match. */
19037 tmp = copy_rtx (out);
19041 out1 = copy_rtx (out);
19042 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
19046 tmp = gen_rtx_PLUS (mode, tmp, out1);
19052 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
19055 if (!rtx_equal_p (tmp, out))
19058 out = force_operand (tmp, copy_rtx (out));
19060 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
19062 if (!rtx_equal_p (out, operands[0]))
19063 emit_move_insn (operands[0], copy_rtx (out));
19069 * General case: Jumpful:
19070 * xorl dest,dest cmpl op1, op2
19071 * cmpl op1, op2 movl ct, dest
19072 * setcc dest jcc 1f
19073 * decl dest movl cf, dest
19074 * andl (cf-ct),dest 1:
19077 * Size 20. Size 14.
19079 * This is reasonably steep, but branch mispredict costs are
19080 * high on modern cpus, so consider failing only if optimizing
 * for space. */
19084 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19085 && BRANCH_COST (optimize_insn_for_speed_p (),
19090 enum machine_mode cmp_mode = GET_MODE (op0);
19095 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19097 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19099 /* We may be reversing an unordered compare to a normal compare,
19100 which is not valid in general (we may convert a non-trapping
19101 condition to a trapping one); however, on i386 we currently
19102 emit all comparisons unordered. */
19103 code = reverse_condition_maybe_unordered (code);
19107 code = reverse_condition (code);
19108 if (compare_code != UNKNOWN)
19109 compare_code = reverse_condition (compare_code);
19113 if (compare_code != UNKNOWN)
19115 /* notl op1 (if needed)
19120 For x < 0 (resp. x <= -1) there will be no notl,
19121 so if possible swap the constants to get rid of the complement.
19123 True/false will be -1/0 while code below (store flag
19124 followed by decrement) is 0/-1, so the constants need
19125 to be exchanged once more. */
19127 if (compare_code == GE || !cf)
19129 code = reverse_condition (code);
19134 HOST_WIDE_INT tmp = cf;
19139 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19143 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19145 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
19147 copy_rtx (out), 1, OPTAB_DIRECT);
19150 out = expand_simple_binop (mode, AND, copy_rtx (out),
19151 gen_int_mode (cf - ct, mode),
19152 copy_rtx (out), 1, OPTAB_DIRECT);
19154 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
19155 copy_rtx (out), 1, OPTAB_DIRECT);
19156 if (!rtx_equal_p (out, operands[0]))
19157 emit_move_insn (operands[0], copy_rtx (out));
19163 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19165 /* Try a few things more with specific constants and a variable. */
19168 rtx var, orig_out, out, tmp;
19170 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19173 /* If one of the two operands is an interesting constant, load a
19174 constant with the above and mask it in with a logical operation. */
19176 if (CONST_INT_P (operands[2]))
19179 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
19180 operands[3] = constm1_rtx, op = and_optab;
19181 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
19182 operands[3] = const0_rtx, op = ior_optab;
19186 else if (CONST_INT_P (operands[3]))
19189 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
19190 operands[2] = constm1_rtx, op = and_optab;
19191 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
19192 operands[2] = const0_rtx, op = ior_optab;
19199 orig_out = operands[0];
19200 tmp = gen_reg_rtx (mode);
19203 /* Recurse to get the constant loaded. */
19204 if (ix86_expand_int_movcc (operands) == 0)
19207 /* Mask in the interesting variable. */
19208 out = expand_binop (mode, op, var, tmp, orig_out, 0,
19210 if (!rtx_equal_p (out, orig_out))
19211 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
19217 * For comparison with above,
19227 if (! nonimmediate_operand (operands[2], mode))
19228 operands[2] = force_reg (mode, operands[2]);
19229 if (! nonimmediate_operand (operands[3], mode))
19230 operands[3] = force_reg (mode, operands[3]);
19232 if (! register_operand (operands[2], VOIDmode)
19234 || ! register_operand (operands[3], VOIDmode)))
19235 operands[2] = force_reg (mode, operands[2]);
19238 && ! register_operand (operands[3], VOIDmode))
19239 operands[3] = force_reg (mode, operands[3]);
19241 emit_insn (compare_seq);
19242 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19243 gen_rtx_IF_THEN_ELSE (mode,
19244 compare_op, operands[2],
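/* Editorial sketch, not part of GCC: one common shape of the branchless
   sequences built above when both arms are constants.  The sbb-style
   mask is 0 or all-ones, so AND/ADD select between CT and CF.  */
#include <stdint.h>

static uint32_t
cmov_const (int cond, uint32_t ct, uint32_t cf)
{
  uint32_t mask = - (uint32_t) (cond != 0);	/* 0 or 0xffffffff    */
  return cf + (mask & (ct - cf));		/* ct if cond else cf */
}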
19249 /* Swap, force into registers, or otherwise massage the two operands
19250 to an sse comparison with a mask result. Thus we differ a bit from
19251 ix86_prepare_fp_compare_args which expects to produce a flags result.
19253 The DEST operand exists to help determine whether to commute commutative
19254 operators. The POP0/POP1 operands are updated in place. The new
19255 comparison code is returned, or UNKNOWN if not implementable. */
19257 static enum rtx_code
19258 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19259 rtx *pop0, rtx *pop1)
19267 /* AVX supports all the needed comparisons. */
19270 /* We have no LTGT as an operator. We could implement it with
19271 NE & ORDERED, but this requires an extra temporary. It's
19272 not clear that it's worth it. */
19279 /* These are supported directly. */
19286 /* AVX has 3-operand comparisons, no need to swap anything. */
19289 /* For commutative operators, try to canonicalize the destination
19290 operand to be first in the comparison - this helps reload to
19291 avoid extra moves. */
19292 if (!dest || !rtx_equal_p (dest, *pop1))
19300 /* These are not supported directly before AVX, and furthermore
19301 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19302 comparison operands to transform into something that is supported. */
19307 code = swap_condition (code);
19311 gcc_unreachable ();
19317 /* Detect conditional moves that exactly match min/max operational
19318 semantics. Note that this is IEEE safe, as long as we don't
19319 interchange the operands.
19321 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19322 and TRUE if the operation is successful and instructions are emitted. */
19325 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
19326 rtx cmp_op1, rtx if_true, rtx if_false)
19328 enum machine_mode mode;
19334 else if (code == UNGE)
19337 if_true = if_false;
19343 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
19345 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
19350 mode = GET_MODE (dest);
19352 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19353 but MODE may be a vector mode and thus not appropriate. */
19354 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
19356 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
19359 if_true = force_reg (mode, if_true);
19360 v = gen_rtvec (2, if_true, if_false);
19361 tmp = gen_rtx_UNSPEC (mode, v, u);
19365 code = is_min ? SMIN : SMAX;
19366 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
19369 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
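/* Editorial note, not part of GCC: the pattern recognized above matches
   the asymmetric SSE min/max semantics, where the second operand wins
   when the compare is unordered (sketch, names made up):  */
static float sse_min_sem (float a, float b) { return a < b ? a : b; }
static float sse_max_sem (float a, float b) { return a > b ? a : b; }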
19373 /* Expand an sse vector comparison. Return the register with the result. */
19376 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
19377 rtx op_true, rtx op_false)
19379 enum machine_mode mode = GET_MODE (dest);
19380 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
19383 cmp_op0 = force_reg (cmp_mode, cmp_op0);
19384 if (!nonimmediate_operand (cmp_op1, cmp_mode))
19385 cmp_op1 = force_reg (cmp_mode, cmp_op1);
19388 || reg_overlap_mentioned_p (dest, op_true)
19389 || reg_overlap_mentioned_p (dest, op_false))
19390 dest = gen_reg_rtx (mode);
19392 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
19393 if (cmp_mode != mode)
19395 x = force_reg (cmp_mode, x);
19396 convert_move (dest, x, false);
19399 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19404 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19405 operations. This is used for both scalar and vector conditional moves. */
19408 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
19410 enum machine_mode mode = GET_MODE (dest);
19413 if (vector_all_ones_operand (op_true, mode)
19414 && rtx_equal_p (op_false, CONST0_RTX (mode)))
19416 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
19418 else if (op_false == CONST0_RTX (mode))
19420 op_true = force_reg (mode, op_true);
19421 x = gen_rtx_AND (mode, cmp, op_true);
19422 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19424 else if (op_true == CONST0_RTX (mode))
19426 op_false = force_reg (mode, op_false);
19427 x = gen_rtx_NOT (mode, cmp);
19428 x = gen_rtx_AND (mode, x, op_false);
19429 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19431 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
19433 op_false = force_reg (mode, op_false);
19434 x = gen_rtx_IOR (mode, cmp, op_false);
19435 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19437 else if (TARGET_XOP)
19439 op_true = force_reg (mode, op_true);
19441 if (!nonimmediate_operand (op_false, mode))
19442 op_false = force_reg (mode, op_false);
19444 emit_insn (gen_rtx_SET (mode, dest,
19445 gen_rtx_IF_THEN_ELSE (mode, cmp,
19451 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
19453 if (!nonimmediate_operand (op_true, mode))
19454 op_true = force_reg (mode, op_true);
19456 op_false = force_reg (mode, op_false);
19462 gen = gen_sse4_1_blendvps;
19466 gen = gen_sse4_1_blendvpd;
19474 gen = gen_sse4_1_pblendvb;
19475 dest = gen_lowpart (V16QImode, dest);
19476 op_false = gen_lowpart (V16QImode, op_false);
19477 op_true = gen_lowpart (V16QImode, op_true);
19478 cmp = gen_lowpart (V16QImode, cmp);
19483 gen = gen_avx_blendvps256;
19487 gen = gen_avx_blendvpd256;
19495 gen = gen_avx2_pblendvb;
19496 dest = gen_lowpart (V32QImode, dest);
19497 op_false = gen_lowpart (V32QImode, op_false);
19498 op_true = gen_lowpart (V32QImode, op_true);
19499 cmp = gen_lowpart (V32QImode, cmp);
19507 emit_insn (gen (dest, op_false, op_true, cmp));
19510 op_true = force_reg (mode, op_true);
19512 t2 = gen_reg_rtx (mode);
19514 t3 = gen_reg_rtx (mode);
19518 x = gen_rtx_AND (mode, op_true, cmp);
19519 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19521 x = gen_rtx_NOT (mode, cmp);
19522 x = gen_rtx_AND (mode, x, op_false);
19523 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19525 x = gen_rtx_IOR (mode, t3, t2);
19526 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
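/* Editorial sketch, not part of GCC: the and/andnot/ior fallback at the
   end of the function, applied per element; CMP is all-zeros or
   all-ones, so this selects OP_TRUE or OP_FALSE bitwise.  */
#include <stdint.h>

static uint32_t
blend_bits (uint32_t cmp, uint32_t op_true, uint32_t op_false)
{
  return (cmp & op_true) | (~cmp & op_false);
}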
19531 /* Expand a floating-point conditional move. Return true if successful. */
19534 ix86_expand_fp_movcc (rtx operands[])
19536 enum machine_mode mode = GET_MODE (operands[0]);
19537 enum rtx_code code = GET_CODE (operands[1]);
19538 rtx tmp, compare_op;
19539 rtx op0 = XEXP (operands[1], 0);
19540 rtx op1 = XEXP (operands[1], 1);
19542 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19544 enum machine_mode cmode;
19546 /* Since we've no cmove for sse registers, don't force bad register
19547 allocation just to gain access to it. Deny movcc when the
19548 comparison mode doesn't match the move mode. */
19549 cmode = GET_MODE (op0);
19550 if (cmode == VOIDmode)
19551 cmode = GET_MODE (op1);
19555 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19556 if (code == UNKNOWN)
19559 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19560 operands[2], operands[3]))
19563 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19564 operands[2], operands[3]);
19565 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19569 /* The floating point conditional move instructions don't directly
19570 support conditions resulting from a signed integer comparison. */
19572 compare_op = ix86_expand_compare (code, op0, op1);
19573 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19575 tmp = gen_reg_rtx (QImode);
19576 ix86_expand_setcc (tmp, code, op0, op1);
19578 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19581 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19582 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19583 operands[2], operands[3])));
19588 /* Expand a floating-point vector conditional move; a vcond operation
19589 rather than a movcc operation. */
19592 ix86_expand_fp_vcond (rtx operands[])
19594 enum rtx_code code = GET_CODE (operands[3]);
19597 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19598 &operands[4], &operands[5]);
19599 if (code == UNKNOWN)
19602 switch (GET_CODE (operands[3]))
19605 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19606 operands[5], operands[0], operands[0]);
19607 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19608 operands[5], operands[1], operands[2]);
19612 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19613 operands[5], operands[0], operands[0]);
19614 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19615 operands[5], operands[1], operands[2]);
19619 gcc_unreachable ();
19621 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19623 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19627 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19628 operands[5], operands[1], operands[2]))
19631 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19632 operands[1], operands[2]);
19633 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
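/* Editorial sketch, not part of GCC: the element-wise decompositions
   used above, with C99 isunordered -- LTGT = ORDERED & NE and
   UNEQ = UNORDERED | EQ.  */
#include <math.h>

static int
ltgt_sem (double a, double b)
{
  return !isunordered (a, b) && a != b;
}

static int
uneq_sem (double a, double b)
{
  return isunordered (a, b) || a == b;
}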
19637 /* Expand a signed/unsigned integral vector conditional move. */
19640 ix86_expand_int_vcond (rtx operands[])
19642 enum machine_mode data_mode = GET_MODE (operands[0]);
19643 enum machine_mode mode = GET_MODE (operands[4]);
19644 enum rtx_code code = GET_CODE (operands[3]);
19645 bool negate = false;
19648 cop0 = operands[4];
19649 cop1 = operands[5];
19651 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
19652 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
19653 if ((code == LT || code == GE)
19654 && data_mode == mode
19655 && cop1 == CONST0_RTX (mode)
19656 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
19657 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
19658 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
19659 && (GET_MODE_SIZE (data_mode) == 16
19660 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
19662 rtx negop = operands[2 - (code == LT)];
19663 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
19664 if (negop == CONST1_RTX (data_mode))
19666 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
19667 operands[0], 1, OPTAB_DIRECT);
19668 if (res != operands[0])
19669 emit_move_insn (operands[0], res);
19672 else if (GET_MODE_INNER (data_mode) != DImode
19673 && vector_all_ones_operand (negop, data_mode))
19675 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
19676 operands[0], 0, OPTAB_DIRECT);
19677 if (res != operands[0])
19678 emit_move_insn (operands[0], res);
19683 if (!nonimmediate_operand (cop1, mode))
19684 cop1 = force_reg (mode, cop1);
19685 if (!general_operand (operands[1], data_mode))
19686 operands[1] = force_reg (data_mode, operands[1]);
19687 if (!general_operand (operands[2], data_mode))
19688 operands[2] = force_reg (data_mode, operands[2]);
19690 /* XOP supports all of the comparisons on all 128-bit vector int types. */
19692 && (mode == V16QImode || mode == V8HImode
19693 || mode == V4SImode || mode == V2DImode))
19697 /* Canonicalize the comparison to EQ, GT, GTU. */
19708 code = reverse_condition (code);
19714 code = reverse_condition (code);
19720 code = swap_condition (code);
19721 x = cop0, cop0 = cop1, cop1 = x;
19725 gcc_unreachable ();
19728 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19729 if (mode == V2DImode)
19734 /* SSE4.1 supports EQ. */
19735 if (!TARGET_SSE4_1)
19741 /* SSE4.2 supports GT/GTU. */
19742 if (!TARGET_SSE4_2)
19747 gcc_unreachable ();
19751 /* Unsigned parallel compare is not supported by the hardware.
19752 Play some tricks to turn this into a signed comparison against 0. */
19756 cop0 = force_reg (mode, cop0);
19766 rtx (*gen_sub3) (rtx, rtx, rtx);
19770 case V8SImode: gen_sub3 = gen_subv8si3; break;
19771 case V4DImode: gen_sub3 = gen_subv4di3; break;
19772 case V4SImode: gen_sub3 = gen_subv4si3; break;
19773 case V2DImode: gen_sub3 = gen_subv2di3; break;
19775 gcc_unreachable ();
19777 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
19779 mask = ix86_build_signbit_mask (mode, true, false);
19780 t1 = gen_reg_rtx (mode);
19781 emit_insn (gen_sub3 (t1, cop0, mask));
19783 t2 = gen_reg_rtx (mode);
19784 emit_insn (gen_sub3 (t2, cop1, mask));
19796 /* Perform a parallel unsigned saturating subtraction. */
19797 x = gen_reg_rtx (mode);
19798 emit_insn (gen_rtx_SET (VOIDmode, x,
19799 gen_rtx_US_MINUS (mode, cop0, cop1)));
19802 cop1 = CONST0_RTX (mode);
19808 gcc_unreachable ();
19813 /* Allow the comparison to be done in one mode, but the movcc to
19814 happen in another mode. */
19815 if (data_mode == mode)
19817 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19818 operands[1+negate], operands[2-negate]);
19822 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
19823 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
19825 operands[1+negate], operands[2-negate]);
19826 x = gen_lowpart (data_mode, x);
19829 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19830 operands[2-negate]);
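/* Editorial sketch, not part of GCC: the signed-comparison trick used
   above.  Biasing both operands by 2^31 (an XOR of the sign bit, which
   equals subtracting (-(INT MAX) - 1) modulo 2^32) turns an unsigned
   GTU into a signed GT.  */
#include <stdint.h>

static int
gtu_via_gt (uint32_t a, uint32_t b)
{
  return (int32_t) (a ^ 0x80000000u) > (int32_t) (b ^ 0x80000000u);
}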
19834 /* Expand a variable vector permutation. */
19837 ix86_expand_vec_perm (rtx operands[])
19839 rtx target = operands[0];
19840 rtx op0 = operands[1];
19841 rtx op1 = operands[2];
19842 rtx mask = operands[3];
19843 rtx t1, t2, t3, t4, vt, vt2, vec[32];
19844 enum machine_mode mode = GET_MODE (op0);
19845 enum machine_mode maskmode = GET_MODE (mask);
19847 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19849 /* Number of elements in the vector. */
19850 w = GET_MODE_NUNITS (mode);
19851 e = GET_MODE_UNIT_SIZE (mode);
19852 gcc_assert (w <= 32);
19856 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
19858 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19859 a constant shuffle operand. With a tiny bit of effort we can
19860 use VPERMD instead. A re-interpretation stall for V4DFmode is
19861 unfortunate but there's no avoiding it.
19862 Similarly for V16HImode we don't have instructions for variable
19863 shuffling, while for V32QImode we can, after preparing suitable
19864 masks, use vpshufb; vpshufb; vpermq; vpor. */
19866 if (mode == V16HImode)
19868 maskmode = mode = V32QImode;
19874 maskmode = mode = V8SImode;
19878 t1 = gen_reg_rtx (maskmode);
19880 /* Replicate the low bits of the V4DImode mask into V8SImode:
19882 mask = { A B C D }, t1 = { A A B B C C D D }. */
19883 for (i = 0; i < w / 2; ++i)
19884 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
19885 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19886 vt = force_reg (maskmode, vt);
19887 mask = gen_lowpart (maskmode, mask);
19888 if (maskmode == V8SImode)
19889 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
19891 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
19893 /* Multiply the shuffle indices by two. */
19894 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
19897 /* Add one to the odd shuffle indices:
19898 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19899 for (i = 0; i < w / 2; ++i)
19901 vec[i * 2] = const0_rtx;
19902 vec[i * 2 + 1] = const1_rtx;
19904 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19905 vt = force_const_mem (maskmode, vt);
19906 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
19909 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19910 operands[3] = mask = t1;
19911 target = gen_lowpart (mode, target);
19912 op0 = gen_lowpart (mode, op0);
19913 op1 = gen_lowpart (mode, op1);
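/* Editorial worked example, not from the sources, of the widening just
   performed: a V4DImode mask { 3 0 2 1 } was replicated to
   { 3 3 0 0 2 2 1 1 }, doubled to { 6 6 0 0 4 4 2 2 }, and 1 was added
   at the odd positions, giving { 6 7 0 1 4 5 2 3 } -- the V8SImode
   mask that selects the same qwords.  */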
19919 /* The VPERMD and VPERMPS instructions already properly ignore
19920 the high bits of the shuffle elements. No need for us to
19921 perform an AND ourselves. */
19922 if (one_operand_shuffle)
19923 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
19926 t1 = gen_reg_rtx (V8SImode);
19927 t2 = gen_reg_rtx (V8SImode);
19928 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
19929 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
19935 mask = gen_lowpart (V8SFmode, mask);
19936 if (one_operand_shuffle)
19937 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
19940 t1 = gen_reg_rtx (V8SFmode);
19941 t2 = gen_reg_rtx (V8SFmode);
19942 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
19943 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
19949 /* By combining the two 128-bit input vectors into one 256-bit
19950 input vector, we can use VPERMD and VPERMPS for the full
19951 two-operand shuffle. */
19952 t1 = gen_reg_rtx (V8SImode);
19953 t2 = gen_reg_rtx (V8SImode);
19954 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
19955 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
19956 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
19957 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
19961 t1 = gen_reg_rtx (V8SFmode);
19962 t2 = gen_reg_rtx (V8SImode);
19963 mask = gen_lowpart (V4SImode, mask);
19964 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
19965 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
19966 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
19967 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
19971 t1 = gen_reg_rtx (V32QImode);
19972 t2 = gen_reg_rtx (V32QImode);
19973 t3 = gen_reg_rtx (V32QImode);
19974 vt2 = GEN_INT (128);
19975 for (i = 0; i < 32; i++)
19977 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19978 vt = force_reg (V32QImode, vt);
19979 for (i = 0; i < 32; i++)
19980 vec[i] = i < 16 ? vt2 : const0_rtx;
19981 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19982 vt2 = force_reg (V32QImode, vt2);
/* From mask create two adjusted masks, which contain the same
   bits as mask in the low 7 bits of each vector element.
   The first mask will have the most significant bit clear
   if it requests an element from the same 128-bit lane
   and the MSB set if it requests an element from the other 128-bit lane.
   The second mask will have the opposite values of the MSB,
   and additionally will have its 128-bit lanes swapped.
   E.g. the mask vector { 07 12 1e 09 ... | 17 19 05 1f ... } will have
   t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
   t3 { 97 99 05 9f ... | 87 12 1e 89 ... }, where each ...
   stands for the other 12 bytes.  */
/* The bit that says whether an element is from the same lane or the
   other lane is bit 4, so shift it up by 3 to the MSB position.  */
19996 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
19997 gen_lowpart (V4DImode, mask),
19999 /* Clear MSB bits from the mask just in case it had them set. */
20000 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
/* After this t1 will have the MSB set for elements from the same
   128-bit lane (cf. the byte values in the example above).  */
20002 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
20003 /* Clear bits other than MSB. */
20004 emit_insn (gen_andv32qi3 (t1, t1, vt));
20005 /* Or in the lower bits from mask into t3. */
20006 emit_insn (gen_iorv32qi3 (t3, t1, t2));
/* And invert the MSB bits in t1, so the MSB ends up set for elements
   from the other 128-bit lane.  */
20009 emit_insn (gen_xorv32qi3 (t1, t1, vt));
20010 /* Swap 128-bit lanes in t3. */
20011 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20012 gen_lowpart (V4DImode, t3),
20013 const2_rtx, GEN_INT (3),
20014 const0_rtx, const1_rtx));
20015 /* And or in the lower bits from mask into t1. */
20016 emit_insn (gen_iorv32qi3 (t1, t1, t2));
20017 if (one_operand_shuffle)
20019 /* Each of these shuffles will put 0s in places where
20020 element from the other 128-bit lane is needed, otherwise
20021 will shuffle in the requested value. */
20022 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
20023 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
20024 /* For t3 the 128-bit lanes are swapped again. */
20025 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20026 gen_lowpart (V4DImode, t3),
20027 const2_rtx, GEN_INT (3),
20028 const0_rtx, const1_rtx));
/* ORing both together gives the final result.  */
20030 emit_insn (gen_iorv32qi3 (target, t1, t3));
20034 t4 = gen_reg_rtx (V32QImode);
/* Similar to the one_operand_shuffle code above, just repeated
   twice for each operand.  The code at the merge_two label below
   will merge the two results together.  */
20038 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
20039 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
20040 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
20041 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
20042 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
20043 gen_lowpart (V4DImode, t4),
20044 const2_rtx, GEN_INT (3),
20045 const0_rtx, const1_rtx));
20046 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20047 gen_lowpart (V4DImode, t3),
20048 const2_rtx, GEN_INT (3),
20049 const0_rtx, const1_rtx));
20050 emit_insn (gen_iorv32qi3 (t4, t2, t4));
20051 emit_insn (gen_iorv32qi3 (t3, t1, t3));
20057 gcc_assert (GET_MODE_SIZE (mode) <= 16);
20064 /* The XOP VPPERM insn supports three inputs. By ignoring the
20065 one_operand_shuffle special case, we avoid creating another
20066 set of constant vectors in memory. */
20067 one_operand_shuffle = false;
20069 /* mask = mask & {2*w-1, ...} */
20070 vt = GEN_INT (2*w - 1);
20074 /* mask = mask & {w-1, ...} */
20075 vt = GEN_INT (w - 1);
20078 for (i = 0; i < w; i++)
20080 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20081 mask = expand_simple_binop (maskmode, AND, mask, vt,
20082 NULL_RTX, 0, OPTAB_DIRECT);
20084 /* For non-QImode operations, convert the word permutation control
20085 into a byte permutation control. */
20086 if (mode != V16QImode)
20088 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20089 GEN_INT (exact_log2 (e)),
20090 NULL_RTX, 0, OPTAB_DIRECT);
20092 /* Convert mask to vector of chars. */
20093 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20095 /* Replicate each of the input bytes into byte positions:
20096 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20097 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20098 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20099 for (i = 0; i < 16; ++i)
20100 vec[i] = GEN_INT (i/e * e);
20101 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20102 vt = force_const_mem (V16QImode, vt);
20104 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20106 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
/* Convert it into the byte positions by doing
   mask = mask + {0,1,..,16/w-1, 0,1,..,16/w-1, ...}.  */
20110 for (i = 0; i < 16; ++i)
20111 vec[i] = GEN_INT (i % e);
20112 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20113 vt = force_const_mem (V16QImode, vt);
20114 emit_insn (gen_addv16qi3 (mask, mask, vt));
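/* Worked example (illustration): for V4SImode (e == 4) a word selector
   of 3 is shifted to 3 << 2 == 12, replicated by the byte shuffle above
   to { 12 12 12 12 }, and the addition of { 0 1 2 3 } just emitted
   turns it into the byte selectors { 12 13 14 15 }, covering the whole
   selected dword.  */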
20117 /* The actual shuffle operations all operate on V16QImode. */
20118 op0 = gen_lowpart (V16QImode, op0);
20119 op1 = gen_lowpart (V16QImode, op1);
20120 target = gen_lowpart (V16QImode, target);
20124 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20126 else if (one_operand_shuffle)
20128 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20135 /* Shuffle the two input vectors independently. */
20136 t1 = gen_reg_rtx (V16QImode);
20137 t2 = gen_reg_rtx (V16QImode);
20138 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20139 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20142 /* Then merge them together. The key is whether any given control
20143 element contained a bit set that indicates the second word. */
20144 mask = operands[3];
20146 if (maskmode == V2DImode && !TARGET_SSE4_1)
/* Without SSE4.1, we don't have V2DImode EQ.  Perform one
   more shuffle to convert the V2DI input mask into a V4SI
   input mask.  At that point the masking done by
   expand_int_vcond will work as desired.  */
20152 rtx t3 = gen_reg_rtx (V4SImode);
20153 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20154 const0_rtx, const0_rtx,
20155 const2_rtx, const2_rtx));
20157 maskmode = V4SImode;
20161 for (i = 0; i < w; i++)
20163 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20164 vt = force_reg (maskmode, vt);
20165 mask = expand_simple_binop (maskmode, AND, mask, vt,
20166 NULL_RTX, 0, OPTAB_DIRECT);
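/* At this point mask & { w, w, ... } has isolated, for each element,
   the bit that says whether it should come from the second input
   vector; the EQ comparison built below turns that into the selector
   of the vector conditional move that merges t1 and t2.  */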
20168 xops[0] = gen_lowpart (mode, operands[0]);
20169 xops[1] = gen_lowpart (mode, t2);
20170 xops[2] = gen_lowpart (mode, t1);
20171 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20174 ok = ix86_expand_int_vcond (xops);
20179 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20180 true if we should do zero extension, else sign extension. HIGH_P is
20181 true if we want the N/2 high elements, else the low elements. */
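/* For example (illustration), sign-extending the low half of a
   V16QImode operand yields a V8HImode result holding elements 0..7,
   each widened from 8 to 16 bits.  */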
20184 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
20186 enum machine_mode imode = GET_MODE (operands[1]);
20191 rtx (*unpack)(rtx, rtx);
20192 rtx (*extract)(rtx, rtx) = NULL;
20193 enum machine_mode halfmode = BLKmode;
20199 unpack = gen_avx2_zero_extendv16qiv16hi2;
20201 unpack = gen_avx2_sign_extendv16qiv16hi2;
20202 halfmode = V16QImode;
20204 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20208 unpack = gen_avx2_zero_extendv8hiv8si2;
20210 unpack = gen_avx2_sign_extendv8hiv8si2;
20211 halfmode = V8HImode;
20213 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20217 unpack = gen_avx2_zero_extendv4siv4di2;
20219 unpack = gen_avx2_sign_extendv4siv4di2;
20220 halfmode = V4SImode;
20222 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20226 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20228 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20232 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20234 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20238 unpack = gen_sse4_1_zero_extendv2siv2di2;
20240 unpack = gen_sse4_1_sign_extendv2siv2di2;
20243 gcc_unreachable ();
20246 if (GET_MODE_SIZE (imode) == 32)
20248 tmp = gen_reg_rtx (halfmode);
20249 emit_insn (extract (tmp, operands[1]));
/* Shift the higher 8 bytes into the lower 8 bytes.  */
20254 tmp = gen_reg_rtx (imode);
20255 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20256 gen_lowpart (V1TImode, operands[1]),
20262 emit_insn (unpack (operands[0], tmp));
20266 rtx (*unpack)(rtx, rtx, rtx);
20272 unpack = gen_vec_interleave_highv16qi;
20274 unpack = gen_vec_interleave_lowv16qi;
20278 unpack = gen_vec_interleave_highv8hi;
20280 unpack = gen_vec_interleave_lowv8hi;
20284 unpack = gen_vec_interleave_highv4si;
20286 unpack = gen_vec_interleave_lowv4si;
20289 gcc_unreachable ();
20292 dest = gen_lowpart (imode, operands[0]);
20295 tmp = force_reg (imode, CONST0_RTX (imode));
20297 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20298 operands[1], pc_rtx, pc_rtx);
20300 emit_insn (unpack (dest, operands[1], tmp));
/* Expand a conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
20308 ix86_expand_int_addcc (rtx operands[])
20310 enum rtx_code code = GET_CODE (operands[1]);
20312 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20314 rtx val = const0_rtx;
20315 bool fpcmp = false;
20316 enum machine_mode mode;
20317 rtx op0 = XEXP (operands[1], 0);
20318 rtx op1 = XEXP (operands[1], 1);
20320 if (operands[3] != const1_rtx
20321 && operands[3] != constm1_rtx)
20323 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20325 code = GET_CODE (compare_op);
20327 flags = XEXP (compare_op, 0);
20329 if (GET_MODE (flags) == CCFPmode
20330 || GET_MODE (flags) == CCFPUmode)
20333 code = ix86_fp_compare_code_to_integer (code);
20340 PUT_CODE (compare_op,
20341 reverse_condition_maybe_unordered
20342 (GET_CODE (compare_op)));
20344 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20347 mode = GET_MODE (operands[0]);
20349 /* Construct either adc or sbb insn. */
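/* A sketch of the idea: the compare has been massaged above so that
   the carry flag mirrors the condition, and adc/sbb then add or
   subtract the carry into operands[2], giving a branchless
   conditional increment or decrement.  */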
20350 if ((code == LTU) == (operands[3] == constm1_rtx))
20355 insn = gen_subqi3_carry;
20358 insn = gen_subhi3_carry;
20361 insn = gen_subsi3_carry;
20364 insn = gen_subdi3_carry;
20367 gcc_unreachable ();
20375 insn = gen_addqi3_carry;
20378 insn = gen_addhi3_carry;
20381 insn = gen_addsi3_carry;
20384 insn = gen_adddi3_carry;
20387 gcc_unreachable ();
20390 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most four parts are generated.  */
20402 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
20409 size = (GET_MODE_SIZE (mode) + 4) / 8;
20411 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
20412 gcc_assert (size >= 2 && size <= 4);
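/* E.g. on a 32-bit target, DImode and DFmode split into 2 SImode
   parts, XFmode into 3 and TFmode into 4; on a 64-bit target, XFmode
   and TFmode split into 2 parts.  */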
/* Optimize constant pool references to immediates.  This is used by fp
   moves that force all constants to memory to allow combining.  */
20416 if (MEM_P (operand) && MEM_READONLY_P (operand))
20418 rtx tmp = maybe_get_pool_constant (operand);
20423 if (MEM_P (operand) && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
20426 int ok = push_operand (operand, VOIDmode);
20430 operand = copy_rtx (operand);
20431 PUT_MODE (operand, Pmode);
20432 parts[0] = parts[1] = parts[2] = parts[3] = operand;
20436 if (GET_CODE (operand) == CONST_VECTOR)
20438 enum machine_mode imode = int_mode_for_mode (mode);
20439 /* Caution: if we looked through a constant pool memory above,
20440 the operand may actually have a different mode now. That's
20441 ok, since we want to pun this all the way back to an integer. */
20442 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
20443 gcc_assert (operand != NULL);
20449 if (mode == DImode)
20450 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20455 if (REG_P (operand))
20457 gcc_assert (reload_completed);
20458 for (i = 0; i < size; i++)
20459 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
20461 else if (offsettable_memref_p (operand))
20463 operand = adjust_address (operand, SImode, 0);
20464 parts[0] = operand;
20465 for (i = 1; i < size; i++)
20466 parts[i] = adjust_address (operand, SImode, 4 * i);
20468 else if (GET_CODE (operand) == CONST_DOUBLE)
20473 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20477 real_to_target (l, &r, mode);
20478 parts[3] = gen_int_mode (l[3], SImode);
20479 parts[2] = gen_int_mode (l[2], SImode);
20482 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
20483 parts[2] = gen_int_mode (l[2], SImode);
20486 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
20489 gcc_unreachable ();
20491 parts[1] = gen_int_mode (l[1], SImode);
20492 parts[0] = gen_int_mode (l[0], SImode);
20495 gcc_unreachable ();
20500 if (mode == TImode)
20501 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20502 if (mode == XFmode || mode == TFmode)
enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
20505 if (REG_P (operand))
20507 gcc_assert (reload_completed);
20508 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20509 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20511 else if (offsettable_memref_p (operand))
20513 operand = adjust_address (operand, DImode, 0);
20514 parts[0] = operand;
20515 parts[1] = adjust_address (operand, upper_mode, 8);
20517 else if (GET_CODE (operand) == CONST_DOUBLE)
20522 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20523 real_to_target (l, &r, mode);
20525 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20526 if (HOST_BITS_PER_WIDE_INT >= 64)
20529 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
20530 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
20533 parts[0] = immed_double_const (l[0], l[1], DImode);
20535 if (upper_mode == SImode)
20536 parts[1] = gen_int_mode (l[2], SImode);
20537 else if (HOST_BITS_PER_WIDE_INT >= 64)
20540 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
20541 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
20544 parts[1] = immed_double_const (l[2], l[3], DImode);
20547 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Operands 2-5 receive the destination parts in the correct order;
   operands 6-9 receive the corresponding source parts.  */
20560 ix86_split_long_move (rtx operands[])
20565 int collisions = 0;
20566 enum machine_mode mode = GET_MODE (operands[0]);
20567 bool collisionparts[4];
/* The DFmode expanders may ask us to move a double.
   For a 64-bit target this is a single move.  By hiding the fact
   here we simplify the i386.md splitters.  */
20572 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
/* Optimize constant pool references to immediates.  This is used by
   fp moves that force all constants to memory to allow combining.  */
20577 if (MEM_P (operands[1])
20578 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
20579 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
20580 operands[1] = get_pool_constant (XEXP (operands[1], 0));
20581 if (push_operand (operands[0], VOIDmode))
20583 operands[0] = copy_rtx (operands[0]);
20584 PUT_MODE (operands[0], Pmode);
20587 operands[0] = gen_lowpart (DImode, operands[0]);
20588 operands[1] = gen_lowpart (DImode, operands[1]);
20589 emit_move_insn (operands[0], operands[1]);
/* The only non-offsettable memory we handle is a push.  */
20594 if (push_operand (operands[0], VOIDmode))
20597 gcc_assert (!MEM_P (operands[0])
20598 || offsettable_memref_p (operands[0]));
20600 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
20601 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, take care of source operands living on the stack.  */
20604 if (push && MEM_P (operands[1])
20605 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
20607 rtx src_base = XEXP (part[1][nparts - 1], 0);
20609 /* Compensate for the stack decrement by 4. */
20610 if (!TARGET_64BIT && nparts == 3
20611 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
20612 src_base = plus_constant (src_base, 4);
/* src_base refers to the stack pointer and is
   automatically decreased by the emitted pushes.  */
20616 for (i = 0; i < nparts; i++)
20617 part[1][i] = change_address (part[1][i],
20618 GET_MODE (part[1][i]), src_base);
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
20623 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
20627 for (i = 0; i < nparts; i++)
20630 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
20631 if (collisionparts[i])
/* A collision in the middle part can be handled by reordering.  */
20636 if (collisions == 1 && nparts == 3 && collisionparts [1])
20638 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20639 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20641 else if (collisions == 1
20643 && (collisionparts [1] || collisionparts [2]))
20645 if (collisionparts [1])
20647 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20648 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20652 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20653 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
/* If there are more collisions, we can't handle them by reordering.
   Do an lea into the last part and use only one colliding move.  */
20659 else if (collisions > 1)
20665 base = part[0][nparts - 1];
/* Handle the case when the last part isn't valid for lea.
   This happens in 64-bit mode when storing the 12-byte XFmode.  */
20669 if (GET_MODE (base) != Pmode)
20670 base = gen_rtx_REG (Pmode, REGNO (base));
20672 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20673 part[1][0] = replace_equiv_address (part[1][0], base);
20674 for (i = 1; i < nparts; i++)
20676 tmp = plus_constant (base, UNITS_PER_WORD * i);
20677 part[1][i] = replace_equiv_address (part[1][i], tmp);
20688 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20689 emit_insn (gen_addsi3 (stack_pointer_rtx,
20690 stack_pointer_rtx, GEN_INT (-4)));
20691 emit_move_insn (part[0][2], part[1][2]);
20693 else if (nparts == 4)
20695 emit_move_insn (part[0][3], part[1][3]);
20696 emit_move_insn (part[0][2], part[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  If this is a
   register, that is OK: we will just use its larger counterpart.  We also
   retype memories; these come from an attempt to avoid a REX prefix when
   moving the second half of a TFmode value.  */
20705 if (GET_MODE (part[1][1]) == SImode)
20707 switch (GET_CODE (part[1][1]))
20710 part[1][1] = adjust_address (part[1][1], DImode, 0);
20714 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20718 gcc_unreachable ();
20721 if (GET_MODE (part[1][0]) == SImode)
20722 part[1][0] = part[1][1];
20725 emit_move_insn (part[0][1], part[1][1]);
20726 emit_move_insn (part[0][0], part[1][0]);
/* Choose the correct order so as not to overwrite the source before it is copied.  */
20731 if ((REG_P (part[0][0])
20732 && REG_P (part[1][1])
20733 && (REGNO (part[0][0]) == REGNO (part[1][1])
20735 && REGNO (part[0][0]) == REGNO (part[1][2]))
20737 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20739 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
20741 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20743 operands[2 + i] = part[0][j];
20744 operands[6 + i] = part[1][j];
20749 for (i = 0; i < nparts; i++)
20751 operands[2 + i] = part[0][i];
20752 operands[6 + i] = part[1][i];
20756 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20757 if (optimize_insn_for_size_p ())
20759 for (j = 0; j < nparts - 1; j++)
20760 if (CONST_INT_P (operands[6 + j])
20761 && operands[6 + j] != const0_rtx
20762 && REG_P (operands[2 + j]))
20763 for (i = j; i < nparts - 1; i++)
20764 if (CONST_INT_P (operands[7 + i])
20765 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20766 operands[7 + i] = operands[2 + j];
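/* E.g. (illustration) when both halves of a 64-bit constant are the
   value 1 on a 32-bit target, the second store can reuse the register
   that was just loaded with 1 instead of re-materializing the
   immediate.  */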
20769 for (i = 0; i < nparts; i++)
20770 emit_move_insn (operands[2 + i], operands[6 + i]);
20775 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20776 left shift by a constant, either using a single shift or
20777 a sequence of add instructions. */
20780 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20782 rtx (*insn)(rtx, rtx, rtx);
20785 || (count * ix86_cost->add <= ix86_cost->shift_const
20786 && !optimize_insn_for_size_p ()))
20788 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20789 while (count-- > 0)
20790 emit_insn (insn (operand, operand, operand));
20794 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20795 emit_insn (insn (operand, operand, GEN_INT (count)));
20800 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20802 rtx (*gen_ashl3)(rtx, rtx, rtx);
20803 rtx (*gen_shld)(rtx, rtx, rtx);
20804 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20806 rtx low[2], high[2];
20809 if (CONST_INT_P (operands[2]))
20811 split_double_mode (mode, operands, 2, low, high);
20812 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20814 if (count >= half_width)
20816 emit_move_insn (high[0], low[1]);
20817 emit_move_insn (low[0], const0_rtx);
20819 if (count > half_width)
20820 ix86_expand_ashl_const (high[0], count - half_width, mode);
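/* E.g. (illustration) a DImode shift left by 40 on a 32-bit target
   becomes high = low << 8; low = 0.  */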
20824 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20826 if (!rtx_equal_p (operands[0], operands[1]))
20827 emit_move_insn (operands[0], operands[1]);
20829 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20830 ix86_expand_ashl_const (low[0], count, mode);
20835 split_double_mode (mode, operands, 1, low, high);
20837 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20839 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
20843 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20845 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20847 ix86_expand_clear (low[0]);
20848 ix86_expand_clear (high[0]);
20849 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20851 d = gen_lowpart (QImode, low[0]);
20852 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20853 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20854 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20856 d = gen_lowpart (QImode, high[0]);
20857 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20858 s = gen_rtx_NE (QImode, flags, const0_rtx);
20859 emit_insn (gen_rtx_SET (VOIDmode, d, s));
/* Otherwise, we can get the same results by manually performing
   a bit extract operation on bit 5/6, and then performing the two
   shifts.  The two methods of getting 0/1 into low/high are exactly
   the same size.  Avoiding the shift in the bit extract case helps
   Pentium 4 a bit; no one else seems to care much either way.  */
20869 enum machine_mode half_mode;
20870 rtx (*gen_lshr3)(rtx, rtx, rtx);
20871 rtx (*gen_and3)(rtx, rtx, rtx);
20872 rtx (*gen_xor3)(rtx, rtx, rtx);
20873 HOST_WIDE_INT bits;
20876 if (mode == DImode)
20878 half_mode = SImode;
20879 gen_lshr3 = gen_lshrsi3;
20880 gen_and3 = gen_andsi3;
20881 gen_xor3 = gen_xorsi3;
20886 half_mode = DImode;
20887 gen_lshr3 = gen_lshrdi3;
20888 gen_and3 = gen_anddi3;
20889 gen_xor3 = gen_xordi3;
20893 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20894 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20896 x = gen_lowpart (half_mode, operands[2]);
20897 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
20899 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20900 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20901 emit_move_insn (low[0], high[0]);
20902 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
20905 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20906 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
20910 if (operands[1] == constm1_rtx)
20912 /* For -1 << N, we can avoid the shld instruction, because we
20913 know that we're shifting 0...31/63 ones into a -1. */
20914 emit_move_insn (low[0], constm1_rtx);
20915 if (optimize_insn_for_size_p ())
20916 emit_move_insn (high[0], low[0]);
20918 emit_move_insn (high[0], constm1_rtx);
20922 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20924 if (!rtx_equal_p (operands[0], operands[1]))
20925 emit_move_insn (operands[0], operands[1]);
20927 split_double_mode (mode, operands, 1, low, high);
20928 emit_insn (gen_shld (high[0], low[0], operands[2]));
20931 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20933 if (TARGET_CMOVE && scratch)
20935 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20936 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20938 ix86_expand_clear (scratch);
20939 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20943 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20944 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20946 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
20951 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
20953 rtx (*gen_ashr3)(rtx, rtx, rtx)
20954 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
20955 rtx (*gen_shrd)(rtx, rtx, rtx);
20956 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20958 rtx low[2], high[2];
20961 if (CONST_INT_P (operands[2]))
20963 split_double_mode (mode, operands, 2, low, high);
20964 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20966 if (count == GET_MODE_BITSIZE (mode) - 1)
20968 emit_move_insn (high[0], high[1]);
20969 emit_insn (gen_ashr3 (high[0], high[0],
20970 GEN_INT (half_width - 1)));
20971 emit_move_insn (low[0], high[0]);
20974 else if (count >= half_width)
20976 emit_move_insn (low[0], high[1]);
20977 emit_move_insn (high[0], low[0]);
20978 emit_insn (gen_ashr3 (high[0], high[0],
20979 GEN_INT (half_width - 1)));
20981 if (count > half_width)
20982 emit_insn (gen_ashr3 (low[0], low[0],
20983 GEN_INT (count - half_width)));
20987 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20989 if (!rtx_equal_p (operands[0], operands[1]))
20990 emit_move_insn (operands[0], operands[1]);
20992 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20993 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
20998 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21000 if (!rtx_equal_p (operands[0], operands[1]))
21001 emit_move_insn (operands[0], operands[1]);
21003 split_double_mode (mode, operands, 1, low, high);
21005 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21006 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
21008 if (TARGET_CMOVE && scratch)
21010 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21011 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21013 emit_move_insn (scratch, high[0]);
21014 emit_insn (gen_ashr3 (scratch, scratch,
21015 GEN_INT (half_width - 1)));
21016 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21021 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
21022 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
21024 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
21030 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
21032 rtx (*gen_lshr3)(rtx, rtx, rtx)
21033 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
21034 rtx (*gen_shrd)(rtx, rtx, rtx);
21035 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21037 rtx low[2], high[2];
21040 if (CONST_INT_P (operands[2]))
21042 split_double_mode (mode, operands, 2, low, high);
21043 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21045 if (count >= half_width)
21047 emit_move_insn (low[0], high[1]);
21048 ix86_expand_clear (high[0]);
21050 if (count > half_width)
21051 emit_insn (gen_lshr3 (low[0], low[0],
21052 GEN_INT (count - half_width)));
21056 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21058 if (!rtx_equal_p (operands[0], operands[1]))
21059 emit_move_insn (operands[0], operands[1]);
21061 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21062 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
21067 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21069 if (!rtx_equal_p (operands[0], operands[1]))
21070 emit_move_insn (operands[0], operands[1]);
21072 split_double_mode (mode, operands, 1, low, high);
21074 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21075 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
21077 if (TARGET_CMOVE && scratch)
21079 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21080 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21082 ix86_expand_clear (scratch);
21083 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21088 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21089 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21091 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
21098 predict_jump (int prob)
21100 rtx insn = get_last_insn ();
21101 gcc_assert (JUMP_P (insn));
21102 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Tests whether
   VARIABLE is aligned to VALUE bytes; if it is, jumps to the label
   that is returned.  EPILOGUE selects the branch probability of the
   prediction note.  */
21108 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21110 rtx label = gen_label_rtx ();
21111 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
21112 if (GET_MODE (variable) == DImode)
21113 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21115 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21116 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
21119 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21121 predict_jump (REG_BR_PROB_BASE * 90 / 100);
/* Decrease COUNTREG by VALUE.  */
21127 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21129 rtx (*gen_add)(rtx, rtx, rtx)
21130 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
21132 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
/* Zero-extend EXP, which may be SImode, to a Pmode register.  */
21137 ix86_zero_extend_to_Pmode (rtx exp)
21140 if (GET_MODE (exp) == VOIDmode)
21141 return force_reg (Pmode, exp);
21142 if (GET_MODE (exp) == Pmode)
21143 return copy_to_mode_reg (Pmode, exp);
21144 r = gen_reg_rtx (Pmode);
21145 emit_insn (gen_zero_extendsidi2 (r, exp));
21149 /* Divide COUNTREG by SCALE. */
21151 scale_counter (rtx countreg, int scale)
21157 if (CONST_INT_P (countreg))
21158 return GEN_INT (INTVAL (countreg) / scale);
21159 gcc_assert (REG_P (countreg));
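/* SCALE is the chunk size in bytes and thus a power of two here, so
   the division can be emitted as a logical shift right by
   exact_log2 (scale).  */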
21161 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21162 GEN_INT (exact_log2 (scale)),
21163 NULL, 1, OPTAB_DIRECT);
/* Return the mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
21170 static enum machine_mode
21171 counter_mode (rtx count_exp)
21173 if (GET_MODE (count_exp) != VOIDmode)
21174 return GET_MODE (count_exp);
21175 if (!CONST_INT_P (count_exp))
21177 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop to move memory from
   SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
   overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory to VALUE (assumed to be
   in MODE).
   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */
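/* A rough sketch of the emitted control flow (illustrative only; the
   exact rtl depends on MODE, UNROLL and the target):

       size = count & ~(piece_size - 1);
       if (size == 0) goto out;        // emitted only when piece_size == 1
       iter = 0;
     top:
       copy or store UNROLL chunks of MODE at dest + iter (, src + iter);
       iter += piece_size;
       if (iter < size) goto top;
       destptr += iter;  srcptr += iter;
     out:  */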
21192 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
21193 rtx destptr, rtx srcptr, rtx value,
21194 rtx count, enum machine_mode mode, int unroll,
21197 rtx out_label, top_label, iter, tmp;
21198 enum machine_mode iter_mode = counter_mode (count);
21199 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
21200 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
21206 top_label = gen_label_rtx ();
21207 out_label = gen_label_rtx ();
21208 iter = gen_reg_rtx (iter_mode);
21210 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21211 NULL, 1, OPTAB_DIRECT);
21212 /* Those two should combine. */
21213 if (piece_size == const1_rtx)
21215 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21217 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21219 emit_move_insn (iter, const0_rtx);
21221 emit_label (top_label);
21223 tmp = convert_modes (Pmode, iter_mode, iter, true);
21224 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21225 destmem = change_address (destmem, mode, x_addr);
21229 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21230 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32-bit mode.  */
21235 if (!TARGET_64BIT && 0)
21237 for (i = 0; i < unroll; i++)
21242 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21244 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21246 emit_move_insn (destmem, srcmem);
21252 gcc_assert (unroll <= 4);
21253 for (i = 0; i < unroll; i++)
21255 tmpreg[i] = gen_reg_rtx (mode);
21259 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21261 emit_move_insn (tmpreg[i], srcmem);
21263 for (i = 0; i < unroll; i++)
21268 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21270 emit_move_insn (destmem, tmpreg[i]);
21275 for (i = 0; i < unroll; i++)
21279 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21280 emit_move_insn (destmem, value);
21283 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
21284 true, OPTAB_LIB_WIDEN);
21286 emit_move_insn (iter, tmp);
21288 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
21290 if (expected_size != -1)
21292 expected_size /= GET_MODE_SIZE (mode) * unroll;
21293 if (expected_size == 0)
21295 else if (expected_size > REG_BR_PROB_BASE)
21296 predict_jump (REG_BR_PROB_BASE - 1);
21298 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
21301 predict_jump (REG_BR_PROB_BASE * 80 / 100);
21302 iter = ix86_zero_extend_to_Pmode (iter);
21303 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
21304 true, OPTAB_LIB_WIDEN);
21305 if (tmp != destptr)
21306 emit_move_insn (destptr, tmp);
21309 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
21310 true, OPTAB_LIB_WIDEN);
21312 emit_move_insn (srcptr, tmp);
21314 emit_label (out_label);
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
21320 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
21321 rtx destptr, rtx srcptr,
21323 enum machine_mode mode)
21328 HOST_WIDE_INT rounded_count;
21330 /* If the size is known, it is shorter to use rep movs. */
21331 if (mode == QImode && CONST_INT_P (count)
21332 && !(INTVAL (count) & 3))
21335 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21336 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21337 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
21338 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
21339 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21340 if (mode != QImode)
21342 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21343 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21344 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21345 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
21346 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21347 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
21351 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21352 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
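/* DESTEXP and SRCEXP describe the final values of the pointer
   registers; the rep_mov pattern uses them to express the side
   effects of the string instruction on those registers.  */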
21354 if (CONST_INT_P (count))
21356 rounded_count = (INTVAL (count)
21357 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21358 destmem = shallow_copy_rtx (destmem);
21359 srcmem = shallow_copy_rtx (srcmem);
21360 set_mem_size (destmem, rounded_count);
21361 set_mem_size (srcmem, rounded_count);
21365 if (MEM_SIZE_KNOWN_P (destmem))
21366 clear_mem_size (destmem);
21367 if (MEM_SIZE_KNOWN_P (srcmem))
21368 clear_mem_size (srcmem);
21370 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
21377 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
21378 rtx count, enum machine_mode mode,
21383 HOST_WIDE_INT rounded_count;
21385 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21386 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21387 value = force_reg (mode, gen_lowpart (mode, value));
21388 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21389 if (mode != QImode)
21391 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21392 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21393 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21396 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21397 if (orig_value == const0_rtx && CONST_INT_P (count))
21399 rounded_count = (INTVAL (count)
21400 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21401 destmem = shallow_copy_rtx (destmem);
21402 set_mem_size (destmem, rounded_count);
21404 else if (MEM_SIZE_KNOWN_P (destmem))
21405 clear_mem_size (destmem);
21406 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
21410 emit_strmov (rtx destmem, rtx srcmem,
21411 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
21413 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
21414 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21415 emit_insn (gen_strmov (destptr, dest, srcptr, src));
/* Output code to copy at most COUNT & (MAX_SIZE - 1) bytes from SRC to DEST.  */
21420 expand_movmem_epilogue (rtx destmem, rtx srcmem,
21421 rtx destptr, rtx srcptr, rtx count, int max_size)
21424 if (CONST_INT_P (count))
21426 HOST_WIDE_INT countval = INTVAL (count);
21429 if ((countval & 0x10) && max_size > 16)
21433 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21434 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
21437 gcc_unreachable ();
21440 if ((countval & 0x08) && max_size > 8)
21443 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21446 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21447 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
21451 if ((countval & 0x04) && max_size > 4)
21453 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21456 if ((countval & 0x02) && max_size > 2)
21458 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
21461 if ((countval & 0x01) && max_size > 1)
21463 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
21470 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
21471 count, 1, OPTAB_DIRECT);
21472 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
21473 count, QImode, 1, 4);
/* When single-instruction string operations are available, we can cheaply
   advance the dest and src pointers.  Otherwise we save code size by
   maintaining an offset (zero is readily available from the preceding
   rep operation) and using x86 addressing modes.  */
21481 if (TARGET_SINGLE_STRINGOP)
21485 rtx label = ix86_expand_aligntest (count, 4, true);
21486 src = change_address (srcmem, SImode, srcptr);
21487 dest = change_address (destmem, SImode, destptr);
21488 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21489 emit_label (label);
21490 LABEL_NUSES (label) = 1;
21494 rtx label = ix86_expand_aligntest (count, 2, true);
21495 src = change_address (srcmem, HImode, srcptr);
21496 dest = change_address (destmem, HImode, destptr);
21497 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21498 emit_label (label);
21499 LABEL_NUSES (label) = 1;
21503 rtx label = ix86_expand_aligntest (count, 1, true);
21504 src = change_address (srcmem, QImode, srcptr);
21505 dest = change_address (destmem, QImode, destptr);
21506 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21507 emit_label (label);
21508 LABEL_NUSES (label) = 1;
21513 rtx offset = force_reg (Pmode, const0_rtx);
21518 rtx label = ix86_expand_aligntest (count, 4, true);
21519 src = change_address (srcmem, SImode, srcptr);
21520 dest = change_address (destmem, SImode, destptr);
21521 emit_move_insn (dest, src);
21522 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21523 true, OPTAB_LIB_WIDEN);
21525 emit_move_insn (offset, tmp);
21526 emit_label (label);
21527 LABEL_NUSES (label) = 1;
21531 rtx label = ix86_expand_aligntest (count, 2, true);
21532 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21533 src = change_address (srcmem, HImode, tmp);
21534 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21535 dest = change_address (destmem, HImode, tmp);
21536 emit_move_insn (dest, src);
21537 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
21538 true, OPTAB_LIB_WIDEN);
21540 emit_move_insn (offset, tmp);
21541 emit_label (label);
21542 LABEL_NUSES (label) = 1;
21546 rtx label = ix86_expand_aligntest (count, 1, true);
21547 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21548 src = change_address (srcmem, QImode, tmp);
21549 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21550 dest = change_address (destmem, QImode, tmp);
21551 emit_move_insn (dest, src);
21552 emit_label (label);
21553 LABEL_NUSES (label) = 1;
/* Output code to set at most COUNT & (MAX_SIZE - 1) bytes starting at DEST.  */
21560 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
21561 rtx count, int max_size)
21564 expand_simple_binop (counter_mode (count), AND, count,
21565 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
21566 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
21567 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most COUNT & (MAX_SIZE - 1) bytes starting at DEST.  */
21573 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
21577 if (CONST_INT_P (count))
21579 HOST_WIDE_INT countval = INTVAL (count);
21582 if ((countval & 0x10) && max_size > 16)
21586 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21587 emit_insn (gen_strset (destptr, dest, value));
21588 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
21589 emit_insn (gen_strset (destptr, dest, value));
21592 gcc_unreachable ();
21595 if ((countval & 0x08) && max_size > 8)
21599 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21600 emit_insn (gen_strset (destptr, dest, value));
21604 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21605 emit_insn (gen_strset (destptr, dest, value));
21606 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
21607 emit_insn (gen_strset (destptr, dest, value));
21611 if ((countval & 0x04) && max_size > 4)
21613 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21614 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21617 if ((countval & 0x02) && max_size > 2)
21619 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
21620 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21623 if ((countval & 0x01) && max_size > 1)
21625 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
21626 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21633 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
21638 rtx label = ix86_expand_aligntest (count, 16, true);
21641 dest = change_address (destmem, DImode, destptr);
21642 emit_insn (gen_strset (destptr, dest, value));
21643 emit_insn (gen_strset (destptr, dest, value));
21647 dest = change_address (destmem, SImode, destptr);
21648 emit_insn (gen_strset (destptr, dest, value));
21649 emit_insn (gen_strset (destptr, dest, value));
21650 emit_insn (gen_strset (destptr, dest, value));
21651 emit_insn (gen_strset (destptr, dest, value));
21653 emit_label (label);
21654 LABEL_NUSES (label) = 1;
21658 rtx label = ix86_expand_aligntest (count, 8, true);
21661 dest = change_address (destmem, DImode, destptr);
21662 emit_insn (gen_strset (destptr, dest, value));
21666 dest = change_address (destmem, SImode, destptr);
21667 emit_insn (gen_strset (destptr, dest, value));
21668 emit_insn (gen_strset (destptr, dest, value));
21670 emit_label (label);
21671 LABEL_NUSES (label) = 1;
21675 rtx label = ix86_expand_aligntest (count, 4, true);
21676 dest = change_address (destmem, SImode, destptr);
21677 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21678 emit_label (label);
21679 LABEL_NUSES (label) = 1;
21683 rtx label = ix86_expand_aligntest (count, 2, true);
21684 dest = change_address (destmem, HImode, destptr);
21685 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21686 emit_label (label);
21687 LABEL_NUSES (label) = 1;
21691 rtx label = ix86_expand_aligntest (count, 1, true);
21692 dest = change_address (destmem, QImode, destptr);
21693 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21694 emit_label (label);
21695 LABEL_NUSES (label) = 1;
/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
   to ALIGN, to DESIRED_ALIGNMENT.  */
21702 expand_movmem_prologue (rtx destmem, rtx srcmem,
21703 rtx destptr, rtx srcptr, rtx count,
21704 int align, int desired_alignment)
21706 if (align <= 1 && desired_alignment > 1)
21708 rtx label = ix86_expand_aligntest (destptr, 1, false);
21709 srcmem = change_address (srcmem, QImode, srcptr);
21710 destmem = change_address (destmem, QImode, destptr);
21711 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21712 ix86_adjust_counter (count, 1);
21713 emit_label (label);
21714 LABEL_NUSES (label) = 1;
21716 if (align <= 2 && desired_alignment > 2)
21718 rtx label = ix86_expand_aligntest (destptr, 2, false);
21719 srcmem = change_address (srcmem, HImode, srcptr);
21720 destmem = change_address (destmem, HImode, destptr);
21721 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21722 ix86_adjust_counter (count, 2);
21723 emit_label (label);
21724 LABEL_NUSES (label) = 1;
21726 if (align <= 4 && desired_alignment > 4)
21728 rtx label = ix86_expand_aligntest (destptr, 4, false);
21729 srcmem = change_address (srcmem, SImode, srcptr);
21730 destmem = change_address (destmem, SImode, destptr);
21731 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21732 ix86_adjust_counter (count, 4);
21733 emit_label (label);
21734 LABEL_NUSES (label) = 1;
21736 gcc_assert (desired_alignment <= 8);
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is the number of bytes that need to be copied.  */
21742 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21743 int desired_align, int align_bytes)
21746 rtx orig_dst = dst;
21747 rtx orig_src = src;
21749 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21750 if (src_align_bytes >= 0)
21751 src_align_bytes = desired_align - src_align_bytes;
21752 if (align_bytes & 1)
21754 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21755 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21757 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21759 if (align_bytes & 2)
21761 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21762 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
21763 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21764 set_mem_align (dst, 2 * BITS_PER_UNIT);
21765 if (src_align_bytes >= 0
21766 && (src_align_bytes & 1) == (align_bytes & 1)
21767 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21768 set_mem_align (src, 2 * BITS_PER_UNIT);
21770 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21772 if (align_bytes & 4)
21774 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21775 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21776 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21777 set_mem_align (dst, 4 * BITS_PER_UNIT);
21778 if (src_align_bytes >= 0)
21780 unsigned int src_align = 0;
21781 if ((src_align_bytes & 3) == (align_bytes & 3))
21783 else if ((src_align_bytes & 1) == (align_bytes & 1))
21785 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21786 set_mem_align (src, src_align * BITS_PER_UNIT);
21789 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21791 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21792 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21793 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21794 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21795 if (src_align_bytes >= 0)
21797 unsigned int src_align = 0;
21798 if ((src_align_bytes & 7) == (align_bytes & 7))
21800 else if ((src_align_bytes & 3) == (align_bytes & 3))
21802 else if ((src_align_bytes & 1) == (align_bytes & 1))
21804 if (src_align > (unsigned int) desired_align)
21805 src_align = desired_align;
21806 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21807 set_mem_align (src, src_align * BITS_PER_UNIT);
21809 if (MEM_SIZE_KNOWN_P (orig_dst))
21810 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21811 if (MEM_SIZE_KNOWN_P (orig_src))
21812 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
/* Store enough bytes at DEST to align it, known to be aligned to ALIGN,
   to DESIRED_ALIGNMENT.  */
21820 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21821 int align, int desired_alignment)
21823 if (align <= 1 && desired_alignment > 1)
21825 rtx label = ix86_expand_aligntest (destptr, 1, false);
21826 destmem = change_address (destmem, QImode, destptr);
21827 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21828 ix86_adjust_counter (count, 1);
21829 emit_label (label);
21830 LABEL_NUSES (label) = 1;
21832 if (align <= 2 && desired_alignment > 2)
21834 rtx label = ix86_expand_aligntest (destptr, 2, false);
21835 destmem = change_address (destmem, HImode, destptr);
21836 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21837 ix86_adjust_counter (count, 2);
21838 emit_label (label);
21839 LABEL_NUSES (label) = 1;
21841 if (align <= 4 && desired_alignment > 4)
21843 rtx label = ix86_expand_aligntest (destptr, 4, false);
21844 destmem = change_address (destmem, SImode, destptr);
21845 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21846 ix86_adjust_counter (count, 4);
21847 emit_label (label);
21848 LABEL_NUSES (label) = 1;
21850 gcc_assert (desired_alignment <= 8);
/* Store enough bytes at DST to align it to DESIRED_ALIGN.
   ALIGN_BYTES is the number of bytes that need to be stored.  */
21856 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21857 int desired_align, int align_bytes)
21860 rtx orig_dst = dst;
21861 if (align_bytes & 1)
21863 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21865 emit_insn (gen_strset (destreg, dst,
21866 gen_lowpart (QImode, value)));
21868 if (align_bytes & 2)
21870 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21871 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21872 set_mem_align (dst, 2 * BITS_PER_UNIT);
21874 emit_insn (gen_strset (destreg, dst,
21875 gen_lowpart (HImode, value)));
21877 if (align_bytes & 4)
21879 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21880 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21881 set_mem_align (dst, 4 * BITS_PER_UNIT);
21883 emit_insn (gen_strset (destreg, dst,
21884 gen_lowpart (SImode, value)));
21886 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21887 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21888 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21889 if (MEM_SIZE_KNOWN_P (orig_dst))
21890 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
/* Given COUNT and EXPECTED_SIZE, decide on the code generation strategy for the string operation.  */
21895 static enum stringop_alg
21896 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
21897 int *dynamic_check)
21899 const struct stringop_algs * algs;
21900 bool optimize_for_speed;
21901 /* Algorithms using the rep prefix want at least edi and ecx;
21902 additionally, memset wants eax and memcpy wants esi. Don't
21903 consider such algorithms if the user has appropriated those
21904 registers for their own purposes. */
21905 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
21907 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
21909 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21910 || (alg != rep_prefix_1_byte \
21911 && alg != rep_prefix_4_byte \
21912 && alg != rep_prefix_8_byte))
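/* ALG_USABLE_P is true unless ALG is one of the rep_prefix_* variants
   and the rep prefix is unusable because its implicit registers are
   fixed (see above).  */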
21913 const struct processor_costs *cost;
21915 /* Even if the string operation call is cold, we still might spend a lot
21916 of time processing large blocks. */
21917 if (optimize_function_for_size_p (cfun)
21918 || (optimize_insn_for_size_p ()
21919 && expected_size != -1 && expected_size < 256))
21920 optimize_for_speed = false;
21922 optimize_for_speed = true;
21924 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
21926 *dynamic_check = -1;
21928 algs = &cost->memset[TARGET_64BIT != 0];
21930 algs = &cost->memcpy[TARGET_64BIT != 0];
21931 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
21932 return ix86_stringop_alg;
21933 /* rep; movq or rep; movl is the smallest variant. */
21934 else if (!optimize_for_speed)
21936 if (!count || (count & 3))
21937 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
21939 return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive
   to set up.  */
21943 else if (expected_size != -1 && expected_size < 4)
21944 return loop_1_byte;
21945 else if (expected_size != -1)
21948 enum stringop_alg alg = libcall;
21949 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21951 /* We get here if the algorithms that were not libcall-based
21952 were rep-prefix based and we are unable to use rep prefixes
21953 based on global register usage. Break out of the loop and
21954 use the heuristic below. */
21955 if (algs->size[i].max == 0)
21957 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
21959 enum stringop_alg candidate = algs->size[i].alg;
21961 if (candidate != libcall && ALG_USABLE_P (candidate))
/* Honor TARGET_INLINE_ALL_STRINGOPS by picking
   the last non-libcall inline algorithm.  */
21965 if (TARGET_INLINE_ALL_STRINGOPS)
/* When the current size is best copied by a libcall, but we are
   still forced to inline, run the heuristic below that picks
   code for medium-sized blocks.  */
21970 if (alg != libcall)
21974 else if (ALG_USABLE_P (candidate))
21978 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful
   choice.  We look for the maximal size of a block that is faster to
   copy by hand and take blocks of at most that size, guessing that the
   average size will be roughly half of the block's maximum size.

   If this turns out to be bad, we might simply specify the preferred
   choice in ix86_costs.  */
21987 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21988 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
21991 enum stringop_alg alg;
21993 bool any_alg_usable_p = true;
21995 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21997 enum stringop_alg candidate = algs->size[i].alg;
21998 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
22000 if (candidate != libcall && candidate
22001 && ALG_USABLE_P (candidate))
22002 max = algs->size[i].max;
22004 /* If there aren't any usable algorithms, then recursing on
22005 smaller sizes isn't going to find anything. Just return the
22006 simple byte-at-a-time copy loop. */
22007 if (!any_alg_usable_p)
22009 /* Pick something reasonable. */
22010 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22011 *dynamic_check = 128;
22012 return loop_1_byte;
22016 alg = decide_alg (count, max / 2, memset, dynamic_check);
22017 gcc_assert (*dynamic_check == -1);
22018 gcc_assert (alg != libcall);
22019 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22020 *dynamic_check = max;
22023 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
22024 #undef ALG_USABLE_P
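/* Editor's note: an illustrative, self-contained model of the size-table
   lookup performed by decide_alg above (a sketch, not GCC internals; the
   table entries are invented for the example).  Each entry gives the
   largest block size an algorithm should handle; max == -1 means
   "unbounded", mirroring stringop_algs.  */
#include <stddef.h>

enum example_alg { EX_LOOP_1_BYTE, EX_REP_PREFIX_4_BYTE, EX_LIBCALL };

struct example_alg_entry { int max; enum example_alg alg; };

static const struct example_alg_entry example_table[] = {
  { 32, EX_LOOP_1_BYTE },          /* tiny blocks: simple byte loop */
  { 512, EX_REP_PREFIX_4_BYTE },   /* medium blocks: rep movsl */
  { -1, EX_LIBCALL },              /* everything larger: call memcpy */
};

static enum example_alg
example_pick_alg (int expected_size)
{
  size_t i;
  for (i = 0; i < sizeof example_table / sizeof example_table[0]; i++)
    if (example_table[i].max == -1 || expected_size <= example_table[i].max)
      return example_table[i].alg;
  return EX_LIBCALL;
}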
22027 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22028 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22030 decide_alignment (int align,
22031 enum stringop_alg alg,
22034 int desired_align = 0;
22038 gcc_unreachable ();
22040 case unrolled_loop:
22041 desired_align = GET_MODE_SIZE (Pmode);
22043 case rep_prefix_8_byte:
22046 case rep_prefix_4_byte:
22047 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
22048 copying a whole cache line at once. */
22049 if (TARGET_PENTIUMPRO)
22054 case rep_prefix_1_byte:
22055 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
22056 copying a whole cache line at once. */
22057 if (TARGET_PENTIUMPRO)
22071 if (desired_align < align)
22072 desired_align = align;
22073 if (expected_size != -1 && expected_size < 4)
22074 desired_align = align;
22075 return desired_align;
22078 /* Return the smallest power of 2 greater than VAL. */
22080 smallest_pow2_greater_than (int val)
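/* Editor's note: the body is elided here; a minimal sketch consistent with
   the comment's contract (smallest power of 2 strictly greater than VAL)
   would be:

       int ret = 1;
       while (ret <= val)
	 ret <<= 1;
       return ret;

   e.g. val = 7 -> 8, val = 8 -> 16, val = 0 -> 1.  */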
22088 /* Expand string move (memcpy) operation. Use i386 string operations
22089 when profitable. expand_setmem contains similar code. The code
22090 depends upon architecture, block size and alignment, but always has
22091 the same overall structure:
22093 1) Prologue guard: Conditional that jumps up to epilogues for small
22094 blocks that can be handled by epilogue alone. This is faster
22095 but also needed for correctness, since the prologue assumes the block
22096 is larger than the desired alignment.
22098 Optional dynamic check for size and libcall for large
22099 blocks is emitted here too, with -minline-stringops-dynamically.
22101 2) Prologue: copy first few bytes in order to get destination
22102 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22103 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22104 copied. We emit either a jump tree on power of two sized
22105 blocks, or a byte loop.
22107 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22108 with specified algorithm.
22110 4) Epilogue: code copying tail of the block that is too small to be
22111 handled by main body (or up to size guarded by prologue guard). */
22114 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
22115 rtx expected_align_exp, rtx expected_size_exp)
22121 rtx jump_around_label = NULL;
22122 HOST_WIDE_INT align = 1;
22123 unsigned HOST_WIDE_INT count = 0;
22124 HOST_WIDE_INT expected_size = -1;
22125 int size_needed = 0, epilogue_size_needed;
22126 int desired_align = 0, align_bytes = 0;
22127 enum stringop_alg alg;
22129 bool need_zero_guard = false;
22131 if (CONST_INT_P (align_exp))
22132 align = INTVAL (align_exp);
22133 /* i386 can do misaligned access at a reasonably increased cost. */
22134 if (CONST_INT_P (expected_align_exp)
22135 && INTVAL (expected_align_exp) > align)
22136 align = INTVAL (expected_align_exp);
22137 /* ALIGN is the minimum of destination and source alignment, but we care here
22138 just about destination alignment. */
22139 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
22140 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
22142 if (CONST_INT_P (count_exp))
22143 count = expected_size = INTVAL (count_exp);
22144 if (CONST_INT_P (expected_size_exp) && count == 0)
22145 expected_size = INTVAL (expected_size_exp);
22147 /* Make sure we don't need to care about overflow later on. */
22148 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22151 /* Step 0: Decide on preferred algorithm, desired alignment and
22152 size of chunks to be copied by main loop. */
22154 alg = decide_alg (count, expected_size, false, &dynamic_check);
22155 desired_align = decide_alignment (align, alg, expected_size);
22157 if (!TARGET_ALIGN_STRINGOPS)
22158 align = desired_align;
22160 if (alg == libcall)
22162 gcc_assert (alg != no_stringop);
22164 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
22165 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22166 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
22171 gcc_unreachable ();
22173 need_zero_guard = true;
22174 size_needed = GET_MODE_SIZE (Pmode);
22176 case unrolled_loop:
22177 need_zero_guard = true;
22178 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
22180 case rep_prefix_8_byte:
22183 case rep_prefix_4_byte:
22186 case rep_prefix_1_byte:
22190 need_zero_guard = true;
22195 epilogue_size_needed = size_needed;
22197 /* Step 1: Prologue guard. */
22199 /* Alignment code needs count to be in register. */
22200 if (CONST_INT_P (count_exp) && desired_align > align)
22202 if (INTVAL (count_exp) > desired_align
22203 && INTVAL (count_exp) > size_needed)
22206 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22207 if (align_bytes <= 0)
22210 align_bytes = desired_align - align_bytes;
22212 if (align_bytes == 0)
22213 count_exp = force_reg (counter_mode (count_exp), count_exp);
22215 gcc_assert (desired_align >= 1 && align >= 1);
22217 /* Ensure that alignment prologue won't copy past end of block. */
22218 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22220 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22221 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22222 Make sure it is a power of 2. */
22223 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22227 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22229 /* If main algorithm works on QImode, no epilogue is needed.
22230 For small sizes just don't align anything. */
22231 if (size_needed == 1)
22232 desired_align = align;
22239 label = gen_label_rtx ();
22240 emit_cmp_and_jump_insns (count_exp,
22241 GEN_INT (epilogue_size_needed),
22242 LTU, 0, counter_mode (count_exp), 1, label);
22243 if (expected_size == -1 || expected_size < epilogue_size_needed)
22244 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22246 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22250 /* Emit code to decide at run time whether library call or inline should be
22252 if (dynamic_check != -1)
22254 if (CONST_INT_P (count_exp))
22256 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
22258 emit_block_move_via_libcall (dst, src, count_exp, false);
22259 count_exp = const0_rtx;
22265 rtx hot_label = gen_label_rtx ();
22266 jump_around_label = gen_label_rtx ();
22267 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22268 LEU, 0, GET_MODE (count_exp), 1, hot_label);
22269 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22270 emit_block_move_via_libcall (dst, src, count_exp, false);
22271 emit_jump (jump_around_label);
22272 emit_label (hot_label);
22276 /* Step 2: Alignment prologue. */
22278 if (desired_align > align)
22280 if (align_bytes == 0)
22282 /* Except for the first move in epilogue, we no longer know
22283 constant offset in aliasing info. It doesn't seem worth
22284 the pain to maintain it for the first move, so throw away
22286 src = change_address (src, BLKmode, srcreg);
22287 dst = change_address (dst, BLKmode, destreg);
22288 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
22293 /* If we know how many bytes need to be stored before dst is
22294 sufficiently aligned, maintain aliasing info accurately. */
22295 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
22296 desired_align, align_bytes);
22297 count_exp = plus_constant (count_exp, -align_bytes);
22298 count -= align_bytes;
22300 if (need_zero_guard
22301 && (count < (unsigned HOST_WIDE_INT) size_needed
22302 || (align_bytes == 0
22303 && count < ((unsigned HOST_WIDE_INT) size_needed
22304 + desired_align - align))))
22306 /* It is possible that we copied enough so the main loop will not
22308 gcc_assert (size_needed > 1);
22309 if (label == NULL_RTX)
22310 label = gen_label_rtx ();
22311 emit_cmp_and_jump_insns (count_exp,
22312 GEN_INT (size_needed),
22313 LTU, 0, counter_mode (count_exp), 1, label);
22314 if (expected_size == -1
22315 || expected_size < (desired_align - align) / 2 + size_needed)
22316 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22318 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22321 if (label && size_needed == 1)
22323 emit_label (label);
22324 LABEL_NUSES (label) = 1;
22326 epilogue_size_needed = 1;
22328 else if (label == NULL_RTX)
22329 epilogue_size_needed = size_needed;
22331 /* Step 3: Main loop. */
22337 gcc_unreachable ();
22339 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22340 count_exp, QImode, 1, expected_size);
22343 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22344 count_exp, Pmode, 1, expected_size);
22346 case unrolled_loop:
22347 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
22348 registers for 4 temporaries anyway. */
22349 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22350 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
22353 case rep_prefix_8_byte:
22354 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22357 case rep_prefix_4_byte:
22358 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22361 case rep_prefix_1_byte:
22362 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22366 /* Properly adjust the offset of src and dest memory for aliasing. */
22367 if (CONST_INT_P (count_exp))
22369 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
22370 (count / size_needed) * size_needed);
22371 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22372 (count / size_needed) * size_needed);
22376 src = change_address (src, BLKmode, srcreg);
22377 dst = change_address (dst, BLKmode, destreg);
22380 /* Step 4: Epilogue to copy the remaining bytes. */
22384 /* When the main loop is done, COUNT_EXP might hold the original count,
22385 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22386 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22387 bytes. Compensate if needed. */
22389 if (size_needed < epilogue_size_needed)
22392 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22393 GEN_INT (size_needed - 1), count_exp, 1,
22395 if (tmp != count_exp)
22396 emit_move_insn (count_exp, tmp);
22398 emit_label (label);
22399 LABEL_NUSES (label) = 1;
22402 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22403 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
22404 epilogue_size_needed);
22405 if (jump_around_label)
22406 emit_label (jump_around_label);
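/* Editor's note: an illustrative C model of the four-step structure emitted
   by the expander above (a sketch under simplifying assumptions -- fixed
   8-byte chunks, no dynamic libcall check; not the literal generated
   code).  */
#include <stdint.h>
#include <stddef.h>
#include <string.h>

static void
example_movmem_shape (char *dst, const char *src, size_t count)
{
  if (count >= 8)                        /* 1) prologue guard */
    {
      while (((uintptr_t) dst & 7) != 0) /* 2) alignment prologue */
        {
          *dst++ = *src++;
          count--;
        }
      while (count >= 8)                 /* 3) main loop, 8-byte chunks */
        {
          memcpy (dst, src, 8);
          dst += 8;
          src += 8;
          count -= 8;
        }
    }
  while (count--)                        /* 4) epilogue copies the tail */
    *dst++ = *src++;
}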
22410 /* Helper function for memset. For QImode value 0xXY produce
22411 0xXYXYXYXY of the width specified by MODE. This is essentially
22412 a * 0x10101010, but we can do slightly better than
22413 synth_mult by unwinding the sequence by hand on CPUs with
22416 promote_duplicated_reg (enum machine_mode mode, rtx val)
22418 enum machine_mode valmode = GET_MODE (val);
22420 int nops = mode == DImode ? 3 : 2;
22422 gcc_assert (mode == SImode || mode == DImode);
22423 if (val == const0_rtx)
22424 return copy_to_mode_reg (mode, const0_rtx);
22425 if (CONST_INT_P (val))
22427 HOST_WIDE_INT v = INTVAL (val) & 255;
22431 if (mode == DImode)
22432 v |= (v << 16) << 16;
22433 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
22436 if (valmode == VOIDmode)
22438 if (valmode != QImode)
22439 val = gen_lowpart (QImode, val);
22440 if (mode == QImode)
22442 if (!TARGET_PARTIAL_REG_STALL)
22444 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
22445 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
22446 <= (ix86_cost->shift_const + ix86_cost->add) * nops
22447 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
22449 rtx reg = convert_modes (mode, QImode, val, true);
22450 tmp = promote_duplicated_reg (mode, const1_rtx);
22451 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
22456 rtx reg = convert_modes (mode, QImode, val, true);
22458 if (!TARGET_PARTIAL_REG_STALL)
22459 if (mode == SImode)
22460 emit_insn (gen_movsi_insv_1 (reg, reg));
22462 emit_insn (gen_movdi_insv_1 (reg, reg));
22465 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
22466 NULL, 1, OPTAB_DIRECT);
22468 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22470 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
22471 NULL, 1, OPTAB_DIRECT);
22472 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22473 if (mode == SImode)
22475 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
22476 NULL, 1, OPTAB_DIRECT);
22477 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
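/* Editor's note: a self-contained sketch of the byte broadcast computed by
   the shift/IOR sequence above (plain C, not RTL; DImode case).  */
#include <stdint.h>

static uint64_t
example_broadcast_byte (uint8_t b)
{
  uint64_t v = b;
  v |= v << 8;          /* 0x00XY -> 0xXYXY */
  v |= v << 16;         /* 0xXYXY -> 0xXYXYXYXY */
  v |= v << 32;         /* fill all eight bytes */
  return v;             /* e.g. 0x41 -> 0x4141414141414141 */
}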
22482 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
22483 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
22484 alignment from ALIGN to DESIRED_ALIGN. */
22486 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
22491 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
22492 promoted_val = promote_duplicated_reg (DImode, val);
22493 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
22494 promoted_val = promote_duplicated_reg (SImode, val);
22495 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
22496 promoted_val = promote_duplicated_reg (HImode, val);
22498 promoted_val = val;
22500 return promoted_val;
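/* Editor's note (illustrative values): with val = 0x41 on a 64-bit target,
   size_needed = 8 selects the DImode broadcast 0x4141414141414141 above,
   while a case needing at most 2 bytes keeps the HImode value 0x4141.  */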
22503 /* Expand string clear operation (bzero). Use i386 string operations when
22504 profitable. See expand_movmem comment for explanation of individual
22505 steps performed. */
22507 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
22508 rtx expected_align_exp, rtx expected_size_exp)
22513 rtx jump_around_label = NULL;
22514 HOST_WIDE_INT align = 1;
22515 unsigned HOST_WIDE_INT count = 0;
22516 HOST_WIDE_INT expected_size = -1;
22517 int size_needed = 0, epilogue_size_needed;
22518 int desired_align = 0, align_bytes = 0;
22519 enum stringop_alg alg;
22520 rtx promoted_val = NULL;
22521 bool force_loopy_epilogue = false;
22523 bool need_zero_guard = false;
22525 if (CONST_INT_P (align_exp))
22526 align = INTVAL (align_exp);
22527 /* i386 can do misaligned access at a reasonably increased cost. */
22528 if (CONST_INT_P (expected_align_exp)
22529 && INTVAL (expected_align_exp) > align)
22530 align = INTVAL (expected_align_exp);
22531 if (CONST_INT_P (count_exp))
22532 count = expected_size = INTVAL (count_exp);
22533 if (CONST_INT_P (expected_size_exp) && count == 0)
22534 expected_size = INTVAL (expected_size_exp);
22536 /* Make sure we don't need to care about overflow later on. */
22537 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22540 /* Step 0: Decide on preferred algorithm, desired alignment and
22541 size of chunks to be copied by main loop. */
22543 alg = decide_alg (count, expected_size, true, &dynamic_check);
22544 desired_align = decide_alignment (align, alg, expected_size);
22546 if (!TARGET_ALIGN_STRINGOPS)
22547 align = desired_align;
22549 if (alg == libcall)
22551 gcc_assert (alg != no_stringop);
22553 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
22554 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22559 gcc_unreachable ();
22561 need_zero_guard = true;
22562 size_needed = GET_MODE_SIZE (Pmode);
22564 case unrolled_loop:
22565 need_zero_guard = true;
22566 size_needed = GET_MODE_SIZE (Pmode) * 4;
22568 case rep_prefix_8_byte:
22571 case rep_prefix_4_byte:
22574 case rep_prefix_1_byte:
22578 need_zero_guard = true;
22582 epilogue_size_needed = size_needed;
22584 /* Step 1: Prologue guard. */
22586 /* Alignment code needs count to be in register. */
22587 if (CONST_INT_P (count_exp) && desired_align > align)
22589 if (INTVAL (count_exp) > desired_align
22590 && INTVAL (count_exp) > size_needed)
22593 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22594 if (align_bytes <= 0)
22597 align_bytes = desired_align - align_bytes;
22599 if (align_bytes == 0)
22601 enum machine_mode mode = SImode;
22602 if (TARGET_64BIT && (count & ~0xffffffff))
22604 count_exp = force_reg (mode, count_exp);
22607 /* Do the cheap promotion to allow better CSE across the
22608 main loop and epilogue (i.e., one load of the big constant in
22609 front of all code). */
22610 if (CONST_INT_P (val_exp))
22611 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22612 desired_align, align);
22613 /* Ensure that alignment prologue won't copy past end of block. */
22614 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22616 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22617 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22618 Make sure it is a power of 2. */
22619 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22621 /* To improve performance of small blocks, we jump around the VAL
22622 promotion code. This means that if the promoted VAL is not constant,
22623 we might not use it in the epilogue and have to use the byte
22625 if (epilogue_size_needed > 2 && !promoted_val)
22626 force_loopy_epilogue = true;
22629 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22631 /* If main algorithm works on QImode, no epilogue is needed.
22632 For small sizes just don't align anything. */
22633 if (size_needed == 1)
22634 desired_align = align;
22641 label = gen_label_rtx ();
22642 emit_cmp_and_jump_insns (count_exp,
22643 GEN_INT (epilogue_size_needed),
22644 LTU, 0, counter_mode (count_exp), 1, label);
22645 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22646 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22648 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22651 if (dynamic_check != -1)
22653 rtx hot_label = gen_label_rtx ();
22654 jump_around_label = gen_label_rtx ();
22655 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22656 LEU, 0, counter_mode (count_exp), 1, hot_label);
22657 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22658 set_storage_via_libcall (dst, count_exp, val_exp, false);
22659 emit_jump (jump_around_label);
22660 emit_label (hot_label);
22663 /* Step 2: Alignment prologue. */
22665 /* Do the expensive promotion once we have branched off the small blocks. */
22667 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22668 desired_align, align);
22669 gcc_assert (desired_align >= 1 && align >= 1);
22671 if (desired_align > align)
22673 if (align_bytes == 0)
22675 /* Except for the first move in epilogue, we no longer know
22676 constant offset in aliasing info. It doesn't seem worth
22677 the pain to maintain it for the first move, so throw away
22679 dst = change_address (dst, BLKmode, destreg);
22680 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
22685 /* If we know how many bytes need to be stored before dst is
22686 sufficiently aligned, maintain aliasing info accurately. */
22687 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
22688 desired_align, align_bytes);
22689 count_exp = plus_constant (count_exp, -align_bytes);
22690 count -= align_bytes;
22692 if (need_zero_guard
22693 && (count < (unsigned HOST_WIDE_INT) size_needed
22694 || (align_bytes == 0
22695 && count < ((unsigned HOST_WIDE_INT) size_needed
22696 + desired_align - align))))
22698 /* It is possible that we copied enough so the main loop will not
22700 gcc_assert (size_needed > 1);
22701 if (label == NULL_RTX)
22702 label = gen_label_rtx ();
22703 emit_cmp_and_jump_insns (count_exp,
22704 GEN_INT (size_needed),
22705 LTU, 0, counter_mode (count_exp), 1, label);
22706 if (expected_size == -1
22707 || expected_size < (desired_align - align) / 2 + size_needed)
22708 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22710 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22713 if (label && size_needed == 1)
22715 emit_label (label);
22716 LABEL_NUSES (label) = 1;
22718 promoted_val = val_exp;
22719 epilogue_size_needed = 1;
22721 else if (label == NULL_RTX)
22722 epilogue_size_needed = size_needed;
22724 /* Step 3: Main loop. */
22730 gcc_unreachable ();
22732 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22733 count_exp, QImode, 1, expected_size);
22736 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22737 count_exp, Pmode, 1, expected_size);
22739 case unrolled_loop:
22740 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22741 count_exp, Pmode, 4, expected_size);
22743 case rep_prefix_8_byte:
22744 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22747 case rep_prefix_4_byte:
22748 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22751 case rep_prefix_1_byte:
22752 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22756 /* Properly adjust the offset of dest memory for aliasing. */
22757 if (CONST_INT_P (count_exp))
22758 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22759 (count / size_needed) * size_needed);
22761 dst = change_address (dst, BLKmode, destreg);
22763 /* Step 4: Epilogue to copy the remaining bytes. */
22767 /* When the main loop is done, COUNT_EXP might hold the original count,
22768 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22769 Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22770 bytes. Compensate if needed. */
22772 if (size_needed < epilogue_size_needed)
22775 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22776 GEN_INT (size_needed - 1), count_exp, 1,
22778 if (tmp != count_exp)
22779 emit_move_insn (count_exp, tmp);
22781 emit_label (label);
22782 LABEL_NUSES (label) = 1;
22785 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22787 if (force_loopy_epilogue)
22788 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22789 epilogue_size_needed);
22791 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22792 epilogue_size_needed);
22794 if (jump_around_label)
22795 emit_label (jump_around_label);
22799 /* Expand the appropriate insns for doing strlen if not just doing
22802 out = result, initialized with the start address
22803 align_rtx = alignment of the address.
22804 scratch = scratch register, initialized with the start address when
22805 not aligned, otherwise undefined
22807 This is just the body. It needs the initializations mentioned above and
22808 some address computing at the end. These things are done in i386.md. */
22811 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22815 rtx align_2_label = NULL_RTX;
22816 rtx align_3_label = NULL_RTX;
22817 rtx align_4_label = gen_label_rtx ();
22818 rtx end_0_label = gen_label_rtx ();
22820 rtx tmpreg = gen_reg_rtx (SImode);
22821 rtx scratch = gen_reg_rtx (SImode);
22825 if (CONST_INT_P (align_rtx))
22826 align = INTVAL (align_rtx);
22828 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22830 /* Is there a known alignment and is it less than 4? */
22833 rtx scratch1 = gen_reg_rtx (Pmode);
22834 emit_move_insn (scratch1, out);
22835 /* Is there a known alignment and is it not 2? */
22838 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
22839 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
22841 /* Leave just the 2 lower bits. */
22842 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22843 NULL_RTX, 0, OPTAB_WIDEN);
22845 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22846 Pmode, 1, align_4_label);
22847 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22848 Pmode, 1, align_2_label);
22849 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22850 Pmode, 1, align_3_label);
22854 /* Since the alignment is 2, we have to check 2 or 0 bytes;
22855 check whether it is aligned to a 4-byte boundary. */
22857 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22858 NULL_RTX, 0, OPTAB_WIDEN);
22860 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22861 Pmode, 1, align_4_label);
22864 mem = change_address (src, QImode, out);
22866 /* Now compare the bytes. */
22868 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
22869 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
22870 QImode, 1, end_0_label);
22872 /* Increment the address. */
22873 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22875 /* Not needed with an alignment of 2 */
22878 emit_label (align_2_label);
22880 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22883 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22885 emit_label (align_3_label);
22888 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22891 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22894 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
22895 align this loop: it only makes the program huge and does not help
22896 performance. */
22897 emit_label (align_4_label);
22899 mem = change_address (src, SImode, out);
22900 emit_move_insn (scratch, mem);
22901 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
22903 /* This formula yields a nonzero result iff one of the bytes is zero.
22904 This saves three branches inside the loop and many cycles. */
22906 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
22907 emit_insn (gen_one_cmplsi2 (scratch, scratch));
22908 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
22909 emit_insn (gen_andsi3 (tmpreg, tmpreg,
22910 gen_int_mode (0x80808080, SImode)));
22911 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
22916 rtx reg = gen_reg_rtx (SImode);
22917 rtx reg2 = gen_reg_rtx (Pmode);
22918 emit_move_insn (reg, tmpreg);
22919 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
22921 /* If zero is not in the first two bytes, move two bytes forward. */
22922 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22923 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22924 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22925 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
22926 gen_rtx_IF_THEN_ELSE (SImode, tmp,
22929 /* Emit lea manually to avoid clobbering of flags. */
22930 emit_insn (gen_rtx_SET (SImode, reg2,
22931 gen_rtx_PLUS (Pmode, out, const2_rtx)));
22933 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22934 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22935 emit_insn (gen_rtx_SET (VOIDmode, out,
22936 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
22942 rtx end_2_label = gen_label_rtx ();
22943 /* Is zero in the first two bytes? */
22945 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22946 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22947 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
22948 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22949 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
22951 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22952 JUMP_LABEL (tmp) = end_2_label;
22954 /* Not in the first two. Move two bytes forward. */
22955 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
22956 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
22958 emit_label (end_2_label);
22962 /* Avoid branch in fixing the byte. */
22963 tmpreg = gen_lowpart (QImode, tmpreg);
22964 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
22965 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
22966 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
22967 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
22969 emit_label (end_0_label);
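/* Editor's note: a self-contained model of the branch-free zero-byte test
   used by the 4-byte loop above (illustrative C, not RTL).  For a word V,
   (V - 0x01010101) & ~V & 0x80808080 is nonzero iff some byte of V is
   zero.  */
#include <stdint.h>

static int
example_has_zero_byte (uint32_t v)
{
  return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
}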
22972 /* Expand strlen. */
22975 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
22977 rtx addr, scratch1, scratch2, scratch3, scratch4;
22979 /* The generic case of the strlen expander is long. Avoid its
22980 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
22982 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22983 && !TARGET_INLINE_ALL_STRINGOPS
22984 && !optimize_insn_for_size_p ()
22985 && (!CONST_INT_P (align) || INTVAL (align) < 4))
22988 addr = force_reg (Pmode, XEXP (src, 0));
22989 scratch1 = gen_reg_rtx (Pmode);
22991 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22992 && !optimize_insn_for_size_p ())
22994 /* Well it seems that some optimizer does not combine a call like
22995 foo(strlen(bar), strlen(bar));
22996 when the move and the subtraction are done here. It does calculate
22997 the length just once when these instructions are done inside of
22998 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
22999 often used and I use one fewer register for the lifetime of
23000 output_strlen_unroll() this is better. */
23002 emit_move_insn (out, addr);
23004 ix86_expand_strlensi_unroll_1 (out, src, align);
23006 /* strlensi_unroll_1 returns the address of the zero at the end of
23007 the string, like memchr(), so compute the length by subtracting
23008 the start address. */
23009 emit_insn (ix86_gen_sub3 (out, out, addr));
23015 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23016 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
23019 scratch2 = gen_reg_rtx (Pmode);
23020 scratch3 = gen_reg_rtx (Pmode);
23021 scratch4 = force_reg (Pmode, constm1_rtx);
23023 emit_move_insn (scratch3, addr);
23024 eoschar = force_reg (QImode, eoschar);
23026 src = replace_equiv_address_nv (src, scratch3);
23028 /* If .md starts supporting :P, this can be done in .md. */
23029 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
23030 scratch4), UNSPEC_SCAS);
23031 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
23032 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
23033 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
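/* Editor's note: the arithmetic above recovers the length from ECX.  With
   ECX preloaded with -1, repnz scasb decrements it once per byte scanned,
   including the terminating NUL, so strlen = ~ECX - 1 -- exactly the
   one's complement plus add of -1 emitted above.  A C model of this
   (a sketch, assuming the usual scasb semantics):  */
#include <stddef.h>

static size_t
example_scasb_length (const char *s)
{
  size_t ecx = (size_t) -1;
  do
    ecx--;              /* one decrement per byte, including the NUL */
  while (*s++ != '\0');
  return ~ecx - 1;      /* e.g. "ab": ecx = -4, ~ecx = 3, length = 2 */
}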
23038 /* For a given symbol (function), construct code to compute the address of its
23039 PLT entry in the large x86-64 PIC model. */
23041 construct_plt_address (rtx symbol)
23043 rtx tmp = gen_reg_rtx (Pmode);
23044 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
23046 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
23047 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
23049 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
23050 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
23055 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
23057 rtx pop, bool sibcall)
23059 /* We need to represent that SI and DI registers are clobbered by SYSV calls. */
23061 static int clobbered_registers[] = {
23062 XMM6_REG, XMM7_REG, XMM8_REG,
23063 XMM9_REG, XMM10_REG, XMM11_REG,
23064 XMM12_REG, XMM13_REG, XMM14_REG,
23065 XMM15_REG, SI_REG, DI_REG
23067 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
23068 rtx use = NULL, call;
23069 unsigned int vec_len;
23071 if (pop == const0_rtx)
23073 gcc_assert (!TARGET_64BIT || !pop);
23075 if (TARGET_MACHO && !TARGET_64BIT)
23078 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
23079 fnaddr = machopic_indirect_call_target (fnaddr);
23084 /* Static functions and indirect calls don't need the pic register. */
23085 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
23086 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23087 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
23088 use_reg (&use, pic_offset_table_rtx);
23091 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
23093 rtx al = gen_rtx_REG (QImode, AX_REG);
23094 emit_move_insn (al, callarg2);
23095 use_reg (&use, al);
23098 if (ix86_cmodel == CM_LARGE_PIC
23100 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23101 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
23102 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
23104 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
23105 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
23107 fnaddr = XEXP (fnaddr, 0);
23108 if (GET_MODE (fnaddr) != Pmode)
23109 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
23110 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
23114 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
23116 call = gen_rtx_SET (VOIDmode, retval, call);
23117 vec[vec_len++] = call;
23121 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
23122 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
23123 vec[vec_len++] = pop;
23126 if (TARGET_64BIT_MS_ABI
23127 && (!callarg2 || INTVAL (callarg2) != -2))
23131 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
23132 UNSPEC_MS_TO_SYSV_CALL);
23134 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
23136 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
23138 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
23140 clobbered_registers[i]));
23143 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23144 if (TARGET_VZEROUPPER)
23147 if (cfun->machine->callee_pass_avx256_p)
23149 if (cfun->machine->callee_return_avx256_p)
23150 avx256 = callee_return_pass_avx256;
23152 avx256 = callee_pass_avx256;
23154 else if (cfun->machine->callee_return_avx256_p)
23155 avx256 = callee_return_avx256;
23157 avx256 = call_no_avx256;
23159 if (reload_completed)
23160 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
23162 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
23163 gen_rtvec (1, GEN_INT (avx256)),
23164 UNSPEC_CALL_NEEDS_VZEROUPPER);
23168 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
23169 call = emit_call_insn (call);
23171 CALL_INSN_FUNCTION_USAGE (call) = use;
23177 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
23179 rtx pat = PATTERN (insn);
23180 rtvec vec = XVEC (pat, 0);
23181 int len = GET_NUM_ELEM (vec) - 1;
23183 /* Strip off the last entry of the parallel. */
23184 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
23185 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
23187 pat = RTVEC_ELT (vec, 0);
23189 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
23191 emit_insn (gen_avx_vzeroupper (vzeroupper));
23192 emit_call_insn (pat);
23195 /* Output the assembly for a call instruction. */
23198 ix86_output_call_insn (rtx insn, rtx call_op)
23200 bool direct_p = constant_call_address_operand (call_op, Pmode);
23201 bool seh_nop_p = false;
23204 if (SIBLING_CALL_P (insn))
23208 /* SEH epilogue detection requires the indirect branch case
23209 to include REX.W. */
23210 else if (TARGET_SEH)
23211 xasm = "rex.W jmp %A0";
23215 output_asm_insn (xasm, &call_op);
23219 /* SEH unwinding can require an extra nop to be emitted in several
23220 circumstances. Determine if we have one of those. */
23225 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23227 /* If we get to another real insn, we don't need the nop. */
23231 /* If we get to the epilogue note, prevent a catch region from
23232 being adjacent to the standard epilogue sequence. If non-
23233 call-exceptions, we'll have done this during epilogue emission. */
23234 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23235 && !flag_non_call_exceptions
23236 && !can_throw_internal (insn))
23243 /* If we didn't find a real insn following the call, prevent the
23244 unwinder from looking into the next function. */
23250 xasm = "call\t%P0";
23252 xasm = "call\t%A0";
23254 output_asm_insn (xasm, &call_op);
23262 /* Clear stack slot assignments remembered from previous functions.
23263 This is called from INIT_EXPANDERS once before RTL is emitted for each
23266 static struct machine_function *
23267 ix86_init_machine_status (void)
23269 struct machine_function *f;
23271 f = ggc_alloc_cleared_machine_function ();
23272 f->use_fast_prologue_epilogue_nregs = -1;
23273 f->tls_descriptor_call_expanded_p = 0;
23274 f->call_abi = ix86_abi;
23279 /* Return a MEM corresponding to a stack slot with mode MODE.
23280 Allocate a new slot if necessary.
23282 The RTL for a function can have several slots available: N is
23283 which slot to use. */
23286 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23288 struct stack_local_entry *s;
23290 gcc_assert (n < MAX_386_STACK_LOCALS);
23292 /* Virtual slot is valid only before vregs are instantiated. */
23293 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
23295 for (s = ix86_stack_locals; s; s = s->next)
23296 if (s->mode == mode && s->n == n)
23297 return validize_mem (copy_rtx (s->rtl));
23299 s = ggc_alloc_stack_local_entry ();
23302 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23304 s->next = ix86_stack_locals;
23305 ix86_stack_locals = s;
23306 return validize_mem (s->rtl);
23309 /* Calculate the length of the memory address in the instruction encoding.
23310 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23311 or other prefixes. */
23314 memory_address_length (rtx addr)
23316 struct ix86_address parts;
23317 rtx base, index, disp;
23321 if (GET_CODE (addr) == PRE_DEC
23322 || GET_CODE (addr) == POST_INC
23323 || GET_CODE (addr) == PRE_MODIFY
23324 || GET_CODE (addr) == POST_MODIFY)
23327 ok = ix86_decompose_address (addr, &parts);
23330 if (parts.base && GET_CODE (parts.base) == SUBREG)
23331 parts.base = SUBREG_REG (parts.base);
23332 if (parts.index && GET_CODE (parts.index) == SUBREG)
23333 parts.index = SUBREG_REG (parts.index);
23336 index = parts.index;
23339 /* Add length of addr32 prefix. */
23340 len = (GET_CODE (addr) == ZERO_EXTEND
23341 || GET_CODE (addr) == AND);
23344 - esp as the base always wants an index,
23345 - ebp as the base always wants a displacement,
23346 - r12 as the base always wants an index,
23347 - r13 as the base always wants a displacement. */
23349 /* Register Indirect. */
23350 if (base && !index && !disp)
23352 /* esp (for its index) and ebp (for its displacement) need
23353 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23356 && (addr == arg_pointer_rtx
23357 || addr == frame_pointer_rtx
23358 || REGNO (addr) == SP_REG
23359 || REGNO (addr) == BP_REG
23360 || REGNO (addr) == R12_REG
23361 || REGNO (addr) == R13_REG))
23365 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23366 is not disp32, but disp32(%rip), so for disp32
23367 SIB byte is needed, unless print_operand_address
23368 optimizes it into disp32(%rip) or (%rip) is implied
23370 else if (disp && !base && !index)
23377 if (GET_CODE (disp) == CONST)
23378 symbol = XEXP (disp, 0);
23379 if (GET_CODE (symbol) == PLUS
23380 && CONST_INT_P (XEXP (symbol, 1)))
23381 symbol = XEXP (symbol, 0);
23383 if (GET_CODE (symbol) != LABEL_REF
23384 && (GET_CODE (symbol) != SYMBOL_REF
23385 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
23386 && (GET_CODE (symbol) != UNSPEC
23387 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
23388 && XINT (symbol, 1) != UNSPEC_PCREL
23389 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
23396 /* Find the length of the displacement constant. */
23399 if (base && satisfies_constraint_K (disp))
23404 /* ebp always wants a displacement. Similarly r13. */
23405 else if (base && REG_P (base)
23406 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
23409 /* An index requires the two-byte modrm form.... */
23411 /* ...like esp (or r12), which always wants an index. */
23412 || base == arg_pointer_rtx
23413 || base == frame_pointer_rtx
23414 || (base && REG_P (base)
23415 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
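/* Editor's note: worked examples of the rules above (illustrative, 32-bit
   addressing; the one-byte modrm is excluded throughout): (%eax) needs no
   extra bytes -> 0; (%esp) forces a SIB byte -> 1; 4(%ebp) needs a
   disp8 -> 1; 0x12345678(%eax,%ebx,4) needs SIB plus disp32 -> 5.  */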
23432 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23433 is set, expect that the insn has an 8-bit immediate alternative. */
23435 ix86_attr_length_immediate_default (rtx insn, bool shortform)
23439 extract_insn_cached (insn);
23440 for (i = recog_data.n_operands - 1; i >= 0; --i)
23441 if (CONSTANT_P (recog_data.operand[i]))
23443 enum attr_mode mode = get_attr_mode (insn);
23446 if (shortform && CONST_INT_P (recog_data.operand[i]))
23448 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
23455 ival = trunc_int_for_mode (ival, HImode);
23458 ival = trunc_int_for_mode (ival, SImode);
23463 if (IN_RANGE (ival, -128, 127))
23480 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
23485 fatal_insn ("unknown insn mode", insn);
23490 /* Compute default value for "length_address" attribute. */
23492 ix86_attr_length_address_default (rtx insn)
23496 if (get_attr_type (insn) == TYPE_LEA)
23498 rtx set = PATTERN (insn), addr;
23500 if (GET_CODE (set) == PARALLEL)
23501 set = XVECEXP (set, 0, 0);
23503 gcc_assert (GET_CODE (set) == SET);
23505 addr = SET_SRC (set);
23506 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
23508 if (GET_CODE (addr) == ZERO_EXTEND)
23509 addr = XEXP (addr, 0);
23510 if (GET_CODE (addr) == SUBREG)
23511 addr = SUBREG_REG (addr);
23514 return memory_address_length (addr);
23517 extract_insn_cached (insn);
23518 for (i = recog_data.n_operands - 1; i >= 0; --i)
23519 if (MEM_P (recog_data.operand[i]))
23521 constrain_operands_cached (reload_completed);
23522 if (which_alternative != -1)
23524 const char *constraints = recog_data.constraints[i];
23525 int alt = which_alternative;
23527 while (*constraints == '=' || *constraints == '+')
23530 while (*constraints++ != ',')
23532 /* Skip ignored operands. */
23533 if (*constraints == 'X')
23536 return memory_address_length (XEXP (recog_data.operand[i], 0));
23541 /* Compute default value for "length_vex" attribute. It includes
23542 2 or 3 byte VEX prefix and 1 opcode byte. */
23545 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
23549 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
23550 the 3-byte VEX prefix. */
23551 if (!has_0f_opcode || has_vex_w)
23554 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
23558 extract_insn_cached (insn);
23560 for (i = recog_data.n_operands - 1; i >= 0; --i)
23561 if (REG_P (recog_data.operand[i]))
23563 /* REX.W bit uses 3 byte VEX prefix. */
23564 if (GET_MODE (recog_data.operand[i]) == DImode
23565 && GENERAL_REG_P (recog_data.operand[i]))
23570 /* REX.X or REX.B bits use 3 byte VEX prefix. */
23571 if (MEM_P (recog_data.operand[i])
23572 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
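/* Editor's note (illustrative): a VEX insn whose operands stay within the
   low eight registers and need no REX.W can use the 2-byte prefix
   (2 + 1 opcode byte = 3 as computed here), while a DImode general
   register operand or an extended register mentioned in a memory address
   forces the 3-byte prefix (3 + 1 = 4).  */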
23579 /* Return the maximum number of instructions a cpu can issue. */
23582 ix86_issue_rate (void)
23586 case PROCESSOR_PENTIUM:
23587 case PROCESSOR_ATOM:
23591 case PROCESSOR_PENTIUMPRO:
23592 case PROCESSOR_PENTIUM4:
23593 case PROCESSOR_CORE2_32:
23594 case PROCESSOR_CORE2_64:
23595 case PROCESSOR_COREI7_32:
23596 case PROCESSOR_COREI7_64:
23597 case PROCESSOR_ATHLON:
23599 case PROCESSOR_AMDFAM10:
23600 case PROCESSOR_NOCONA:
23601 case PROCESSOR_GENERIC32:
23602 case PROCESSOR_GENERIC64:
23603 case PROCESSOR_BDVER1:
23604 case PROCESSOR_BDVER2:
23605 case PROCESSOR_BTVER1:
23613 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
23614 set by DEP_INSN and nothing else set by DEP_INSN. */
23617 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
23621 /* Simplify the test for uninteresting insns. */
23622 if (insn_type != TYPE_SETCC
23623 && insn_type != TYPE_ICMOV
23624 && insn_type != TYPE_FCMOV
23625 && insn_type != TYPE_IBR)
23628 if ((set = single_set (dep_insn)) != 0)
23630 set = SET_DEST (set);
23633 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
23634 && XVECLEN (PATTERN (dep_insn), 0) == 2
23635 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
23636 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
23638 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
23639 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
23644 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
23647 /* This test is true if the dependent insn reads the flags but
23648 not any other potentially set register. */
23649 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23652 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
23658 /* Return true iff USE_INSN has a memory address with operands set by
23662 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23665 extract_insn_cached (use_insn);
23666 for (i = recog_data.n_operands - 1; i >= 0; --i)
23667 if (MEM_P (recog_data.operand[i]))
23669 rtx addr = XEXP (recog_data.operand[i], 0);
23670 return modified_in_p (addr, set_insn) != 0;
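/* Editor's note: an illustrative AGI pair (not taken from the source): on
   Pentium, `addl $4, %eax' immediately followed by `movl (%eax), %ebx'
   stalls because the load's address is produced by the previous insn;
   ix86_adjust_cost below charges extra latency for such pairs.  */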
23676 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23678 enum attr_type insn_type, dep_insn_type;
23679 enum attr_memory memory;
23681 int dep_insn_code_number;
23683 /* Anti and output dependencies have zero cost on all CPUs. */
23684 if (REG_NOTE_KIND (link) != 0)
23687 dep_insn_code_number = recog_memoized (dep_insn);
23689 /* If we can't recognize the insns, we can't really do anything. */
23690 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
23693 insn_type = get_attr_type (insn);
23694 dep_insn_type = get_attr_type (dep_insn);
23698 case PROCESSOR_PENTIUM:
23699 /* Address Generation Interlock adds a cycle of latency. */
23700 if (insn_type == TYPE_LEA)
23702 rtx addr = PATTERN (insn);
23704 if (GET_CODE (addr) == PARALLEL)
23705 addr = XVECEXP (addr, 0, 0);
23707 gcc_assert (GET_CODE (addr) == SET);
23709 addr = SET_SRC (addr);
23710 if (modified_in_p (addr, dep_insn))
23713 else if (ix86_agi_dependent (dep_insn, insn))
23716 /* ??? Compares pair with jump/setcc. */
23717 if (ix86_flags_dependent (insn, dep_insn, insn_type))
23720 /* Floating point stores require the value to be ready one cycle earlier. */
23721 if (insn_type == TYPE_FMOV
23722 && get_attr_memory (insn) == MEMORY_STORE
23723 && !ix86_agi_dependent (dep_insn, insn))
23727 case PROCESSOR_PENTIUMPRO:
23728 memory = get_attr_memory (insn);
23730 /* INT->FP conversion is expensive. */
23731 if (get_attr_fp_int_src (dep_insn))
23734 /* There is one extra cycle of latency between an FP op and a store. */
23735 if (insn_type == TYPE_FMOV
23736 && (set = single_set (dep_insn)) != NULL_RTX
23737 && (set2 = single_set (insn)) != NULL_RTX
23738 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
23739 && MEM_P (SET_DEST (set2)))
23742 /* Show the reorder buffer's ability to hide the latency of a load by
23743 executing it in parallel with the previous instruction when that
23744 instruction is not needed to compute the address. */
23745 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23746 && !ix86_agi_dependent (dep_insn, insn))
23748 /* Claim moves to take one cycle, as the core can issue one load
23749 at a time and the next load can start a cycle later. */
23750 if (dep_insn_type == TYPE_IMOV
23751 || dep_insn_type == TYPE_FMOV)
23759 memory = get_attr_memory (insn);
23761 /* The esp dependency is resolved before the instruction is really finished. */
23763 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
23764 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
23767 /* INT->FP conversion is expensive. */
23768 if (get_attr_fp_int_src (dep_insn))
23771 /* Show the reorder buffer's ability to hide the latency of a load by
23772 executing it in parallel with the previous instruction when that
23773 instruction is not needed to compute the address. */
23774 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23775 && !ix86_agi_dependent (dep_insn, insn))
23777 /* Claim moves to take one cycle, as the core can issue one load
23778 at a time and the next load can start a cycle later. */
23779 if (dep_insn_type == TYPE_IMOV
23780 || dep_insn_type == TYPE_FMOV)
23789 case PROCESSOR_ATHLON:
23791 case PROCESSOR_AMDFAM10:
23792 case PROCESSOR_BDVER1:
23793 case PROCESSOR_BDVER2:
23794 case PROCESSOR_BTVER1:
23795 case PROCESSOR_ATOM:
23796 case PROCESSOR_GENERIC32:
23797 case PROCESSOR_GENERIC64:
23798 memory = get_attr_memory (insn);
23800 /* Show the reorder buffer's ability to hide the latency of a load by
23801 executing it in parallel with the previous instruction when that
23802 instruction is not needed to compute the address. */
23803 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23804 && !ix86_agi_dependent (dep_insn, insn))
23806 enum attr_unit unit = get_attr_unit (insn);
23809 /* Because of the difference between the length of integer and
23810 floating unit pipeline preparation stages, the memory operands
23811 for floating point are cheaper.
23813 ??? For Athlon the difference is most probably 2. */
23814 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23817 loadcost = TARGET_ATHLON ? 2 : 0;
23819 if (cost >= loadcost)
23832 /* How many alternative schedules to try. This should be as wide as the
23833 scheduling freedom in the DFA, but no wider. Making this value too
23834 large results in extra work for the scheduler. */
23837 ia32_multipass_dfa_lookahead (void)
23841 case PROCESSOR_PENTIUM:
23844 case PROCESSOR_PENTIUMPRO:
23848 case PROCESSOR_CORE2_32:
23849 case PROCESSOR_CORE2_64:
23850 case PROCESSOR_COREI7_32:
23851 case PROCESSOR_COREI7_64:
23852 case PROCESSOR_ATOM:
23853 /* Generally, we want haifa-sched:max_issue() to look ahead as far
23854 as the number of instructions that can be executed in one cycle, i.e.,
23855 issue_rate. I wonder why tuning for many CPUs does not do this. */
23856 return ix86_issue_rate ();
23865 /* Model decoder of Core 2/i7.
23866 The hooks below, used for multipass scheduling (see haifa-sched.c:max_issue),
23867 track the instruction fetch block boundaries and make sure that long
23868 (9+ bytes) instructions are assigned to D0. */
23870 /* Maximum length of an insn that can be handled by
23871 a secondary decoder unit. '8' for Core 2/i7. */
23872 static int core2i7_secondary_decoder_max_insn_size;
23874 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
23875 '16' for Core 2/i7. */
23876 static int core2i7_ifetch_block_size;
23878 /* Maximum number of instructions decoder can handle per cycle.
23879 '6' for Core 2/i7. */
23880 static int core2i7_ifetch_block_max_insns;
23882 typedef struct ix86_first_cycle_multipass_data_ *
23883 ix86_first_cycle_multipass_data_t;
23884 typedef const struct ix86_first_cycle_multipass_data_ *
23885 const_ix86_first_cycle_multipass_data_t;
23887 /* A variable to store target state across calls to max_issue within
23889 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
23890 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
23892 /* Initialize DATA. */
23894 core2i7_first_cycle_multipass_init (void *_data)
23896 ix86_first_cycle_multipass_data_t data
23897 = (ix86_first_cycle_multipass_data_t) _data;
23899 data->ifetch_block_len = 0;
23900 data->ifetch_block_n_insns = 0;
23901 data->ready_try_change = NULL;
23902 data->ready_try_change_size = 0;
23905 /* Advancing the cycle; reset ifetch block counts. */
23907 core2i7_dfa_post_advance_cycle (void)
23909 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
23911 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23913 data->ifetch_block_len = 0;
23914 data->ifetch_block_n_insns = 0;
23917 static int min_insn_size (rtx);
23919 /* Filter out insns from ready_try that the core will not be able to issue
23920 on the current cycle due to decoder restrictions. */
23922 core2i7_first_cycle_multipass_filter_ready_try
23923 (const_ix86_first_cycle_multipass_data_t data,
23924 char *ready_try, int n_ready, bool first_cycle_insn_p)
23931 if (ready_try[n_ready])
23934 insn = get_ready_element (n_ready);
23935 insn_size = min_insn_size (insn);
23937 if (/* If this insn is too long for a secondary decoder ... */
23938 (!first_cycle_insn_p
23939 && insn_size > core2i7_secondary_decoder_max_insn_size)
23940 /* ... or it would not fit into the ifetch block ... */
23941 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
23942 /* ... or the decoder is full already ... */
23943 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
23944 /* ... mask the insn out. */
23946 ready_try[n_ready] = 1;
23948 if (data->ready_try_change)
23949 SET_BIT (data->ready_try_change, n_ready);
23954 /* Prepare for a new round of multipass lookahead scheduling. */
23956 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
23957 bool first_cycle_insn_p)
23959 ix86_first_cycle_multipass_data_t data
23960 = (ix86_first_cycle_multipass_data_t) _data;
23961 const_ix86_first_cycle_multipass_data_t prev_data
23962 = ix86_first_cycle_multipass_data;
23964 /* Restore the state from the end of the previous round. */
23965 data->ifetch_block_len = prev_data->ifetch_block_len;
23966 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
23968 /* Filter instructions that cannot be issued on current cycle due to
23969 decoder restrictions. */
23970 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23971 first_cycle_insn_p);
23974 /* INSN is being issued in current solution. Account for its impact on
23975 the decoder model. */
23977 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
23978 rtx insn, const void *_prev_data)
23980 ix86_first_cycle_multipass_data_t data
23981 = (ix86_first_cycle_multipass_data_t) _data;
23982 const_ix86_first_cycle_multipass_data_t prev_data
23983 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
23985 int insn_size = min_insn_size (insn);
23987 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
23988 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
23989 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
23990 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23992 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
23993 if (!data->ready_try_change)
23995 data->ready_try_change = sbitmap_alloc (n_ready);
23996 data->ready_try_change_size = n_ready;
23998 else if (data->ready_try_change_size < n_ready)
24000 data->ready_try_change = sbitmap_resize (data->ready_try_change,
24002 data->ready_try_change_size = n_ready;
24004 sbitmap_zero (data->ready_try_change);
24006 /* Filter out insns from ready_try that the core will not be able to issue
24007 on the current cycle due to decoder restrictions. */
24008 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24012 /* Revert the effect on ready_try. */
24014 core2i7_first_cycle_multipass_backtrack (const void *_data,
24016 int n_ready ATTRIBUTE_UNUSED)
24018 const_ix86_first_cycle_multipass_data_t data
24019 = (const_ix86_first_cycle_multipass_data_t) _data;
24020 unsigned int i = 0;
24021 sbitmap_iterator sbi;
24023 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
24024 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
24030 /* Save the result of multipass lookahead scheduling for the next round. */
24032 core2i7_first_cycle_multipass_end (const void *_data)
24034 const_ix86_first_cycle_multipass_data_t data
24035 = (const_ix86_first_cycle_multipass_data_t) _data;
24036 ix86_first_cycle_multipass_data_t next_data
24037 = ix86_first_cycle_multipass_data;
24041 next_data->ifetch_block_len = data->ifetch_block_len;
24042 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
24046 /* Deallocate target data. */
24048 core2i7_first_cycle_multipass_fini (void *_data)
24050 ix86_first_cycle_multipass_data_t data
24051 = (ix86_first_cycle_multipass_data_t) _data;
24053 if (data->ready_try_change)
24055 sbitmap_free (data->ready_try_change);
24056 data->ready_try_change = NULL;
24057 data->ready_try_change_size = 0;
24061 /* Prepare for scheduling pass. */
24063 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
24064 int verbose ATTRIBUTE_UNUSED,
24065 int max_uid ATTRIBUTE_UNUSED)
24067 /* Install scheduling hooks for current CPU. Some of these hooks are used
24068 in time-critical parts of the scheduler, so we only set them up when
24069 they are actually used. */
24072 case PROCESSOR_CORE2_32:
24073 case PROCESSOR_CORE2_64:
24074 case PROCESSOR_COREI7_32:
24075 case PROCESSOR_COREI7_64:
24076 targetm.sched.dfa_post_advance_cycle
24077 = core2i7_dfa_post_advance_cycle;
24078 targetm.sched.first_cycle_multipass_init
24079 = core2i7_first_cycle_multipass_init;
24080 targetm.sched.first_cycle_multipass_begin
24081 = core2i7_first_cycle_multipass_begin;
24082 targetm.sched.first_cycle_multipass_issue
24083 = core2i7_first_cycle_multipass_issue;
24084 targetm.sched.first_cycle_multipass_backtrack
24085 = core2i7_first_cycle_multipass_backtrack;
24086 targetm.sched.first_cycle_multipass_end
24087 = core2i7_first_cycle_multipass_end;
24088 targetm.sched.first_cycle_multipass_fini
24089 = core2i7_first_cycle_multipass_fini;
24091 /* Set decoder parameters. */
24092 core2i7_secondary_decoder_max_insn_size = 8;
24093 core2i7_ifetch_block_size = 16;
24094 core2i7_ifetch_block_max_insns = 6;
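/* Editor's note: with these parameters, one cycle can issue, e.g., three
   insns of sizes 6 + 5 + 4 = 15 bytes (within the 16-byte fetch block),
   while a 9-byte insn is rejected for a secondary decoder slot because it
   exceeds the 8-byte limit checked in the filter above.  */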
24098 targetm.sched.dfa_post_advance_cycle = NULL;
24099 targetm.sched.first_cycle_multipass_init = NULL;
24100 targetm.sched.first_cycle_multipass_begin = NULL;
24101 targetm.sched.first_cycle_multipass_issue = NULL;
24102 targetm.sched.first_cycle_multipass_backtrack = NULL;
24103 targetm.sched.first_cycle_multipass_end = NULL;
24104 targetm.sched.first_cycle_multipass_fini = NULL;
24110 /* Compute the alignment given to a constant that is being placed in memory.
24111 EXP is the constant and ALIGN is the alignment that the object would
24113 The value of this function is used instead of that alignment to align
24117 ix86_constant_alignment (tree exp, int align)
24119 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
24120 || TREE_CODE (exp) == INTEGER_CST)
24122 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
24124 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
24127 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
24128 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
24129 return BITS_PER_WORD;
24131 return align;
24132 }
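/* Editorial example (not in the original source): CONSTANT_ALIGNMENT
   expands to ix86_constant_alignment, so a DFmode literal that lands in
   the constant pool, e.g. the 1.5 in

     double scale (double x) { return x * 1.5; }

   is emitted with 64-bit alignment even though the ia32 ABI only gives
   doubles 32-bit alignment, keeping FP and SSE loads of it fast.  */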
24134 /* Compute the alignment for a static variable.
24135 TYPE is the data type, and ALIGN is the alignment that
24136 the object would ordinarily have. The value of this function is used
24137 instead of that alignment to align the object. */
24140 ix86_data_alignment (tree type, int align)
24142 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
24144 if (AGGREGATE_TYPE_P (type)
24145 && TYPE_SIZE (type)
24146 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24147 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
24148 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
24149 && align < max_align)
24152 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
24153 to a 16-byte boundary.  */
24156 if (AGGREGATE_TYPE_P (type)
24157 && TYPE_SIZE (type)
24158 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24159 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
24160 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
24164 if (TREE_CODE (type) == ARRAY_TYPE)
24166 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
24168 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
24171 else if (TREE_CODE (type) == COMPLEX_TYPE)
24174 if (TYPE_MODE (type) == DCmode && align < 64)
24176 if ((TYPE_MODE (type) == XCmode
24177 || TYPE_MODE (type) == TCmode) && align < 128)
24180 else if ((TREE_CODE (type) == RECORD_TYPE
24181 || TREE_CODE (type) == UNION_TYPE
24182 || TREE_CODE (type) == QUAL_UNION_TYPE)
24183 && TYPE_FIELDS (type))
24185 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
24187 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
24190 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
24191 || TREE_CODE (type) == INTEGER_TYPE)
24193 if (TYPE_MODE (type) == DFmode && align < 64)
24195 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
24196 return 128;
24197 }
24199 return align;
24200 }
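/* Editorial example (not in the original source): under the 128-bit rule
   above, a static aggregate whose size reaches 16 bytes is bumped to
   16-byte alignment on x86-64:

     static char buf[16];   // emitted with .align 16

   which is what lets the vectorizer use aligned SSE accesses on such
   objects without runtime alignment checks.  */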
24202 /* Compute the alignment for a local variable or a stack slot. EXP is
24203 the data type or decl itself, MODE is the widest mode available and
24204 ALIGN is the alignment that the object would ordinarily have. The
24205 value of this macro is used instead of that alignment to align the
24206 object.  */
24209 ix86_local_alignment (tree exp, enum machine_mode mode,
24210 unsigned int align)
24214 if (exp && DECL_P (exp))
24216 type = TREE_TYPE (exp);
24225 /* Don't do dynamic stack realignment for long long objects with
24226 -mpreferred-stack-boundary=2. */
24227 if (!TARGET_64BIT
24228 && align == 64
24229 && ix86_preferred_stack_boundary < 64
24230 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
24231 && (!type || !TYPE_USER_ALIGN (type))
24232 && (!decl || !DECL_USER_ALIGN (decl)))
24233 align = 32;
24235 /* If TYPE is NULL, we are allocating a stack slot for caller-save
24236 register in MODE. We will return the largest alignment of XF
24237 and DF.  */
24240 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
24241 align = GET_MODE_ALIGNMENT (DFmode);
24245 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
24246 to a 16-byte boundary. The exact wording is:
24248 An array uses the same alignment as its elements, except that a local or
24249 global array variable of length at least 16 bytes or
24250 a C99 variable-length array variable always has alignment of at least 16 bytes.
24252 This was added to allow use of aligned SSE instructions on arrays. The
24253 rule is meant for static storage (where the compiler cannot do the analysis
24254 by itself). We follow it for automatic variables only when convenient.
24255 We fully control everything in the function being compiled, and functions
24256 from other units cannot rely on the alignment.
24258 Exclude the va_list type. It is the common case of a local array where
24259 we cannot benefit from the alignment.  */
24260 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
24263 if (AGGREGATE_TYPE_P (type)
24264 && (va_list_type_node == NULL_TREE
24265 || (TYPE_MAIN_VARIANT (type)
24266 != TYPE_MAIN_VARIANT (va_list_type_node)))
24267 && TYPE_SIZE (type)
24268 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24269 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
24270 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
24273 if (TREE_CODE (type) == ARRAY_TYPE)
24275 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
24277 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
24280 else if (TREE_CODE (type) == COMPLEX_TYPE)
24282 if (TYPE_MODE (type) == DCmode && align < 64)
24284 if ((TYPE_MODE (type) == XCmode
24285 || TYPE_MODE (type) == TCmode) && align < 128)
24288 else if ((TREE_CODE (type) == RECORD_TYPE
24289 || TREE_CODE (type) == UNION_TYPE
24290 || TREE_CODE (type) == QUAL_UNION_TYPE)
24291 && TYPE_FIELDS (type))
24293 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
24295 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
24298 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
24299 || TREE_CODE (type) == INTEGER_TYPE)
24302 if (TYPE_MODE (type) == DFmode && align < 64)
24304 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
24310 /* Compute the minimum required alignment for dynamic stack realignment
24311 purposes for a local variable, parameter or a stack slot. EXP is
24312 the data type or decl itself, MODE is its mode and ALIGN is the
24313 alignment that the object would ordinarily have. */
24316 ix86_minimum_alignment (tree exp, enum machine_mode mode,
24317 unsigned int align)
24321 if (exp && DECL_P (exp))
24323 type = TREE_TYPE (exp);
24332 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
24333 return align;
24335 /* Don't do dynamic stack realignment for long long objects with
24336 -mpreferred-stack-boundary=2. */
24337 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
24338 && (!type || !TYPE_USER_ALIGN (type))
24339 && (!decl || !DECL_USER_ALIGN (decl)))
24340 return 32;
24342 return align;
24343 }
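/* Editorial note (not in the original source): the net effect of the
   checks above is that with -m32 -mpreferred-stack-boundary=2 a local
   'long long' reports a minimum alignment of only 32 bits, so its usual
   64-bit request does not by itself force dynamic realignment of the
   whole stack frame.  */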
24345 /* Find a location for the static chain incoming to a nested function.
24346 This is a register, unless all free registers are used by arguments. */
24349 ix86_static_chain (const_tree fndecl, bool incoming_p)
24353 if (!DECL_STATIC_CHAIN (fndecl))
24354 return NULL;
24358 /* We always use R10 in 64-bit mode. */
24366 /* By default in 32-bit mode we use ECX to pass the static chain. */
24369 fntype = TREE_TYPE (fndecl);
24370 ccvt = ix86_get_callcvt (fntype);
24371 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
24373 /* Fastcall functions use ecx/edx for arguments, which leaves
24374 us with EAX for the static chain.
24375 Thiscall functions use ecx for arguments, which also
24376 leaves us with EAX for the static chain. */
24379 else if (ix86_function_regparm (fntype, fndecl) == 3)
24381 /* For regparm 3, we have no free call-clobbered registers in
24382 which to store the static chain. In order to implement this,
24383 we have the trampoline push the static chain to the stack.
24384 However, we can't push a value below the return address when
24385 we call the nested function directly, so we have to use an
24386 alternate entry point. For this we use ESI, and have the
24387 alternate entry point push ESI, so that things appear the
24388 same once we're executing the nested function. */
24391 if (fndecl == current_function_decl)
24392 ix86_static_chain_on_stack = true;
24393 return gen_frame_mem (SImode,
24394 plus_constant (arg_pointer_rtx, -8));
24400 return gen_rtx_REG (Pmode, regno);
24401 }
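/* Editorial example (not in the original source): the register choice is
   observable from GNU C nested functions.  Under -m32 a nested function
   normally receives its static chain in %ecx:

     void outer (void)
     {
       int v = 0;
       void inner (void) { v++; }   // chain to outer's frame in %ecx
       inner ();
     }

   whereas a fastcall/thiscall nested function gets the chain in %eax,
   and regparm(3) forces the %esi/stack scheme described above.  */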
24403 /* Emit RTL insns to initialize the variable parts of a trampoline.
24404 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24405 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24406 to be passed to the target function. */
24409 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
24415 fnaddr = XEXP (DECL_RTL (fndecl), 0);
24421 /* Load the function address to r11. Try to load the address using
24422 the shorter movl instead of movabs. We may want to support
24423 movq for kernel mode, but the kernel does not use trampolines at
24424 the moment.  */
24425 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
24427 fnaddr = copy_to_mode_reg (DImode, fnaddr);
24429 mem = adjust_address (m_tramp, HImode, offset);
24430 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
24432 mem = adjust_address (m_tramp, SImode, offset + 2);
24433 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
24438 mem = adjust_address (m_tramp, HImode, offset);
24439 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
24441 mem = adjust_address (m_tramp, DImode, offset + 2);
24442 emit_move_insn (mem, fnaddr);
24446 /* Load static chain using movabs to r10. Use the
24447 shorter movl instead of movabs for x32. */
24459 mem = adjust_address (m_tramp, HImode, offset);
24460 emit_move_insn (mem, gen_int_mode (opcode, HImode));
24462 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
24463 emit_move_insn (mem, chain_value);
24466 /* Jump to r11; the last (unused) byte is a nop, only there to
24467 pad the write out to a single 32-bit store. */
24468 mem = adjust_address (m_tramp, SImode, offset);
24469 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
24476 /* Depending on the static chain location, either load a register
24477 with a constant, or push the constant to the stack. All of the
24478 instructions are the same size. */
24479 chain = ix86_static_chain (fndecl, true);
24482 switch (REGNO (chain))
24483 {
24484 case AX_REG:
24485 opcode = 0xb8; break;
24486 case CX_REG:
24487 opcode = 0xb9; break;
24488 default:
24489 gcc_unreachable ();
24490 }
24495 mem = adjust_address (m_tramp, QImode, offset);
24496 emit_move_insn (mem, gen_int_mode (opcode, QImode));
24498 mem = adjust_address (m_tramp, SImode, offset + 1);
24499 emit_move_insn (mem, chain_value);
24502 mem = adjust_address (m_tramp, QImode, offset);
24503 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
24505 mem = adjust_address (m_tramp, SImode, offset + 1);
24507 /* Compute offset from the end of the jmp to the target function.
24508 In the case in which the trampoline stores the static chain on
24509 the stack, we need to skip the first insn which pushes the
24510 (call-saved) register static chain; this push is 1 byte. */
24512 disp = expand_binop (SImode, sub_optab, fnaddr,
24513 plus_constant (XEXP (m_tramp, 0),
24514 offset - (MEM_P (chain) ? 1 : 0)),
24515 NULL_RTX, 1, OPTAB_DIRECT);
24516 emit_move_insn (mem, disp);
24519 gcc_assert (offset <= TRAMPOLINE_SIZE);
24521 #ifdef HAVE_ENABLE_EXECUTE_STACK
24522 #ifdef CHECK_EXECUTE_STACK_ENABLED
24523 if (CHECK_EXECUTE_STACK_ENABLED)
24524 #endif
24525 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
24526 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
24527 #endif
24528 }
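/* Editorial sketch (not in the original source) of the 64-bit trampoline
   laid down above, for a target address that needs the full movabs form:

     49 bb <imm64>    movabs $fnaddr, %r11
     49 ba <imm64>    movabs $chain_value, %r10   (movl form used for x32)
     49 ff e3         jmpq   *%r11
     90               nop     ; pads the final write to one 32-bit store

   The 32-bit variant instead stores a one-byte mov-immediate opcode
   (0xb8 for %eax, 0xb9 for %ecx, or a push when the chain lives on the
   stack) followed by an e9 <rel32> jump whose displacement is computed
   just above.  */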
24530 /* The following file contains several enumerations and data structures
24531 built from the definitions in i386-builtin-types.def. */
24533 #include "i386-builtin-types.inc"
24535 /* Table for the ix86 builtin non-function types. */
24536 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
24538 /* Retrieve an element from the above table, building some of
24539 the types lazily. */
24542 ix86_get_builtin_type (enum ix86_builtin_type tcode)
24544 unsigned int index;
24547 gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));
24549 type = ix86_builtin_type_tab[(int) tcode];
24550 if (type)
24551 return type;
24553 gcc_assert (tcode > IX86_BT_LAST_PRIM);
24554 if (tcode <= IX86_BT_LAST_VECT)
24556 enum machine_mode mode;
24558 index = tcode - IX86_BT_LAST_PRIM - 1;
24559 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
24560 mode = ix86_builtin_type_vect_mode[index];
24562 type = build_vector_type_for_mode (itype, mode);
24568 index = tcode - IX86_BT_LAST_VECT - 1;
24569 if (tcode <= IX86_BT_LAST_PTR)
24570 quals = TYPE_UNQUALIFIED;
24572 quals = TYPE_QUAL_CONST;
24574 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
24575 if (quals != TYPE_UNQUALIFIED)
24576 itype = build_qualified_type (itype, quals);
24578 type = build_pointer_type (itype);
24581 ix86_builtin_type_tab[(int) tcode] = type;
24582 return type;
24583 }
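/* Editorial sketch (not in the original source): the table encoding
   relied on above orders vector type codes right after the primitives
   and pointer codes after the vectors, so a vector type is built lazily
   from its element type, roughly:

     itype = ix86_get_builtin_type (base_code);        // e.g. float
     type  = build_vector_type_for_mode (itype, mode); // e.g. V4SFmode

   and memoized in ix86_builtin_type_tab so each tree is created once.  */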
24585 /* Table for the ix86 builtin function types. */
24586 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
24588 /* Retrieve an element from the above table, building some of
24589 the types lazily. */
24592 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
24596 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
24598 type = ix86_builtin_func_type_tab[(int) tcode];
24599 if (type)
24600 return type;
24602 if (tcode <= IX86_BT_LAST_FUNC)
24604 unsigned start = ix86_builtin_func_start[(int) tcode];
24605 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
24606 tree rtype, atype, args = void_list_node;
24609 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
24610 for (i = after - 1; i > start; --i)
24612 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
24613 args = tree_cons (NULL, atype, args);
24616 type = build_function_type (rtype, args);
24620 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
24621 enum ix86_builtin_func_type icode;
24623 icode = ix86_builtin_func_alias_base[index];
24624 type = ix86_get_builtin_func_type (icode);
24627 ix86_builtin_func_type_tab[(int) tcode] = type;
24628 return type;
24629 }
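/* Editorial note (not in the original source): tree_cons prepends, so
   the loop above walks the argument codes from last to first and the
   resulting TYPE_ARG_TYPES list comes out in declaration order.  For a
   hypothetical two-argument signature (arg1, arg2) the final list is:

     args = tree_cons (NULL, arg1, tree_cons (NULL, arg2, void_list_node));
     type = build_function_type (rtype, args);  */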
24632 /* Codes for all the SSE/MMX builtins. */
24633 enum ix86_builtins
24634 {
24635 IX86_BUILTIN_ADDPS,
24636 IX86_BUILTIN_ADDSS,
24637 IX86_BUILTIN_DIVPS,
24638 IX86_BUILTIN_DIVSS,
24639 IX86_BUILTIN_MULPS,
24640 IX86_BUILTIN_MULSS,
24641 IX86_BUILTIN_SUBPS,
24642 IX86_BUILTIN_SUBSS,
24644 IX86_BUILTIN_CMPEQPS,
24645 IX86_BUILTIN_CMPLTPS,
24646 IX86_BUILTIN_CMPLEPS,
24647 IX86_BUILTIN_CMPGTPS,
24648 IX86_BUILTIN_CMPGEPS,
24649 IX86_BUILTIN_CMPNEQPS,
24650 IX86_BUILTIN_CMPNLTPS,
24651 IX86_BUILTIN_CMPNLEPS,
24652 IX86_BUILTIN_CMPNGTPS,
24653 IX86_BUILTIN_CMPNGEPS,
24654 IX86_BUILTIN_CMPORDPS,
24655 IX86_BUILTIN_CMPUNORDPS,
24656 IX86_BUILTIN_CMPEQSS,
24657 IX86_BUILTIN_CMPLTSS,
24658 IX86_BUILTIN_CMPLESS,
24659 IX86_BUILTIN_CMPNEQSS,
24660 IX86_BUILTIN_CMPNLTSS,
24661 IX86_BUILTIN_CMPNLESS,
24662 IX86_BUILTIN_CMPNGTSS,
24663 IX86_BUILTIN_CMPNGESS,
24664 IX86_BUILTIN_CMPORDSS,
24665 IX86_BUILTIN_CMPUNORDSS,
24667 IX86_BUILTIN_COMIEQSS,
24668 IX86_BUILTIN_COMILTSS,
24669 IX86_BUILTIN_COMILESS,
24670 IX86_BUILTIN_COMIGTSS,
24671 IX86_BUILTIN_COMIGESS,
24672 IX86_BUILTIN_COMINEQSS,
24673 IX86_BUILTIN_UCOMIEQSS,
24674 IX86_BUILTIN_UCOMILTSS,
24675 IX86_BUILTIN_UCOMILESS,
24676 IX86_BUILTIN_UCOMIGTSS,
24677 IX86_BUILTIN_UCOMIGESS,
24678 IX86_BUILTIN_UCOMINEQSS,
24680 IX86_BUILTIN_CVTPI2PS,
24681 IX86_BUILTIN_CVTPS2PI,
24682 IX86_BUILTIN_CVTSI2SS,
24683 IX86_BUILTIN_CVTSI642SS,
24684 IX86_BUILTIN_CVTSS2SI,
24685 IX86_BUILTIN_CVTSS2SI64,
24686 IX86_BUILTIN_CVTTPS2PI,
24687 IX86_BUILTIN_CVTTSS2SI,
24688 IX86_BUILTIN_CVTTSS2SI64,
24690 IX86_BUILTIN_MAXPS,
24691 IX86_BUILTIN_MAXSS,
24692 IX86_BUILTIN_MINPS,
24693 IX86_BUILTIN_MINSS,
24695 IX86_BUILTIN_LOADUPS,
24696 IX86_BUILTIN_STOREUPS,
24697 IX86_BUILTIN_MOVSS,
24699 IX86_BUILTIN_MOVHLPS,
24700 IX86_BUILTIN_MOVLHPS,
24701 IX86_BUILTIN_LOADHPS,
24702 IX86_BUILTIN_LOADLPS,
24703 IX86_BUILTIN_STOREHPS,
24704 IX86_BUILTIN_STORELPS,
24706 IX86_BUILTIN_MASKMOVQ,
24707 IX86_BUILTIN_MOVMSKPS,
24708 IX86_BUILTIN_PMOVMSKB,
24710 IX86_BUILTIN_MOVNTPS,
24711 IX86_BUILTIN_MOVNTQ,
24713 IX86_BUILTIN_LOADDQU,
24714 IX86_BUILTIN_STOREDQU,
24716 IX86_BUILTIN_PACKSSWB,
24717 IX86_BUILTIN_PACKSSDW,
24718 IX86_BUILTIN_PACKUSWB,
24720 IX86_BUILTIN_PADDB,
24721 IX86_BUILTIN_PADDW,
24722 IX86_BUILTIN_PADDD,
24723 IX86_BUILTIN_PADDQ,
24724 IX86_BUILTIN_PADDSB,
24725 IX86_BUILTIN_PADDSW,
24726 IX86_BUILTIN_PADDUSB,
24727 IX86_BUILTIN_PADDUSW,
24728 IX86_BUILTIN_PSUBB,
24729 IX86_BUILTIN_PSUBW,
24730 IX86_BUILTIN_PSUBD,
24731 IX86_BUILTIN_PSUBQ,
24732 IX86_BUILTIN_PSUBSB,
24733 IX86_BUILTIN_PSUBSW,
24734 IX86_BUILTIN_PSUBUSB,
24735 IX86_BUILTIN_PSUBUSW,
24738 IX86_BUILTIN_PANDN,
24742 IX86_BUILTIN_PAVGB,
24743 IX86_BUILTIN_PAVGW,
24745 IX86_BUILTIN_PCMPEQB,
24746 IX86_BUILTIN_PCMPEQW,
24747 IX86_BUILTIN_PCMPEQD,
24748 IX86_BUILTIN_PCMPGTB,
24749 IX86_BUILTIN_PCMPGTW,
24750 IX86_BUILTIN_PCMPGTD,
24752 IX86_BUILTIN_PMADDWD,
24754 IX86_BUILTIN_PMAXSW,
24755 IX86_BUILTIN_PMAXUB,
24756 IX86_BUILTIN_PMINSW,
24757 IX86_BUILTIN_PMINUB,
24759 IX86_BUILTIN_PMULHUW,
24760 IX86_BUILTIN_PMULHW,
24761 IX86_BUILTIN_PMULLW,
24763 IX86_BUILTIN_PSADBW,
24764 IX86_BUILTIN_PSHUFW,
24766 IX86_BUILTIN_PSLLW,
24767 IX86_BUILTIN_PSLLD,
24768 IX86_BUILTIN_PSLLQ,
24769 IX86_BUILTIN_PSRAW,
24770 IX86_BUILTIN_PSRAD,
24771 IX86_BUILTIN_PSRLW,
24772 IX86_BUILTIN_PSRLD,
24773 IX86_BUILTIN_PSRLQ,
24774 IX86_BUILTIN_PSLLWI,
24775 IX86_BUILTIN_PSLLDI,
24776 IX86_BUILTIN_PSLLQI,
24777 IX86_BUILTIN_PSRAWI,
24778 IX86_BUILTIN_PSRADI,
24779 IX86_BUILTIN_PSRLWI,
24780 IX86_BUILTIN_PSRLDI,
24781 IX86_BUILTIN_PSRLQI,
24783 IX86_BUILTIN_PUNPCKHBW,
24784 IX86_BUILTIN_PUNPCKHWD,
24785 IX86_BUILTIN_PUNPCKHDQ,
24786 IX86_BUILTIN_PUNPCKLBW,
24787 IX86_BUILTIN_PUNPCKLWD,
24788 IX86_BUILTIN_PUNPCKLDQ,
24790 IX86_BUILTIN_SHUFPS,
24792 IX86_BUILTIN_RCPPS,
24793 IX86_BUILTIN_RCPSS,
24794 IX86_BUILTIN_RSQRTPS,
24795 IX86_BUILTIN_RSQRTPS_NR,
24796 IX86_BUILTIN_RSQRTSS,
24797 IX86_BUILTIN_RSQRTF,
24798 IX86_BUILTIN_SQRTPS,
24799 IX86_BUILTIN_SQRTPS_NR,
24800 IX86_BUILTIN_SQRTSS,
24802 IX86_BUILTIN_UNPCKHPS,
24803 IX86_BUILTIN_UNPCKLPS,
24805 IX86_BUILTIN_ANDPS,
24806 IX86_BUILTIN_ANDNPS,
24808 IX86_BUILTIN_XORPS,
24811 IX86_BUILTIN_LDMXCSR,
24812 IX86_BUILTIN_STMXCSR,
24813 IX86_BUILTIN_SFENCE,
24815 /* 3DNow! Original */
24816 IX86_BUILTIN_FEMMS,
24817 IX86_BUILTIN_PAVGUSB,
24818 IX86_BUILTIN_PF2ID,
24819 IX86_BUILTIN_PFACC,
24820 IX86_BUILTIN_PFADD,
24821 IX86_BUILTIN_PFCMPEQ,
24822 IX86_BUILTIN_PFCMPGE,
24823 IX86_BUILTIN_PFCMPGT,
24824 IX86_BUILTIN_PFMAX,
24825 IX86_BUILTIN_PFMIN,
24826 IX86_BUILTIN_PFMUL,
24827 IX86_BUILTIN_PFRCP,
24828 IX86_BUILTIN_PFRCPIT1,
24829 IX86_BUILTIN_PFRCPIT2,
24830 IX86_BUILTIN_PFRSQIT1,
24831 IX86_BUILTIN_PFRSQRT,
24832 IX86_BUILTIN_PFSUB,
24833 IX86_BUILTIN_PFSUBR,
24834 IX86_BUILTIN_PI2FD,
24835 IX86_BUILTIN_PMULHRW,
24837 /* 3DNow! Athlon Extensions */
24838 IX86_BUILTIN_PF2IW,
24839 IX86_BUILTIN_PFNACC,
24840 IX86_BUILTIN_PFPNACC,
24841 IX86_BUILTIN_PI2FW,
24842 IX86_BUILTIN_PSWAPDSI,
24843 IX86_BUILTIN_PSWAPDSF,
24846 IX86_BUILTIN_ADDPD,
24847 IX86_BUILTIN_ADDSD,
24848 IX86_BUILTIN_DIVPD,
24849 IX86_BUILTIN_DIVSD,
24850 IX86_BUILTIN_MULPD,
24851 IX86_BUILTIN_MULSD,
24852 IX86_BUILTIN_SUBPD,
24853 IX86_BUILTIN_SUBSD,
24855 IX86_BUILTIN_CMPEQPD,
24856 IX86_BUILTIN_CMPLTPD,
24857 IX86_BUILTIN_CMPLEPD,
24858 IX86_BUILTIN_CMPGTPD,
24859 IX86_BUILTIN_CMPGEPD,
24860 IX86_BUILTIN_CMPNEQPD,
24861 IX86_BUILTIN_CMPNLTPD,
24862 IX86_BUILTIN_CMPNLEPD,
24863 IX86_BUILTIN_CMPNGTPD,
24864 IX86_BUILTIN_CMPNGEPD,
24865 IX86_BUILTIN_CMPORDPD,
24866 IX86_BUILTIN_CMPUNORDPD,
24867 IX86_BUILTIN_CMPEQSD,
24868 IX86_BUILTIN_CMPLTSD,
24869 IX86_BUILTIN_CMPLESD,
24870 IX86_BUILTIN_CMPNEQSD,
24871 IX86_BUILTIN_CMPNLTSD,
24872 IX86_BUILTIN_CMPNLESD,
24873 IX86_BUILTIN_CMPORDSD,
24874 IX86_BUILTIN_CMPUNORDSD,
24876 IX86_BUILTIN_COMIEQSD,
24877 IX86_BUILTIN_COMILTSD,
24878 IX86_BUILTIN_COMILESD,
24879 IX86_BUILTIN_COMIGTSD,
24880 IX86_BUILTIN_COMIGESD,
24881 IX86_BUILTIN_COMINEQSD,
24882 IX86_BUILTIN_UCOMIEQSD,
24883 IX86_BUILTIN_UCOMILTSD,
24884 IX86_BUILTIN_UCOMILESD,
24885 IX86_BUILTIN_UCOMIGTSD,
24886 IX86_BUILTIN_UCOMIGESD,
24887 IX86_BUILTIN_UCOMINEQSD,
24889 IX86_BUILTIN_MAXPD,
24890 IX86_BUILTIN_MAXSD,
24891 IX86_BUILTIN_MINPD,
24892 IX86_BUILTIN_MINSD,
24894 IX86_BUILTIN_ANDPD,
24895 IX86_BUILTIN_ANDNPD,
24897 IX86_BUILTIN_XORPD,
24899 IX86_BUILTIN_SQRTPD,
24900 IX86_BUILTIN_SQRTSD,
24902 IX86_BUILTIN_UNPCKHPD,
24903 IX86_BUILTIN_UNPCKLPD,
24905 IX86_BUILTIN_SHUFPD,
24907 IX86_BUILTIN_LOADUPD,
24908 IX86_BUILTIN_STOREUPD,
24909 IX86_BUILTIN_MOVSD,
24911 IX86_BUILTIN_LOADHPD,
24912 IX86_BUILTIN_LOADLPD,
24914 IX86_BUILTIN_CVTDQ2PD,
24915 IX86_BUILTIN_CVTDQ2PS,
24917 IX86_BUILTIN_CVTPD2DQ,
24918 IX86_BUILTIN_CVTPD2PI,
24919 IX86_BUILTIN_CVTPD2PS,
24920 IX86_BUILTIN_CVTTPD2DQ,
24921 IX86_BUILTIN_CVTTPD2PI,
24923 IX86_BUILTIN_CVTPI2PD,
24924 IX86_BUILTIN_CVTSI2SD,
24925 IX86_BUILTIN_CVTSI642SD,
24927 IX86_BUILTIN_CVTSD2SI,
24928 IX86_BUILTIN_CVTSD2SI64,
24929 IX86_BUILTIN_CVTSD2SS,
24930 IX86_BUILTIN_CVTSS2SD,
24931 IX86_BUILTIN_CVTTSD2SI,
24932 IX86_BUILTIN_CVTTSD2SI64,
24934 IX86_BUILTIN_CVTPS2DQ,
24935 IX86_BUILTIN_CVTPS2PD,
24936 IX86_BUILTIN_CVTTPS2DQ,
24938 IX86_BUILTIN_MOVNTI,
24939 IX86_BUILTIN_MOVNTI64,
24940 IX86_BUILTIN_MOVNTPD,
24941 IX86_BUILTIN_MOVNTDQ,
24943 IX86_BUILTIN_MOVQ128,
24946 IX86_BUILTIN_MASKMOVDQU,
24947 IX86_BUILTIN_MOVMSKPD,
24948 IX86_BUILTIN_PMOVMSKB128,
24950 IX86_BUILTIN_PACKSSWB128,
24951 IX86_BUILTIN_PACKSSDW128,
24952 IX86_BUILTIN_PACKUSWB128,
24954 IX86_BUILTIN_PADDB128,
24955 IX86_BUILTIN_PADDW128,
24956 IX86_BUILTIN_PADDD128,
24957 IX86_BUILTIN_PADDQ128,
24958 IX86_BUILTIN_PADDSB128,
24959 IX86_BUILTIN_PADDSW128,
24960 IX86_BUILTIN_PADDUSB128,
24961 IX86_BUILTIN_PADDUSW128,
24962 IX86_BUILTIN_PSUBB128,
24963 IX86_BUILTIN_PSUBW128,
24964 IX86_BUILTIN_PSUBD128,
24965 IX86_BUILTIN_PSUBQ128,
24966 IX86_BUILTIN_PSUBSB128,
24967 IX86_BUILTIN_PSUBSW128,
24968 IX86_BUILTIN_PSUBUSB128,
24969 IX86_BUILTIN_PSUBUSW128,
24971 IX86_BUILTIN_PAND128,
24972 IX86_BUILTIN_PANDN128,
24973 IX86_BUILTIN_POR128,
24974 IX86_BUILTIN_PXOR128,
24976 IX86_BUILTIN_PAVGB128,
24977 IX86_BUILTIN_PAVGW128,
24979 IX86_BUILTIN_PCMPEQB128,
24980 IX86_BUILTIN_PCMPEQW128,
24981 IX86_BUILTIN_PCMPEQD128,
24982 IX86_BUILTIN_PCMPGTB128,
24983 IX86_BUILTIN_PCMPGTW128,
24984 IX86_BUILTIN_PCMPGTD128,
24986 IX86_BUILTIN_PMADDWD128,
24988 IX86_BUILTIN_PMAXSW128,
24989 IX86_BUILTIN_PMAXUB128,
24990 IX86_BUILTIN_PMINSW128,
24991 IX86_BUILTIN_PMINUB128,
24993 IX86_BUILTIN_PMULUDQ,
24994 IX86_BUILTIN_PMULUDQ128,
24995 IX86_BUILTIN_PMULHUW128,
24996 IX86_BUILTIN_PMULHW128,
24997 IX86_BUILTIN_PMULLW128,
24999 IX86_BUILTIN_PSADBW128,
25000 IX86_BUILTIN_PSHUFHW,
25001 IX86_BUILTIN_PSHUFLW,
25002 IX86_BUILTIN_PSHUFD,
25004 IX86_BUILTIN_PSLLDQI128,
25005 IX86_BUILTIN_PSLLWI128,
25006 IX86_BUILTIN_PSLLDI128,
25007 IX86_BUILTIN_PSLLQI128,
25008 IX86_BUILTIN_PSRAWI128,
25009 IX86_BUILTIN_PSRADI128,
25010 IX86_BUILTIN_PSRLDQI128,
25011 IX86_BUILTIN_PSRLWI128,
25012 IX86_BUILTIN_PSRLDI128,
25013 IX86_BUILTIN_PSRLQI128,
25015 IX86_BUILTIN_PSLLDQ128,
25016 IX86_BUILTIN_PSLLW128,
25017 IX86_BUILTIN_PSLLD128,
25018 IX86_BUILTIN_PSLLQ128,
25019 IX86_BUILTIN_PSRAW128,
25020 IX86_BUILTIN_PSRAD128,
25021 IX86_BUILTIN_PSRLW128,
25022 IX86_BUILTIN_PSRLD128,
25023 IX86_BUILTIN_PSRLQ128,
25025 IX86_BUILTIN_PUNPCKHBW128,
25026 IX86_BUILTIN_PUNPCKHWD128,
25027 IX86_BUILTIN_PUNPCKHDQ128,
25028 IX86_BUILTIN_PUNPCKHQDQ128,
25029 IX86_BUILTIN_PUNPCKLBW128,
25030 IX86_BUILTIN_PUNPCKLWD128,
25031 IX86_BUILTIN_PUNPCKLDQ128,
25032 IX86_BUILTIN_PUNPCKLQDQ128,
25034 IX86_BUILTIN_CLFLUSH,
25035 IX86_BUILTIN_MFENCE,
25036 IX86_BUILTIN_LFENCE,
25037 IX86_BUILTIN_PAUSE,
25039 IX86_BUILTIN_BSRSI,
25040 IX86_BUILTIN_BSRDI,
25041 IX86_BUILTIN_RDPMC,
25042 IX86_BUILTIN_RDTSC,
25043 IX86_BUILTIN_RDTSCP,
25044 IX86_BUILTIN_ROLQI,
25045 IX86_BUILTIN_ROLHI,
25046 IX86_BUILTIN_RORQI,
25047 IX86_BUILTIN_RORHI,
25050 IX86_BUILTIN_ADDSUBPS,
25051 IX86_BUILTIN_HADDPS,
25052 IX86_BUILTIN_HSUBPS,
25053 IX86_BUILTIN_MOVSHDUP,
25054 IX86_BUILTIN_MOVSLDUP,
25055 IX86_BUILTIN_ADDSUBPD,
25056 IX86_BUILTIN_HADDPD,
25057 IX86_BUILTIN_HSUBPD,
25058 IX86_BUILTIN_LDDQU,
25060 IX86_BUILTIN_MONITOR,
25061 IX86_BUILTIN_MWAIT,
25064 IX86_BUILTIN_PHADDW,
25065 IX86_BUILTIN_PHADDD,
25066 IX86_BUILTIN_PHADDSW,
25067 IX86_BUILTIN_PHSUBW,
25068 IX86_BUILTIN_PHSUBD,
25069 IX86_BUILTIN_PHSUBSW,
25070 IX86_BUILTIN_PMADDUBSW,
25071 IX86_BUILTIN_PMULHRSW,
25072 IX86_BUILTIN_PSHUFB,
25073 IX86_BUILTIN_PSIGNB,
25074 IX86_BUILTIN_PSIGNW,
25075 IX86_BUILTIN_PSIGND,
25076 IX86_BUILTIN_PALIGNR,
25077 IX86_BUILTIN_PABSB,
25078 IX86_BUILTIN_PABSW,
25079 IX86_BUILTIN_PABSD,
25081 IX86_BUILTIN_PHADDW128,
25082 IX86_BUILTIN_PHADDD128,
25083 IX86_BUILTIN_PHADDSW128,
25084 IX86_BUILTIN_PHSUBW128,
25085 IX86_BUILTIN_PHSUBD128,
25086 IX86_BUILTIN_PHSUBSW128,
25087 IX86_BUILTIN_PMADDUBSW128,
25088 IX86_BUILTIN_PMULHRSW128,
25089 IX86_BUILTIN_PSHUFB128,
25090 IX86_BUILTIN_PSIGNB128,
25091 IX86_BUILTIN_PSIGNW128,
25092 IX86_BUILTIN_PSIGND128,
25093 IX86_BUILTIN_PALIGNR128,
25094 IX86_BUILTIN_PABSB128,
25095 IX86_BUILTIN_PABSW128,
25096 IX86_BUILTIN_PABSD128,
25098 /* AMDFAM10 - SSE4A New Instructions. */
25099 IX86_BUILTIN_MOVNTSD,
25100 IX86_BUILTIN_MOVNTSS,
25101 IX86_BUILTIN_EXTRQI,
25102 IX86_BUILTIN_EXTRQ,
25103 IX86_BUILTIN_INSERTQI,
25104 IX86_BUILTIN_INSERTQ,
25107 IX86_BUILTIN_BLENDPD,
25108 IX86_BUILTIN_BLENDPS,
25109 IX86_BUILTIN_BLENDVPD,
25110 IX86_BUILTIN_BLENDVPS,
25111 IX86_BUILTIN_PBLENDVB128,
25112 IX86_BUILTIN_PBLENDW128,
25117 IX86_BUILTIN_INSERTPS128,
25119 IX86_BUILTIN_MOVNTDQA,
25120 IX86_BUILTIN_MPSADBW128,
25121 IX86_BUILTIN_PACKUSDW128,
25122 IX86_BUILTIN_PCMPEQQ,
25123 IX86_BUILTIN_PHMINPOSUW128,
25125 IX86_BUILTIN_PMAXSB128,
25126 IX86_BUILTIN_PMAXSD128,
25127 IX86_BUILTIN_PMAXUD128,
25128 IX86_BUILTIN_PMAXUW128,
25130 IX86_BUILTIN_PMINSB128,
25131 IX86_BUILTIN_PMINSD128,
25132 IX86_BUILTIN_PMINUD128,
25133 IX86_BUILTIN_PMINUW128,
25135 IX86_BUILTIN_PMOVSXBW128,
25136 IX86_BUILTIN_PMOVSXBD128,
25137 IX86_BUILTIN_PMOVSXBQ128,
25138 IX86_BUILTIN_PMOVSXWD128,
25139 IX86_BUILTIN_PMOVSXWQ128,
25140 IX86_BUILTIN_PMOVSXDQ128,
25142 IX86_BUILTIN_PMOVZXBW128,
25143 IX86_BUILTIN_PMOVZXBD128,
25144 IX86_BUILTIN_PMOVZXBQ128,
25145 IX86_BUILTIN_PMOVZXWD128,
25146 IX86_BUILTIN_PMOVZXWQ128,
25147 IX86_BUILTIN_PMOVZXDQ128,
25149 IX86_BUILTIN_PMULDQ128,
25150 IX86_BUILTIN_PMULLD128,
25152 IX86_BUILTIN_ROUNDSD,
25153 IX86_BUILTIN_ROUNDSS,
25155 IX86_BUILTIN_ROUNDPD,
25156 IX86_BUILTIN_ROUNDPS,
25158 IX86_BUILTIN_FLOORPD,
25159 IX86_BUILTIN_CEILPD,
25160 IX86_BUILTIN_TRUNCPD,
25161 IX86_BUILTIN_RINTPD,
25162 IX86_BUILTIN_ROUNDPD_AZ,
25164 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
25165 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
25166 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
25168 IX86_BUILTIN_FLOORPS,
25169 IX86_BUILTIN_CEILPS,
25170 IX86_BUILTIN_TRUNCPS,
25171 IX86_BUILTIN_RINTPS,
25172 IX86_BUILTIN_ROUNDPS_AZ,
25174 IX86_BUILTIN_FLOORPS_SFIX,
25175 IX86_BUILTIN_CEILPS_SFIX,
25176 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
25178 IX86_BUILTIN_PTESTZ,
25179 IX86_BUILTIN_PTESTC,
25180 IX86_BUILTIN_PTESTNZC,
25182 IX86_BUILTIN_VEC_INIT_V2SI,
25183 IX86_BUILTIN_VEC_INIT_V4HI,
25184 IX86_BUILTIN_VEC_INIT_V8QI,
25185 IX86_BUILTIN_VEC_EXT_V2DF,
25186 IX86_BUILTIN_VEC_EXT_V2DI,
25187 IX86_BUILTIN_VEC_EXT_V4SF,
25188 IX86_BUILTIN_VEC_EXT_V4SI,
25189 IX86_BUILTIN_VEC_EXT_V8HI,
25190 IX86_BUILTIN_VEC_EXT_V2SI,
25191 IX86_BUILTIN_VEC_EXT_V4HI,
25192 IX86_BUILTIN_VEC_EXT_V16QI,
25193 IX86_BUILTIN_VEC_SET_V2DI,
25194 IX86_BUILTIN_VEC_SET_V4SF,
25195 IX86_BUILTIN_VEC_SET_V4SI,
25196 IX86_BUILTIN_VEC_SET_V8HI,
25197 IX86_BUILTIN_VEC_SET_V4HI,
25198 IX86_BUILTIN_VEC_SET_V16QI,
25200 IX86_BUILTIN_VEC_PACK_SFIX,
25201 IX86_BUILTIN_VEC_PACK_SFIX256,
25204 IX86_BUILTIN_CRC32QI,
25205 IX86_BUILTIN_CRC32HI,
25206 IX86_BUILTIN_CRC32SI,
25207 IX86_BUILTIN_CRC32DI,
25209 IX86_BUILTIN_PCMPESTRI128,
25210 IX86_BUILTIN_PCMPESTRM128,
25211 IX86_BUILTIN_PCMPESTRA128,
25212 IX86_BUILTIN_PCMPESTRC128,
25213 IX86_BUILTIN_PCMPESTRO128,
25214 IX86_BUILTIN_PCMPESTRS128,
25215 IX86_BUILTIN_PCMPESTRZ128,
25216 IX86_BUILTIN_PCMPISTRI128,
25217 IX86_BUILTIN_PCMPISTRM128,
25218 IX86_BUILTIN_PCMPISTRA128,
25219 IX86_BUILTIN_PCMPISTRC128,
25220 IX86_BUILTIN_PCMPISTRO128,
25221 IX86_BUILTIN_PCMPISTRS128,
25222 IX86_BUILTIN_PCMPISTRZ128,
25224 IX86_BUILTIN_PCMPGTQ,
25226 /* AES instructions */
25227 IX86_BUILTIN_AESENC128,
25228 IX86_BUILTIN_AESENCLAST128,
25229 IX86_BUILTIN_AESDEC128,
25230 IX86_BUILTIN_AESDECLAST128,
25231 IX86_BUILTIN_AESIMC128,
25232 IX86_BUILTIN_AESKEYGENASSIST128,
25234 /* PCLMUL instruction */
25235 IX86_BUILTIN_PCLMULQDQ128,
25238 IX86_BUILTIN_ADDPD256,
25239 IX86_BUILTIN_ADDPS256,
25240 IX86_BUILTIN_ADDSUBPD256,
25241 IX86_BUILTIN_ADDSUBPS256,
25242 IX86_BUILTIN_ANDPD256,
25243 IX86_BUILTIN_ANDPS256,
25244 IX86_BUILTIN_ANDNPD256,
25245 IX86_BUILTIN_ANDNPS256,
25246 IX86_BUILTIN_BLENDPD256,
25247 IX86_BUILTIN_BLENDPS256,
25248 IX86_BUILTIN_BLENDVPD256,
25249 IX86_BUILTIN_BLENDVPS256,
25250 IX86_BUILTIN_DIVPD256,
25251 IX86_BUILTIN_DIVPS256,
25252 IX86_BUILTIN_DPPS256,
25253 IX86_BUILTIN_HADDPD256,
25254 IX86_BUILTIN_HADDPS256,
25255 IX86_BUILTIN_HSUBPD256,
25256 IX86_BUILTIN_HSUBPS256,
25257 IX86_BUILTIN_MAXPD256,
25258 IX86_BUILTIN_MAXPS256,
25259 IX86_BUILTIN_MINPD256,
25260 IX86_BUILTIN_MINPS256,
25261 IX86_BUILTIN_MULPD256,
25262 IX86_BUILTIN_MULPS256,
25263 IX86_BUILTIN_ORPD256,
25264 IX86_BUILTIN_ORPS256,
25265 IX86_BUILTIN_SHUFPD256,
25266 IX86_BUILTIN_SHUFPS256,
25267 IX86_BUILTIN_SUBPD256,
25268 IX86_BUILTIN_SUBPS256,
25269 IX86_BUILTIN_XORPD256,
25270 IX86_BUILTIN_XORPS256,
25271 IX86_BUILTIN_CMPSD,
25272 IX86_BUILTIN_CMPSS,
25273 IX86_BUILTIN_CMPPD,
25274 IX86_BUILTIN_CMPPS,
25275 IX86_BUILTIN_CMPPD256,
25276 IX86_BUILTIN_CMPPS256,
25277 IX86_BUILTIN_CVTDQ2PD256,
25278 IX86_BUILTIN_CVTDQ2PS256,
25279 IX86_BUILTIN_CVTPD2PS256,
25280 IX86_BUILTIN_CVTPS2DQ256,
25281 IX86_BUILTIN_CVTPS2PD256,
25282 IX86_BUILTIN_CVTTPD2DQ256,
25283 IX86_BUILTIN_CVTPD2DQ256,
25284 IX86_BUILTIN_CVTTPS2DQ256,
25285 IX86_BUILTIN_EXTRACTF128PD256,
25286 IX86_BUILTIN_EXTRACTF128PS256,
25287 IX86_BUILTIN_EXTRACTF128SI256,
25288 IX86_BUILTIN_VZEROALL,
25289 IX86_BUILTIN_VZEROUPPER,
25290 IX86_BUILTIN_VPERMILVARPD,
25291 IX86_BUILTIN_VPERMILVARPS,
25292 IX86_BUILTIN_VPERMILVARPD256,
25293 IX86_BUILTIN_VPERMILVARPS256,
25294 IX86_BUILTIN_VPERMILPD,
25295 IX86_BUILTIN_VPERMILPS,
25296 IX86_BUILTIN_VPERMILPD256,
25297 IX86_BUILTIN_VPERMILPS256,
25298 IX86_BUILTIN_VPERMIL2PD,
25299 IX86_BUILTIN_VPERMIL2PS,
25300 IX86_BUILTIN_VPERMIL2PD256,
25301 IX86_BUILTIN_VPERMIL2PS256,
25302 IX86_BUILTIN_VPERM2F128PD256,
25303 IX86_BUILTIN_VPERM2F128PS256,
25304 IX86_BUILTIN_VPERM2F128SI256,
25305 IX86_BUILTIN_VBROADCASTSS,
25306 IX86_BUILTIN_VBROADCASTSD256,
25307 IX86_BUILTIN_VBROADCASTSS256,
25308 IX86_BUILTIN_VBROADCASTPD256,
25309 IX86_BUILTIN_VBROADCASTPS256,
25310 IX86_BUILTIN_VINSERTF128PD256,
25311 IX86_BUILTIN_VINSERTF128PS256,
25312 IX86_BUILTIN_VINSERTF128SI256,
25313 IX86_BUILTIN_LOADUPD256,
25314 IX86_BUILTIN_LOADUPS256,
25315 IX86_BUILTIN_STOREUPD256,
25316 IX86_BUILTIN_STOREUPS256,
25317 IX86_BUILTIN_LDDQU256,
25318 IX86_BUILTIN_MOVNTDQ256,
25319 IX86_BUILTIN_MOVNTPD256,
25320 IX86_BUILTIN_MOVNTPS256,
25321 IX86_BUILTIN_LOADDQU256,
25322 IX86_BUILTIN_STOREDQU256,
25323 IX86_BUILTIN_MASKLOADPD,
25324 IX86_BUILTIN_MASKLOADPS,
25325 IX86_BUILTIN_MASKSTOREPD,
25326 IX86_BUILTIN_MASKSTOREPS,
25327 IX86_BUILTIN_MASKLOADPD256,
25328 IX86_BUILTIN_MASKLOADPS256,
25329 IX86_BUILTIN_MASKSTOREPD256,
25330 IX86_BUILTIN_MASKSTOREPS256,
25331 IX86_BUILTIN_MOVSHDUP256,
25332 IX86_BUILTIN_MOVSLDUP256,
25333 IX86_BUILTIN_MOVDDUP256,
25335 IX86_BUILTIN_SQRTPD256,
25336 IX86_BUILTIN_SQRTPS256,
25337 IX86_BUILTIN_SQRTPS_NR256,
25338 IX86_BUILTIN_RSQRTPS256,
25339 IX86_BUILTIN_RSQRTPS_NR256,
25341 IX86_BUILTIN_RCPPS256,
25343 IX86_BUILTIN_ROUNDPD256,
25344 IX86_BUILTIN_ROUNDPS256,
25346 IX86_BUILTIN_FLOORPD256,
25347 IX86_BUILTIN_CEILPD256,
25348 IX86_BUILTIN_TRUNCPD256,
25349 IX86_BUILTIN_RINTPD256,
25350 IX86_BUILTIN_ROUNDPD_AZ256,
25352 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
25353 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
25354 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
25356 IX86_BUILTIN_FLOORPS256,
25357 IX86_BUILTIN_CEILPS256,
25358 IX86_BUILTIN_TRUNCPS256,
25359 IX86_BUILTIN_RINTPS256,
25360 IX86_BUILTIN_ROUNDPS_AZ256,
25362 IX86_BUILTIN_FLOORPS_SFIX256,
25363 IX86_BUILTIN_CEILPS_SFIX256,
25364 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
25366 IX86_BUILTIN_UNPCKHPD256,
25367 IX86_BUILTIN_UNPCKLPD256,
25368 IX86_BUILTIN_UNPCKHPS256,
25369 IX86_BUILTIN_UNPCKLPS256,
25371 IX86_BUILTIN_SI256_SI,
25372 IX86_BUILTIN_PS256_PS,
25373 IX86_BUILTIN_PD256_PD,
25374 IX86_BUILTIN_SI_SI256,
25375 IX86_BUILTIN_PS_PS256,
25376 IX86_BUILTIN_PD_PD256,
25378 IX86_BUILTIN_VTESTZPD,
25379 IX86_BUILTIN_VTESTCPD,
25380 IX86_BUILTIN_VTESTNZCPD,
25381 IX86_BUILTIN_VTESTZPS,
25382 IX86_BUILTIN_VTESTCPS,
25383 IX86_BUILTIN_VTESTNZCPS,
25384 IX86_BUILTIN_VTESTZPD256,
25385 IX86_BUILTIN_VTESTCPD256,
25386 IX86_BUILTIN_VTESTNZCPD256,
25387 IX86_BUILTIN_VTESTZPS256,
25388 IX86_BUILTIN_VTESTCPS256,
25389 IX86_BUILTIN_VTESTNZCPS256,
25390 IX86_BUILTIN_PTESTZ256,
25391 IX86_BUILTIN_PTESTC256,
25392 IX86_BUILTIN_PTESTNZC256,
25394 IX86_BUILTIN_MOVMSKPD256,
25395 IX86_BUILTIN_MOVMSKPS256,
25398 IX86_BUILTIN_MPSADBW256,
25399 IX86_BUILTIN_PABSB256,
25400 IX86_BUILTIN_PABSW256,
25401 IX86_BUILTIN_PABSD256,
25402 IX86_BUILTIN_PACKSSDW256,
25403 IX86_BUILTIN_PACKSSWB256,
25404 IX86_BUILTIN_PACKUSDW256,
25405 IX86_BUILTIN_PACKUSWB256,
25406 IX86_BUILTIN_PADDB256,
25407 IX86_BUILTIN_PADDW256,
25408 IX86_BUILTIN_PADDD256,
25409 IX86_BUILTIN_PADDQ256,
25410 IX86_BUILTIN_PADDSB256,
25411 IX86_BUILTIN_PADDSW256,
25412 IX86_BUILTIN_PADDUSB256,
25413 IX86_BUILTIN_PADDUSW256,
25414 IX86_BUILTIN_PALIGNR256,
25415 IX86_BUILTIN_AND256I,
25416 IX86_BUILTIN_ANDNOT256I,
25417 IX86_BUILTIN_PAVGB256,
25418 IX86_BUILTIN_PAVGW256,
25419 IX86_BUILTIN_PBLENDVB256,
25420 IX86_BUILTIN_PBLENDVW256,
25421 IX86_BUILTIN_PCMPEQB256,
25422 IX86_BUILTIN_PCMPEQW256,
25423 IX86_BUILTIN_PCMPEQD256,
25424 IX86_BUILTIN_PCMPEQQ256,
25425 IX86_BUILTIN_PCMPGTB256,
25426 IX86_BUILTIN_PCMPGTW256,
25427 IX86_BUILTIN_PCMPGTD256,
25428 IX86_BUILTIN_PCMPGTQ256,
25429 IX86_BUILTIN_PHADDW256,
25430 IX86_BUILTIN_PHADDD256,
25431 IX86_BUILTIN_PHADDSW256,
25432 IX86_BUILTIN_PHSUBW256,
25433 IX86_BUILTIN_PHSUBD256,
25434 IX86_BUILTIN_PHSUBSW256,
25435 IX86_BUILTIN_PMADDUBSW256,
25436 IX86_BUILTIN_PMADDWD256,
25437 IX86_BUILTIN_PMAXSB256,
25438 IX86_BUILTIN_PMAXSW256,
25439 IX86_BUILTIN_PMAXSD256,
25440 IX86_BUILTIN_PMAXUB256,
25441 IX86_BUILTIN_PMAXUW256,
25442 IX86_BUILTIN_PMAXUD256,
25443 IX86_BUILTIN_PMINSB256,
25444 IX86_BUILTIN_PMINSW256,
25445 IX86_BUILTIN_PMINSD256,
25446 IX86_BUILTIN_PMINUB256,
25447 IX86_BUILTIN_PMINUW256,
25448 IX86_BUILTIN_PMINUD256,
25449 IX86_BUILTIN_PMOVMSKB256,
25450 IX86_BUILTIN_PMOVSXBW256,
25451 IX86_BUILTIN_PMOVSXBD256,
25452 IX86_BUILTIN_PMOVSXBQ256,
25453 IX86_BUILTIN_PMOVSXWD256,
25454 IX86_BUILTIN_PMOVSXWQ256,
25455 IX86_BUILTIN_PMOVSXDQ256,
25456 IX86_BUILTIN_PMOVZXBW256,
25457 IX86_BUILTIN_PMOVZXBD256,
25458 IX86_BUILTIN_PMOVZXBQ256,
25459 IX86_BUILTIN_PMOVZXWD256,
25460 IX86_BUILTIN_PMOVZXWQ256,
25461 IX86_BUILTIN_PMOVZXDQ256,
25462 IX86_BUILTIN_PMULDQ256,
25463 IX86_BUILTIN_PMULHRSW256,
25464 IX86_BUILTIN_PMULHUW256,
25465 IX86_BUILTIN_PMULHW256,
25466 IX86_BUILTIN_PMULLW256,
25467 IX86_BUILTIN_PMULLD256,
25468 IX86_BUILTIN_PMULUDQ256,
25469 IX86_BUILTIN_POR256,
25470 IX86_BUILTIN_PSADBW256,
25471 IX86_BUILTIN_PSHUFB256,
25472 IX86_BUILTIN_PSHUFD256,
25473 IX86_BUILTIN_PSHUFHW256,
25474 IX86_BUILTIN_PSHUFLW256,
25475 IX86_BUILTIN_PSIGNB256,
25476 IX86_BUILTIN_PSIGNW256,
25477 IX86_BUILTIN_PSIGND256,
25478 IX86_BUILTIN_PSLLDQI256,
25479 IX86_BUILTIN_PSLLWI256,
25480 IX86_BUILTIN_PSLLW256,
25481 IX86_BUILTIN_PSLLDI256,
25482 IX86_BUILTIN_PSLLD256,
25483 IX86_BUILTIN_PSLLQI256,
25484 IX86_BUILTIN_PSLLQ256,
25485 IX86_BUILTIN_PSRAWI256,
25486 IX86_BUILTIN_PSRAW256,
25487 IX86_BUILTIN_PSRADI256,
25488 IX86_BUILTIN_PSRAD256,
25489 IX86_BUILTIN_PSRLDQI256,
25490 IX86_BUILTIN_PSRLWI256,
25491 IX86_BUILTIN_PSRLW256,
25492 IX86_BUILTIN_PSRLDI256,
25493 IX86_BUILTIN_PSRLD256,
25494 IX86_BUILTIN_PSRLQI256,
25495 IX86_BUILTIN_PSRLQ256,
25496 IX86_BUILTIN_PSUBB256,
25497 IX86_BUILTIN_PSUBW256,
25498 IX86_BUILTIN_PSUBD256,
25499 IX86_BUILTIN_PSUBQ256,
25500 IX86_BUILTIN_PSUBSB256,
25501 IX86_BUILTIN_PSUBSW256,
25502 IX86_BUILTIN_PSUBUSB256,
25503 IX86_BUILTIN_PSUBUSW256,
25504 IX86_BUILTIN_PUNPCKHBW256,
25505 IX86_BUILTIN_PUNPCKHWD256,
25506 IX86_BUILTIN_PUNPCKHDQ256,
25507 IX86_BUILTIN_PUNPCKHQDQ256,
25508 IX86_BUILTIN_PUNPCKLBW256,
25509 IX86_BUILTIN_PUNPCKLWD256,
25510 IX86_BUILTIN_PUNPCKLDQ256,
25511 IX86_BUILTIN_PUNPCKLQDQ256,
25512 IX86_BUILTIN_PXOR256,
25513 IX86_BUILTIN_MOVNTDQA256,
25514 IX86_BUILTIN_VBROADCASTSS_PS,
25515 IX86_BUILTIN_VBROADCASTSS_PS256,
25516 IX86_BUILTIN_VBROADCASTSD_PD256,
25517 IX86_BUILTIN_VBROADCASTSI256,
25518 IX86_BUILTIN_PBLENDD256,
25519 IX86_BUILTIN_PBLENDD128,
25520 IX86_BUILTIN_PBROADCASTB256,
25521 IX86_BUILTIN_PBROADCASTW256,
25522 IX86_BUILTIN_PBROADCASTD256,
25523 IX86_BUILTIN_PBROADCASTQ256,
25524 IX86_BUILTIN_PBROADCASTB128,
25525 IX86_BUILTIN_PBROADCASTW128,
25526 IX86_BUILTIN_PBROADCASTD128,
25527 IX86_BUILTIN_PBROADCASTQ128,
25528 IX86_BUILTIN_VPERMVARSI256,
25529 IX86_BUILTIN_VPERMDF256,
25530 IX86_BUILTIN_VPERMVARSF256,
25531 IX86_BUILTIN_VPERMDI256,
25532 IX86_BUILTIN_VPERMTI256,
25533 IX86_BUILTIN_VEXTRACT128I256,
25534 IX86_BUILTIN_VINSERT128I256,
25535 IX86_BUILTIN_MASKLOADD,
25536 IX86_BUILTIN_MASKLOADQ,
25537 IX86_BUILTIN_MASKLOADD256,
25538 IX86_BUILTIN_MASKLOADQ256,
25539 IX86_BUILTIN_MASKSTORED,
25540 IX86_BUILTIN_MASKSTOREQ,
25541 IX86_BUILTIN_MASKSTORED256,
25542 IX86_BUILTIN_MASKSTOREQ256,
25543 IX86_BUILTIN_PSLLVV4DI,
25544 IX86_BUILTIN_PSLLVV2DI,
25545 IX86_BUILTIN_PSLLVV8SI,
25546 IX86_BUILTIN_PSLLVV4SI,
25547 IX86_BUILTIN_PSRAVV8SI,
25548 IX86_BUILTIN_PSRAVV4SI,
25549 IX86_BUILTIN_PSRLVV4DI,
25550 IX86_BUILTIN_PSRLVV2DI,
25551 IX86_BUILTIN_PSRLVV8SI,
25552 IX86_BUILTIN_PSRLVV4SI,
25554 IX86_BUILTIN_GATHERSIV2DF,
25555 IX86_BUILTIN_GATHERSIV4DF,
25556 IX86_BUILTIN_GATHERDIV2DF,
25557 IX86_BUILTIN_GATHERDIV4DF,
25558 IX86_BUILTIN_GATHERSIV4SF,
25559 IX86_BUILTIN_GATHERSIV8SF,
25560 IX86_BUILTIN_GATHERDIV4SF,
25561 IX86_BUILTIN_GATHERDIV8SF,
25562 IX86_BUILTIN_GATHERSIV2DI,
25563 IX86_BUILTIN_GATHERSIV4DI,
25564 IX86_BUILTIN_GATHERDIV2DI,
25565 IX86_BUILTIN_GATHERDIV4DI,
25566 IX86_BUILTIN_GATHERSIV4SI,
25567 IX86_BUILTIN_GATHERSIV8SI,
25568 IX86_BUILTIN_GATHERDIV4SI,
25569 IX86_BUILTIN_GATHERDIV8SI,
25571 /* Alternate 4-element gather for the vectorizer where
25572 all operands are 32-byte wide. */
25573 IX86_BUILTIN_GATHERALTSIV4DF,
25574 IX86_BUILTIN_GATHERALTDIV8SF,
25575 IX86_BUILTIN_GATHERALTSIV4DI,
25576 IX86_BUILTIN_GATHERALTDIV8SI,
25578 /* TFmode support builtins. */
25580 IX86_BUILTIN_HUGE_VALQ,
25581 IX86_BUILTIN_FABSQ,
25582 IX86_BUILTIN_COPYSIGNQ,
25584 /* Vectorizer support builtins. */
25585 IX86_BUILTIN_CPYSGNPS,
25586 IX86_BUILTIN_CPYSGNPD,
25587 IX86_BUILTIN_CPYSGNPS256,
25588 IX86_BUILTIN_CPYSGNPD256,
25590 /* FMA4 instructions. */
25591 IX86_BUILTIN_VFMADDSS,
25592 IX86_BUILTIN_VFMADDSD,
25593 IX86_BUILTIN_VFMADDPS,
25594 IX86_BUILTIN_VFMADDPD,
25595 IX86_BUILTIN_VFMADDPS256,
25596 IX86_BUILTIN_VFMADDPD256,
25597 IX86_BUILTIN_VFMADDSUBPS,
25598 IX86_BUILTIN_VFMADDSUBPD,
25599 IX86_BUILTIN_VFMADDSUBPS256,
25600 IX86_BUILTIN_VFMADDSUBPD256,
25602 /* FMA3 instructions. */
25603 IX86_BUILTIN_VFMADDSS3,
25604 IX86_BUILTIN_VFMADDSD3,
25606 /* XOP instructions. */
25607 IX86_BUILTIN_VPCMOV,
25608 IX86_BUILTIN_VPCMOV_V2DI,
25609 IX86_BUILTIN_VPCMOV_V4SI,
25610 IX86_BUILTIN_VPCMOV_V8HI,
25611 IX86_BUILTIN_VPCMOV_V16QI,
25612 IX86_BUILTIN_VPCMOV_V4SF,
25613 IX86_BUILTIN_VPCMOV_V2DF,
25614 IX86_BUILTIN_VPCMOV256,
25615 IX86_BUILTIN_VPCMOV_V4DI256,
25616 IX86_BUILTIN_VPCMOV_V8SI256,
25617 IX86_BUILTIN_VPCMOV_V16HI256,
25618 IX86_BUILTIN_VPCMOV_V32QI256,
25619 IX86_BUILTIN_VPCMOV_V8SF256,
25620 IX86_BUILTIN_VPCMOV_V4DF256,
25622 IX86_BUILTIN_VPPERM,
25624 IX86_BUILTIN_VPMACSSWW,
25625 IX86_BUILTIN_VPMACSWW,
25626 IX86_BUILTIN_VPMACSSWD,
25627 IX86_BUILTIN_VPMACSWD,
25628 IX86_BUILTIN_VPMACSSDD,
25629 IX86_BUILTIN_VPMACSDD,
25630 IX86_BUILTIN_VPMACSSDQL,
25631 IX86_BUILTIN_VPMACSSDQH,
25632 IX86_BUILTIN_VPMACSDQL,
25633 IX86_BUILTIN_VPMACSDQH,
25634 IX86_BUILTIN_VPMADCSSWD,
25635 IX86_BUILTIN_VPMADCSWD,
25637 IX86_BUILTIN_VPHADDBW,
25638 IX86_BUILTIN_VPHADDBD,
25639 IX86_BUILTIN_VPHADDBQ,
25640 IX86_BUILTIN_VPHADDWD,
25641 IX86_BUILTIN_VPHADDWQ,
25642 IX86_BUILTIN_VPHADDDQ,
25643 IX86_BUILTIN_VPHADDUBW,
25644 IX86_BUILTIN_VPHADDUBD,
25645 IX86_BUILTIN_VPHADDUBQ,
25646 IX86_BUILTIN_VPHADDUWD,
25647 IX86_BUILTIN_VPHADDUWQ,
25648 IX86_BUILTIN_VPHADDUDQ,
25649 IX86_BUILTIN_VPHSUBBW,
25650 IX86_BUILTIN_VPHSUBWD,
25651 IX86_BUILTIN_VPHSUBDQ,
25653 IX86_BUILTIN_VPROTB,
25654 IX86_BUILTIN_VPROTW,
25655 IX86_BUILTIN_VPROTD,
25656 IX86_BUILTIN_VPROTQ,
25657 IX86_BUILTIN_VPROTB_IMM,
25658 IX86_BUILTIN_VPROTW_IMM,
25659 IX86_BUILTIN_VPROTD_IMM,
25660 IX86_BUILTIN_VPROTQ_IMM,
25662 IX86_BUILTIN_VPSHLB,
25663 IX86_BUILTIN_VPSHLW,
25664 IX86_BUILTIN_VPSHLD,
25665 IX86_BUILTIN_VPSHLQ,
25666 IX86_BUILTIN_VPSHAB,
25667 IX86_BUILTIN_VPSHAW,
25668 IX86_BUILTIN_VPSHAD,
25669 IX86_BUILTIN_VPSHAQ,
25671 IX86_BUILTIN_VFRCZSS,
25672 IX86_BUILTIN_VFRCZSD,
25673 IX86_BUILTIN_VFRCZPS,
25674 IX86_BUILTIN_VFRCZPD,
25675 IX86_BUILTIN_VFRCZPS256,
25676 IX86_BUILTIN_VFRCZPD256,
25678 IX86_BUILTIN_VPCOMEQUB,
25679 IX86_BUILTIN_VPCOMNEUB,
25680 IX86_BUILTIN_VPCOMLTUB,
25681 IX86_BUILTIN_VPCOMLEUB,
25682 IX86_BUILTIN_VPCOMGTUB,
25683 IX86_BUILTIN_VPCOMGEUB,
25684 IX86_BUILTIN_VPCOMFALSEUB,
25685 IX86_BUILTIN_VPCOMTRUEUB,
25687 IX86_BUILTIN_VPCOMEQUW,
25688 IX86_BUILTIN_VPCOMNEUW,
25689 IX86_BUILTIN_VPCOMLTUW,
25690 IX86_BUILTIN_VPCOMLEUW,
25691 IX86_BUILTIN_VPCOMGTUW,
25692 IX86_BUILTIN_VPCOMGEUW,
25693 IX86_BUILTIN_VPCOMFALSEUW,
25694 IX86_BUILTIN_VPCOMTRUEUW,
25696 IX86_BUILTIN_VPCOMEQUD,
25697 IX86_BUILTIN_VPCOMNEUD,
25698 IX86_BUILTIN_VPCOMLTUD,
25699 IX86_BUILTIN_VPCOMLEUD,
25700 IX86_BUILTIN_VPCOMGTUD,
25701 IX86_BUILTIN_VPCOMGEUD,
25702 IX86_BUILTIN_VPCOMFALSEUD,
25703 IX86_BUILTIN_VPCOMTRUEUD,
25705 IX86_BUILTIN_VPCOMEQUQ,
25706 IX86_BUILTIN_VPCOMNEUQ,
25707 IX86_BUILTIN_VPCOMLTUQ,
25708 IX86_BUILTIN_VPCOMLEUQ,
25709 IX86_BUILTIN_VPCOMGTUQ,
25710 IX86_BUILTIN_VPCOMGEUQ,
25711 IX86_BUILTIN_VPCOMFALSEUQ,
25712 IX86_BUILTIN_VPCOMTRUEUQ,
25714 IX86_BUILTIN_VPCOMEQB,
25715 IX86_BUILTIN_VPCOMNEB,
25716 IX86_BUILTIN_VPCOMLTB,
25717 IX86_BUILTIN_VPCOMLEB,
25718 IX86_BUILTIN_VPCOMGTB,
25719 IX86_BUILTIN_VPCOMGEB,
25720 IX86_BUILTIN_VPCOMFALSEB,
25721 IX86_BUILTIN_VPCOMTRUEB,
25723 IX86_BUILTIN_VPCOMEQW,
25724 IX86_BUILTIN_VPCOMNEW,
25725 IX86_BUILTIN_VPCOMLTW,
25726 IX86_BUILTIN_VPCOMLEW,
25727 IX86_BUILTIN_VPCOMGTW,
25728 IX86_BUILTIN_VPCOMGEW,
25729 IX86_BUILTIN_VPCOMFALSEW,
25730 IX86_BUILTIN_VPCOMTRUEW,
25732 IX86_BUILTIN_VPCOMEQD,
25733 IX86_BUILTIN_VPCOMNED,
25734 IX86_BUILTIN_VPCOMLTD,
25735 IX86_BUILTIN_VPCOMLED,
25736 IX86_BUILTIN_VPCOMGTD,
25737 IX86_BUILTIN_VPCOMGED,
25738 IX86_BUILTIN_VPCOMFALSED,
25739 IX86_BUILTIN_VPCOMTRUED,
25741 IX86_BUILTIN_VPCOMEQQ,
25742 IX86_BUILTIN_VPCOMNEQ,
25743 IX86_BUILTIN_VPCOMLTQ,
25744 IX86_BUILTIN_VPCOMLEQ,
25745 IX86_BUILTIN_VPCOMGTQ,
25746 IX86_BUILTIN_VPCOMGEQ,
25747 IX86_BUILTIN_VPCOMFALSEQ,
25748 IX86_BUILTIN_VPCOMTRUEQ,
25750 /* LWP instructions. */
25751 IX86_BUILTIN_LLWPCB,
25752 IX86_BUILTIN_SLWPCB,
25753 IX86_BUILTIN_LWPVAL32,
25754 IX86_BUILTIN_LWPVAL64,
25755 IX86_BUILTIN_LWPINS32,
25756 IX86_BUILTIN_LWPINS64,
25760 /* BMI instructions. */
25761 IX86_BUILTIN_BEXTR32,
25762 IX86_BUILTIN_BEXTR64,
25765 /* TBM instructions. */
25766 IX86_BUILTIN_BEXTRI32,
25767 IX86_BUILTIN_BEXTRI64,
25769 /* BMI2 instructions. */
25770 IX86_BUILTIN_BZHI32,
25771 IX86_BUILTIN_BZHI64,
25772 IX86_BUILTIN_PDEP32,
25773 IX86_BUILTIN_PDEP64,
25774 IX86_BUILTIN_PEXT32,
25775 IX86_BUILTIN_PEXT64,
25777 /* FSGSBASE instructions. */
25778 IX86_BUILTIN_RDFSBASE32,
25779 IX86_BUILTIN_RDFSBASE64,
25780 IX86_BUILTIN_RDGSBASE32,
25781 IX86_BUILTIN_RDGSBASE64,
25782 IX86_BUILTIN_WRFSBASE32,
25783 IX86_BUILTIN_WRFSBASE64,
25784 IX86_BUILTIN_WRGSBASE32,
25785 IX86_BUILTIN_WRGSBASE64,
25787 /* RDRND instructions. */
25788 IX86_BUILTIN_RDRAND16_STEP,
25789 IX86_BUILTIN_RDRAND32_STEP,
25790 IX86_BUILTIN_RDRAND64_STEP,
25792 /* F16C instructions. */
25793 IX86_BUILTIN_CVTPH2PS,
25794 IX86_BUILTIN_CVTPH2PS256,
25795 IX86_BUILTIN_CVTPS2PH,
25796 IX86_BUILTIN_CVTPS2PH256,
25798 /* CFString built-in for darwin */
25799 IX86_BUILTIN_CFSTRING,
25801 IX86_BUILTIN_MAX
25802 };
25804 /* Table for the ix86 builtin decls. */
25805 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
25807 /* Table of all of the builtin functions that are possible with different ISAs
25808 but are waiting to be built until a function is declared to use that
25809 ISA.  */
25810 struct builtin_isa {
25811 const char *name; /* function name */
25812 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
25813 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
25814 bool const_p; /* true if the declaration is constant */
25815 bool set_and_not_built_p;
25816 };
25818 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
25821 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
25822 of which isa_flags to use in the ix86_builtins_isa array. Stores the
25823 function decl in the ix86_builtins array. Returns the function decl, or
25824 NULL_TREE if the builtin was not added.
25826 If the front end has a special hook for builtin functions, delay adding
25827 builtin functions that aren't in the current ISA until the ISA is changed
25828 with function specific optimization. Doing so can save about 300K for the
25829 default compiler. When the builtin is expanded, check at that time whether
25830 it is valid.
25832 If the front end doesn't have a special hook, record all builtins, even if
25833 they belong to an instruction set outside the current ISA, in case the user uses
25834 function specific options for a different ISA, so that we don't get scope
25835 errors if a builtin is added in the middle of a function scope. */
25838 def_builtin (HOST_WIDE_INT mask, const char *name,
25839 enum ix86_builtin_func_type tcode,
25840 enum ix86_builtins code)
25842 tree decl = NULL_TREE;
25844 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
25846 ix86_builtins_isa[(int) code].isa = mask;
25848 mask &= ~OPTION_MASK_ISA_64BIT;
25849 if (mask == 0
25850 || (mask & ix86_isa_flags) != 0
25851 || (lang_hooks.builtin_function
25852 == lang_hooks.builtin_function_ext_scope))
25855 tree type = ix86_get_builtin_func_type (tcode);
25856 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
25857 NULL, NULL_TREE);
25858 ix86_builtins[(int) code] = decl;
25859 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
25863 ix86_builtins[(int) code] = NULL_TREE;
25864 ix86_builtins_isa[(int) code].tcode = tcode;
25865 ix86_builtins_isa[(int) code].name = name;
25866 ix86_builtins_isa[(int) code].const_p = false;
25867 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
25868 }
25869 }
25871 return decl;
25872 }
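/* Editorial sketch (not in the original source) of the two paths above,
   using __builtin_ia32_vzeroupper as an example.  Under a front end
   whose builtin_function hook differs from builtin_function_ext_scope
   (as in C), compiling without -mavx makes the call

     def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper",
                  VOID_FTYPE_VOID, IX86_BUILTIN_VZEROUPPER);

   only record name/type/isa and return NULL_TREE; the real decl is
   created later by ix86_add_new_builtins once, say, an
   __attribute__((target ("avx"))) function makes the ISA live.  */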
25874 /* Like def_builtin, but also marks the function decl "const". */
25877 def_builtin_const (HOST_WIDE_INT mask, const char *name,
25878 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
25880 tree decl = def_builtin (mask, name, tcode, code);
25881 if (decl)
25882 TREE_READONLY (decl) = 1;
25883 else
25884 ix86_builtins_isa[(int) code].const_p = true;
25889 /* Add any new builtin functions for a given ISA that may not have been
25890 declared. This saves a bit of space compared to adding all of the
25891 declarations to the tree, even if we didn't use them. */
25894 ix86_add_new_builtins (HOST_WIDE_INT isa)
25898 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
25900 if ((ix86_builtins_isa[i].isa & isa) != 0
25901 && ix86_builtins_isa[i].set_and_not_built_p)
25905 /* Don't define the builtin again. */
25906 ix86_builtins_isa[i].set_and_not_built_p = false;
25908 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
25909 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
25910 type, i, BUILT_IN_MD, NULL,
25911 NULL_TREE);
25913 ix86_builtins[i] = decl;
25914 if (ix86_builtins_isa[i].const_p)
25915 TREE_READONLY (decl) = 1;
25920 /* Bits for builtin_description.flag. */
25922 /* Set when we don't support the comparison natively, and should
25923 swap the comparison operands in order to support it.  */
25924 #define BUILTIN_DESC_SWAP_OPERANDS 1
25926 struct builtin_description
25927 {
25928 const HOST_WIDE_INT mask;
25929 const enum insn_code icode;
25930 const char *const name;
25931 const enum ix86_builtins code;
25932 const enum rtx_code comparison;
25933 const int flag;
25934 };
25936 static const struct builtin_description bdesc_comi[] =
25937 {
25938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
25939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
25940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
25941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
25942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
25943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
25944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
25945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
25946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
25947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
25948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
25949 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
25950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
25951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
25952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
25953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
25954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
25955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
25956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
25957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
25958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
25959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
25960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
25961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
25962 };
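/* Editorial note (not in the original source): the comparison codes in
   bdesc_comi encode comis/ucomis NaN behavior rather than plain FP
   compares.  An unordered operand sets ZF, so "eq" must be modeled as
   UNEQ (equal or unordered) and "neq" as LTGT (ordered and unequal),
   letting the expander pick setcc instructions that match the hardware
   flag results.  */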
25964 static const struct builtin_description bdesc_pcmpestr[] =
25965 {
25966 /* SSE4.2 */
25967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
25968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
25969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
25970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
25971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
25972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
25973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
25974 };
25976 static const struct builtin_description bdesc_pcmpistr[] =
25977 {
25978 /* SSE4.2 */
25979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
25980 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
25981 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
25982 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
25983 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
25984 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
25985 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
25986 };
25988 /* Special builtins with variable number of arguments. */
25989 static const struct builtin_description bdesc_special_args[] =
25990 {
25991 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
25992 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
25993 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
25996 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25999 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
26002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
26006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26011 /* SSE or 3DNow!A */
26012 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26013 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
26016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
26020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
26022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
26023 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
26024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
26025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
26027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
26028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
  /* LWP */
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};

/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
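  /* Each entry below supplies: the ISA option mask that must be
     enabled for the builtin (a negated mask such as
     ~OPTION_MASK_ISA_64BIT marks a builtin not tied to any extension,
     available in both 32-bit and 64-bit modes), the insn code of the
     implementing pattern, the user-visible builtin name (0 if the
     name is registered elsewhere), the IX86_BUILTIN_* code, an rtx
     comparison code used by the compare builtins (UNKNOWN otherwise),
     and the function-type flag describing the builtin's signature. */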
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
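
  /* The shift builtins come in two flavors: the ...i forms (psllwi,
     pslldi, ...) take the count as a plain integer, while the others
     take it in a vector register; the _COUNT suffix on the
     function-type flag tells the expander to treat the last operand
     as a shift count. */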
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
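
  /* For the compare builtins the rtx code column selects the
     condition; the greater-than forms reuse the less-than patterns
     with the operands reversed, which is what the _SWAP suffix on
     the function-type flag requests. */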
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
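
  /* pslldq/psrldq shift the full 128-bit value, so they map onto the
     V1TImode shift patterns; the _CONVERT suffix on the flag marks
     builtins whose declared vector mode differs from the pattern's
     mode, so the operands are re-interpreted on expansion. */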
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
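
  /* FABSQ and COPYSIGNQ above have a null name here, like MFENCE in
     bdesc_special_args; they still get an expander from this table,
     but their user-visible names are registered separately when the
     builtins are initialized. */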
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26485 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
26486 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
26487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
26488 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
26489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
26490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
26492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
26505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
26506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
26519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
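  /* Usage sketch for the crc32 rows above: each row binds an ISA mask,
     an insn pattern and an FTYPE code to a builtin name, so e.g. the
     crc32si row yields a builtin callable from code compiled with
     -msse4.2 or -mcrc32:

	unsigned int
	crc32_step (unsigned int crc, unsigned int data)
	{
	  return __builtin_ia32_crc32si (crc, data);
	}

     UINT_FTYPE_UINT_UINT supplies the prototype and
     CODE_FOR_sse4_2_crc32si the insn the expander emits.  */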
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
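  /* Usage sketch for the BMI2 pdep/pext rows above (user code built
     with -mbmi2); the _pdep_u32/_pext_u32 intrinsics in bmi2intrin.h
     expand to these builtins.  pext packs the bits of SRC selected by
     MASK into the low bits of the result:

	unsigned int
	gather_bits (unsigned int src, unsigned int mask)
	{
	  return __builtin_ia32_pext_si (src, mask);
	}  */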
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
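/* These aliases simply rename the shared ix86_builtin_func_type codes
   so the FMA4/XOP table below reads in its historical "multi arg"
   style; e.g. MULTI_ARG_3_SF is V4SF_FTYPE_V4SF_V4SF_V4SF, which gives
   a builtin such as __builtin_ia32_vfmaddss a prototype of the shape

	__m128 (__m128, __m128, __m128)

   (a sketch; the exact function type is built from the FTYPE code when
   the builtins are registered at initialization time).  */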
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int) MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int) MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int) MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int) MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int) MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int) MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int) MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int) MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int) MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int) MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int) MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int) MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int) MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int) MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int) MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int) MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int) MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int) MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int) MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int) MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int) MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int) MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int) MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int) MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int) MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int) MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int) MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int) MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int) MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int) MULTI_ARG_3_HI_SI },
27049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
27051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
27052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
27061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
27062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
27063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
27064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
27065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
27066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
27067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
27069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
27070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
27071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
27073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
27074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
27076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
27077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
27078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
27079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
27080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
27081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
27083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
27085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
27086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
27088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
27091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
27092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
27094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
27100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
27101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
27102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
27103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
27104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
27105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
27107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
27108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
27109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
27110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
27111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
27112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
27113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
27115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
27116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
27117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
27118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
27119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
27120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
27121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
27123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
27124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
27125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
27126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
27127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
27128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
27129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
27131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
27132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
27133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
27134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
27135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
27136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
27137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
27139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
27140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
27141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
27142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
27143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
27144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
27145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
27147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
27148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
27149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
27150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
27151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
27152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
27153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
27155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
27156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
27157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
27158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
27159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
27160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
27161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
27163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
27164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
27165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
27166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
27167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
27168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
27169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
27170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
27172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
27173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
27174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
27175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
27176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
27177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
27178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
27179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
27181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
27182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
27183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
27184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
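/* Illustrative sketch (not part of GCC): every row above is registered by
   the bdesc_multi_arg loop at the end of ix86_init_mmx_sse_builtins and
   expanded by ix86_expand_multi_arg_builtin.  With -mxop a user can call
   one of the comparison rows directly:

       typedef char __v16qi __attribute__ ((__vector_size__ (16)));

       __v16qi
       byte_eq (__v16qi a, __v16qi b)
       {
         return __builtin_ia32_vpcomeqb (a, b);
       }

   The EQ code stored in the row becomes SUB_CODE in the expander and
   supplies the comparison operator for the xop_maskcmpv16qi3 pattern.  */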
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
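/* Illustrative sketch (not from GCC sources): the names mirror libitm
   entry points, so once registered, BUILT_IN_TM_STORE_M128 is reachable
   as __builtin__ITM_WM128 and -- after ix86_init_tm_builtins strips the
   "__builtin_" prefix for the assembler name -- as _ITM_WM128, the
   symbol the TM runtime exports for a transactional 128-bit store of a
   V4SF value.  */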
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
        }
    }
  return NULL_TREE;
}
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
        }
    }
  return NULL_TREE;
}
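/* Usage note (a sketch, not verbatim from GCC): these two routines back
   the x86 TARGET_VECTORIZE_BUILTIN_TM_LOAD/STORE hooks (hook names
   assumed), letting TM lowering replace a vector memory access inside a
   transaction with a runtime call, roughly:

       decl = targetm.vectorize.builtin_tm_store (TREE_TYPE (lhs));
       if (decl)
         call_stmt = gimple_build_call (decl, 2, addr, rhs);

   Sizes are matched in bits, hence the 64/128/256 cases above.  */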
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type, attrs, attrs_type;
          enum built_in_function code = (enum built_in_function) d->code;

          ftype = (enum ix86_builtin_func_type) d->flag;
          type = ix86_get_builtin_func_type (ftype);

          if (BUILTIN_TM_LOAD_P (code))
            {
              attrs = attrs_load;
              attrs_type = attrs_type_load;
            }
          else if (BUILTIN_TM_STORE_P (code))
            {
              attrs = attrs_store;
              attrs_type = attrs_type_store;
            }
          else
            {
              attrs = attrs_log;
              attrs_type = attrs_type_log;
            }
          decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
                                       /* The builtin without the prefix for
                                          calling it directly.  */
                                       d->name + strlen ("__builtin_"),
                                       attrs);
          /* add_builtin_function () will set the DECL_ATTRIBUTES, now
             set the TYPE_ATTRIBUTES.  */
          decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

          set_builtin_decl (code, decl, false);
        }
    }
}
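/* Illustrative consequence of the loop above (a sketch): with
   -mavx -fgnu-tm the AVX rows of bdesc_tm pass the ISA test, and each
   decl becomes visible under two spellings:

       __builtin__ITM_RM256 (p);    builtin spelling
       _ITM_RM256 (p);              library spelling, via the
                                    "__builtin_" prefix strip

   set_builtin_decl additionally files the decl under the generic
   BUILT_IN_TM_* code so target-independent TM code can retrieve it.  */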
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target-specific options that differ from the
   command-line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
               INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
               INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
               IX86_BUILTIN_RDRAND64_STEP);

  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
               IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
               IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
               IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
               IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
               IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
               V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
               IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
               V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
               IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
               IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
               IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
               IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
               V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
               IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
               V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
               IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
               V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
               V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV8SI);

  /* The next four names had a stray trailing space in the quoted
     builtin name; the space is dropped here so the builtins are
     reachable under their intended names.  */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
               IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
               V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
               IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
               IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
               V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
               IX86_BUILTIN_GATHERALTDIV8SI);
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add FMA4/XOP multi-arg instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
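/* Sketch (not from GCC sources): def_builtin honors the ISA mask, so a
   builtin registered above only materializes when its ISA bit is (or can
   be) enabled.  For example, with -mavx2 the gather builtins exist and
   user code such as

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));
       typedef int __v4si __attribute__ ((__vector_size__ (16)));

       __v4sf
       gather4 (__v4sf src, const float *base, __v4si idx, __v4sf mask)
       {
         return __builtin_ia32_gathersiv4sf (src, base, idx, mask, 4);
       }

   compiles down to a vgatherdps instruction (scale 4 chosen here for
   float elements).  */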
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
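/* Usage sketch (illustrative, not from this file): on x86-64 these let a
   single binary mix calling conventions, e.g.

       void f (int n, ...) __attribute__ ((ms_abi));
       void f (int n, ...)
       {
         __builtin_ms_va_list ap;
         __builtin_ms_va_start (ap, n);
         int x = __builtin_va_arg (ap, int);
         __builtin_ms_va_end (ap);
       }

   The ms_abi/sysv_abi attribute lists built above let the start/end/copy
   builtins be checked against the ABI of the enclosing function.  */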
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
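/* Illustrative effect of the registrations above (a sketch, constant
   suffixes assumed):

       __float80  ext  = 1.0w;    .. XFmode, 80-bit extended precision
       __float128 quad = 1.0q;    .. TFmode, 128-bit quad precision

   DEFINE_BUILTIN_PRIMITIVE_TYPES then materializes the scalar and vector
   type nodes that i386-builtin-types.awk generated for the builtin
   function signatures.  */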
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal calls if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
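/* Worked example (illustrative): if error recovery handed us const0_rtx
   where a V4SF operand was expected,

       op0 = safe_vector_operand (op0, V4SFmode);

   swaps in CONST0_RTX (V4SFmode), a (const_vector:V4SF [0 0 0 0]) that
   the operand predicates and emitters accept.  */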
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
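/* Expansion sketch (hypothetical icode, for illustration only): for a
   builtin mapped to CODE_FOR_addv8hi3 this helper boils down to

       target = gen_reg_rtx (V8HImode);
       emit_insn (gen_addv8hi3 (target, force_reg (V8HImode, op0),
                                force_reg (V8HImode, op1)));

   The SImode-to-TImode fixup above handles insns whose second source
   operand is declared TImode while the user passes a plain int: the
   value is first loaded into the low part of an SSE register via
   gen_sse2_loadd and then viewed in TImode.  */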
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
        {
          if (!insn_data[icode].operand[i + 1].predicate (op, mode))
            {
              enum insn_code new_icode = icode;
              switch (icode)
                {
                case CODE_FOR_xop_vpermil2v2df3:
                case CODE_FOR_xop_vpermil2v4sf3:
                case CODE_FOR_xop_vpermil2v4df3:
                case CODE_FOR_xop_vpermil2v8sf3:
                  error ("the last argument must be a 2-bit immediate");
                  return gen_reg_rtx (tmode);
                case CODE_FOR_xop_rotlv2di3:
                  new_icode = CODE_FOR_rotlv2di3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv4si3:
                  new_icode = CODE_FOR_rotlv4si3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv8hi3:
                  new_icode = CODE_FOR_rotlv8hi3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv16qi3:
                  new_icode = CODE_FOR_rotlv16qi3;
                xop_rotl:
                  if (CONST_INT_P (op))
                    {
                      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
                      op = GEN_INT (INTVAL (op) & mask);
                      gcc_checking_assert
                        (insn_data[icode].operand[i + 1].predicate (op, mode));
                    }
                  else
                    {
                      gcc_checking_assert
                        (nargs == 2
                         && insn_data[new_icode].operand[0].mode == tmode
                         && insn_data[new_icode].operand[1].mode == tmode
                         && insn_data[new_icode].operand[2].mode == mode
                         && insn_data[new_icode].operand[0].predicate
                            == insn_data[icode].operand[0].predicate
                         && insn_data[new_icode].operand[1].predicate
                            == insn_data[icode].operand[1].predicate);
                      icode = new_icode;
                    }
                  break;

                default:
                  gcc_unreachable ();
                }
            }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
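/* Worked example of the last_arg_constant path (illustrative): the
   vpermil2 builtins demand a literal selector, so

       r = __builtin_ia32_vpermil2ps (a, b, c, 2);    accepted
       r = __builtin_ia32_vpermil2ps (a, b, c, n);    rejected with the
                                                      2-bit immediate error

   rather than silently forcing N into a register.  The XOP rotates
   instead fall back to the generic rotl patterns (CODE_FOR_rotlv*3)
   when the count turns out not to be constant.  */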
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
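/* Sketch (builtin/insn pairing assumed): the "scalar unop with vec_merge"
   shape covers insns like rcpss whose RTL merges a scalar result back
   into the source vector.  A one-argument builtin such as

       r = __builtin_ia32_rcpss (a);    r[0] = 1/a[0], r[1..3] = a[1..3]

   therefore needs OP0 duplicated into OP1, which is exactly what the
   op1 = op0 assignment above provides.  */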
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
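/* Worked example of the SWAP path (illustrative): SSE encodes cmpltps but
   not cmpgtps, so a builtin table entry marked for swapping expands

       a > b    as    cmpltps with operands (b, a)

   which is why OP1 is copied into a fresh register above before the
   operands trade places and gen_rtx_fmt_ee builds the operator.  */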
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
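/* Expansion sketch (illustrative, exact setcc assumed): a comi builtin
   returns an int by testing EFLAGS after the compare, roughly

       comiss  %xmm1, %xmm0      sets EFLAGS
       seta    %al               for a GT-style comparison

   The STRICT_LOW_PART store above is that setcc landing in the low byte
   of the SImode pseudo that was pre-cleared to zero.  */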
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
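/* Note with example (illustrative): here d->comparison is not an rtx code
   at all; the round table entries smuggle the rounding-mode immediate
   through that field, so a floor-flavored entry expands effectively as

       emit_insn (GEN_FCN (d->icode) (target, op0, GEN_INT (ROUND_FLOOR)));

   (constant name taken from the i386 rounding controls; the concrete
   icode comes from the table entry).  */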
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
                                     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
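/* Usage sketch (illustrative): the ptest builtins return 0/1 from the
   flags, e.g.

       int all_zero = __builtin_ia32_ptestz128 (a, b);   ZF after ptest

   Here d->comparison selects which flag is consulted (EQ for the ...z
   form, LTU for ...c, GTU for ...nzc -- pairing assumed) and becomes
   the QImode setcc emitted above.  */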
28281 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
28284 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
28285 tree exp, rtx target)
28288 tree arg0 = CALL_EXPR_ARG (exp, 0);
28289 tree arg1 = CALL_EXPR_ARG (exp, 1);
28290 tree arg2 = CALL_EXPR_ARG (exp, 2);
28291 tree arg3 = CALL_EXPR_ARG (exp, 3);
28292 tree arg4 = CALL_EXPR_ARG (exp, 4);
28293 rtx scratch0, scratch1;
28294 rtx op0 = expand_normal (arg0);
28295 rtx op1 = expand_normal (arg1);
28296 rtx op2 = expand_normal (arg2);
28297 rtx op3 = expand_normal (arg3);
28298 rtx op4 = expand_normal (arg4);
28299 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
28301 tmode0 = insn_data[d->icode].operand[0].mode;
28302 tmode1 = insn_data[d->icode].operand[1].mode;
28303 modev2 = insn_data[d->icode].operand[2].mode;
28304 modei3 = insn_data[d->icode].operand[3].mode;
28305 modev4 = insn_data[d->icode].operand[4].mode;
28306 modei5 = insn_data[d->icode].operand[5].mode;
28307 modeimm = insn_data[d->icode].operand[6].mode;
28309 if (VECTOR_MODE_P (modev2))
28310 op0 = safe_vector_operand (op0, modev2);
28311 if (VECTOR_MODE_P (modev4))
28312 op2 = safe_vector_operand (op2, modev4);
28314 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28315 op0 = copy_to_mode_reg (modev2, op0);
28316 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
28317 op1 = copy_to_mode_reg (modei3, op1);
28318 if ((optimize && !register_operand (op2, modev4))
28319 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
28320 op2 = copy_to_mode_reg (modev4, op2);
28321 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
28322 op3 = copy_to_mode_reg (modei5, op3);
28324 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
28326 error ("the fifth argument must be an 8-bit immediate");
28330 if (d->code == IX86_BUILTIN_PCMPESTRI128)
28332 if (optimize || !target
28333 || GET_MODE (target) != tmode0
28334 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28335 target = gen_reg_rtx (tmode0);
28337 scratch1 = gen_reg_rtx (tmode1);
28339 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
28341 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
28343 if (optimize || !target
28344 || GET_MODE (target) != tmode1
28345 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28346 target = gen_reg_rtx (tmode1);
28348 scratch0 = gen_reg_rtx (tmode0);
28350 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
28354 gcc_assert (d->flag);
28356 scratch0 = gen_reg_rtx (tmode0);
28357 scratch1 = gen_reg_rtx (tmode1);
28359 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
28369 target = gen_reg_rtx (SImode);
28370 emit_move_insn (target, const0_rtx);
28371 target = gen_rtx_SUBREG (QImode, target, 0);
28374 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28375 gen_rtx_fmt_ee (EQ, QImode,
28376 gen_rtx_REG ((enum machine_mode) d->flag,
28379 return SUBREG_REG (target);
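/* Illustrative sketch, not part of the original file: reaching the
   pcmpestr expander from user code.  Assumes GCC's <nmmintrin.h>
   (SSE4.2); _mm_cmpestri wraps IX86_BUILTIN_PCMPESTRI128.  Note that
   the fifth argument must be a compile-time constant, matching the
   8-bit immediate check above.

     #include <nmmintrin.h>

     int
     find_any (__m128i set, int set_len, __m128i text, int text_len)
     {
       return _mm_cmpestri (set, set_len, text, text_len,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }
*/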
28386 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
28389 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
28390 tree exp, rtx target)
28393 tree arg0 = CALL_EXPR_ARG (exp, 0);
28394 tree arg1 = CALL_EXPR_ARG (exp, 1);
28395 tree arg2 = CALL_EXPR_ARG (exp, 2);
28396 rtx scratch0, scratch1;
28397 rtx op0 = expand_normal (arg0);
28398 rtx op1 = expand_normal (arg1);
28399 rtx op2 = expand_normal (arg2);
28400 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
28402 tmode0 = insn_data[d->icode].operand[0].mode;
28403 tmode1 = insn_data[d->icode].operand[1].mode;
28404 modev2 = insn_data[d->icode].operand[2].mode;
28405 modev3 = insn_data[d->icode].operand[3].mode;
28406 modeimm = insn_data[d->icode].operand[4].mode;
28408 if (VECTOR_MODE_P (modev2))
28409 op0 = safe_vector_operand (op0, modev2);
28410 if (VECTOR_MODE_P (modev3))
28411 op1 = safe_vector_operand (op1, modev3);
28413 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28414 op0 = copy_to_mode_reg (modev2, op0);
28415 if ((optimize && !register_operand (op1, modev3))
28416 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
28417 op1 = copy_to_mode_reg (modev3, op1);
28419 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
28421 error ("the third argument must be an 8-bit immediate");
28425 if (d->code == IX86_BUILTIN_PCMPISTRI128)
28427 if (optimize || !target
28428 || GET_MODE (target) != tmode0
28429 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28430 target = gen_reg_rtx (tmode0);
28432 scratch1 = gen_reg_rtx (tmode1);
28434 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
28436 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
28438 if (optimize || !target
28439 || GET_MODE (target) != tmode1
28440 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28441 target = gen_reg_rtx (tmode1);
28443 scratch0 = gen_reg_rtx (tmode0);
28445 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
28449 gcc_assert (d->flag);
28451 scratch0 = gen_reg_rtx (tmode0);
28452 scratch1 = gen_reg_rtx (tmode1);
28454 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
28464 target = gen_reg_rtx (SImode);
28465 emit_move_insn (target, const0_rtx);
28466 target = gen_rtx_SUBREG (QImode, target, 0);
28469 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28470 gen_rtx_fmt_ee (EQ, QImode,
28471 gen_rtx_REG ((enum machine_mode) d->flag,
28474 return SUBREG_REG (target);
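/* Illustrative sketch, not part of the original file: the
   implicit-length variant.  Assumes GCC's <nmmintrin.h>; _mm_cmpistri
   wraps IX86_BUILTIN_PCMPISTRI128, and its third argument must be a
   compile-time constant, matching the immediate check above.

     #include <nmmintrin.h>

     int
     find_any_z (__m128i set, __m128i text)
     {
       return _mm_cmpistri (set, text,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }
*/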
28480 /* Subroutine of ix86_expand_builtin to take care of insns with
28481 variable number of operands. */
28484 ix86_expand_args_builtin (const struct builtin_description *d,
28485 tree exp, rtx target)
28487 rtx pat, real_target;
28488 unsigned int i, nargs;
28489 unsigned int nargs_constant = 0;
28490 int num_memory = 0;
28494 enum machine_mode mode;
28496 bool last_arg_count = false;
28497 enum insn_code icode = d->icode;
28498 const struct insn_data_d *insn_p = &insn_data[icode];
28499 enum machine_mode tmode = insn_p->operand[0].mode;
28500 enum machine_mode rmode = VOIDmode;
28502 enum rtx_code comparison = d->comparison;
28504 switch ((enum ix86_builtin_func_type) d->flag)
28506 case V2DF_FTYPE_V2DF_ROUND:
28507 case V4DF_FTYPE_V4DF_ROUND:
28508 case V4SF_FTYPE_V4SF_ROUND:
28509 case V8SF_FTYPE_V8SF_ROUND:
28510 case V4SI_FTYPE_V4SF_ROUND:
28511 case V8SI_FTYPE_V8SF_ROUND:
28512 return ix86_expand_sse_round (d, exp, target);
28513 case V4SI_FTYPE_V2DF_V2DF_ROUND:
28514 case V8SI_FTYPE_V4DF_V4DF_ROUND:
28515 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
28516 case INT_FTYPE_V8SF_V8SF_PTEST:
28517 case INT_FTYPE_V4DI_V4DI_PTEST:
28518 case INT_FTYPE_V4DF_V4DF_PTEST:
28519 case INT_FTYPE_V4SF_V4SF_PTEST:
28520 case INT_FTYPE_V2DI_V2DI_PTEST:
28521 case INT_FTYPE_V2DF_V2DF_PTEST:
28522 return ix86_expand_sse_ptest (d, exp, target);
28523 case FLOAT128_FTYPE_FLOAT128:
28524 case FLOAT_FTYPE_FLOAT:
28525 case INT_FTYPE_INT:
28526 case UINT64_FTYPE_INT:
28527 case UINT16_FTYPE_UINT16:
28528 case INT64_FTYPE_INT64:
28529 case INT64_FTYPE_V4SF:
28530 case INT64_FTYPE_V2DF:
28531 case INT_FTYPE_V16QI:
28532 case INT_FTYPE_V8QI:
28533 case INT_FTYPE_V8SF:
28534 case INT_FTYPE_V4DF:
28535 case INT_FTYPE_V4SF:
28536 case INT_FTYPE_V2DF:
28537 case INT_FTYPE_V32QI:
28538 case V16QI_FTYPE_V16QI:
28539 case V8SI_FTYPE_V8SF:
28540 case V8SI_FTYPE_V4SI:
28541 case V8HI_FTYPE_V8HI:
28542 case V8HI_FTYPE_V16QI:
28543 case V8QI_FTYPE_V8QI:
28544 case V8SF_FTYPE_V8SF:
28545 case V8SF_FTYPE_V8SI:
28546 case V8SF_FTYPE_V4SF:
28547 case V8SF_FTYPE_V8HI:
28548 case V4SI_FTYPE_V4SI:
28549 case V4SI_FTYPE_V16QI:
28550 case V4SI_FTYPE_V4SF:
28551 case V4SI_FTYPE_V8SI:
28552 case V4SI_FTYPE_V8HI:
28553 case V4SI_FTYPE_V4DF:
28554 case V4SI_FTYPE_V2DF:
28555 case V4HI_FTYPE_V4HI:
28556 case V4DF_FTYPE_V4DF:
28557 case V4DF_FTYPE_V4SI:
28558 case V4DF_FTYPE_V4SF:
28559 case V4DF_FTYPE_V2DF:
28560 case V4SF_FTYPE_V4SF:
28561 case V4SF_FTYPE_V4SI:
28562 case V4SF_FTYPE_V8SF:
28563 case V4SF_FTYPE_V4DF:
28564 case V4SF_FTYPE_V8HI:
28565 case V4SF_FTYPE_V2DF:
28566 case V2DI_FTYPE_V2DI:
28567 case V2DI_FTYPE_V16QI:
28568 case V2DI_FTYPE_V8HI:
28569 case V2DI_FTYPE_V4SI:
28570 case V2DF_FTYPE_V2DF:
28571 case V2DF_FTYPE_V4SI:
28572 case V2DF_FTYPE_V4DF:
28573 case V2DF_FTYPE_V4SF:
28574 case V2DF_FTYPE_V2SI:
28575 case V2SI_FTYPE_V2SI:
28576 case V2SI_FTYPE_V4SF:
28577 case V2SI_FTYPE_V2SF:
28578 case V2SI_FTYPE_V2DF:
28579 case V2SF_FTYPE_V2SF:
28580 case V2SF_FTYPE_V2SI:
28581 case V32QI_FTYPE_V32QI:
28582 case V32QI_FTYPE_V16QI:
28583 case V16HI_FTYPE_V16HI:
28584 case V16HI_FTYPE_V8HI:
28585 case V8SI_FTYPE_V8SI:
28586 case V16HI_FTYPE_V16QI:
28587 case V8SI_FTYPE_V16QI:
28588 case V4DI_FTYPE_V16QI:
28589 case V8SI_FTYPE_V8HI:
28590 case V4DI_FTYPE_V8HI:
28591 case V4DI_FTYPE_V4SI:
28592 case V4DI_FTYPE_V2DI:
28595 case V4SF_FTYPE_V4SF_VEC_MERGE:
28596 case V2DF_FTYPE_V2DF_VEC_MERGE:
28597 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
28598 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
28599 case V16QI_FTYPE_V16QI_V16QI:
28600 case V16QI_FTYPE_V8HI_V8HI:
28601 case V8QI_FTYPE_V8QI_V8QI:
28602 case V8QI_FTYPE_V4HI_V4HI:
28603 case V8HI_FTYPE_V8HI_V8HI:
28604 case V8HI_FTYPE_V16QI_V16QI:
28605 case V8HI_FTYPE_V4SI_V4SI:
28606 case V8SF_FTYPE_V8SF_V8SF:
28607 case V8SF_FTYPE_V8SF_V8SI:
28608 case V4SI_FTYPE_V4SI_V4SI:
28609 case V4SI_FTYPE_V8HI_V8HI:
28610 case V4SI_FTYPE_V4SF_V4SF:
28611 case V4SI_FTYPE_V2DF_V2DF:
28612 case V4HI_FTYPE_V4HI_V4HI:
28613 case V4HI_FTYPE_V8QI_V8QI:
28614 case V4HI_FTYPE_V2SI_V2SI:
28615 case V4DF_FTYPE_V4DF_V4DF:
28616 case V4DF_FTYPE_V4DF_V4DI:
28617 case V4SF_FTYPE_V4SF_V4SF:
28618 case V4SF_FTYPE_V4SF_V4SI:
28619 case V4SF_FTYPE_V4SF_V2SI:
28620 case V4SF_FTYPE_V4SF_V2DF:
28621 case V4SF_FTYPE_V4SF_DI:
28622 case V4SF_FTYPE_V4SF_SI:
28623 case V2DI_FTYPE_V2DI_V2DI:
28624 case V2DI_FTYPE_V16QI_V16QI:
28625 case V2DI_FTYPE_V4SI_V4SI:
28626 case V2DI_FTYPE_V2DI_V16QI:
28627 case V2DI_FTYPE_V2DF_V2DF:
28628 case V2SI_FTYPE_V2SI_V2SI:
28629 case V2SI_FTYPE_V4HI_V4HI:
28630 case V2SI_FTYPE_V2SF_V2SF:
28631 case V2DF_FTYPE_V2DF_V2DF:
28632 case V2DF_FTYPE_V2DF_V4SF:
28633 case V2DF_FTYPE_V2DF_V2DI:
28634 case V2DF_FTYPE_V2DF_DI:
28635 case V2DF_FTYPE_V2DF_SI:
28636 case V2SF_FTYPE_V2SF_V2SF:
28637 case V1DI_FTYPE_V1DI_V1DI:
28638 case V1DI_FTYPE_V8QI_V8QI:
28639 case V1DI_FTYPE_V2SI_V2SI:
28640 case V32QI_FTYPE_V16HI_V16HI:
28641 case V16HI_FTYPE_V8SI_V8SI:
28642 case V32QI_FTYPE_V32QI_V32QI:
28643 case V16HI_FTYPE_V32QI_V32QI:
28644 case V16HI_FTYPE_V16HI_V16HI:
28645 case V8SI_FTYPE_V4DF_V4DF:
28646 case V8SI_FTYPE_V8SI_V8SI:
28647 case V8SI_FTYPE_V16HI_V16HI:
28648 case V4DI_FTYPE_V4DI_V4DI:
28649 case V4DI_FTYPE_V8SI_V8SI:
28650 if (comparison == UNKNOWN)
28651 return ix86_expand_binop_builtin (icode, exp, target);
28654 case V4SF_FTYPE_V4SF_V4SF_SWAP:
28655 case V2DF_FTYPE_V2DF_V2DF_SWAP:
28656 gcc_assert (comparison != UNKNOWN);
28660 case V16HI_FTYPE_V16HI_V8HI_COUNT:
28661 case V16HI_FTYPE_V16HI_SI_COUNT:
28662 case V8SI_FTYPE_V8SI_V4SI_COUNT:
28663 case V8SI_FTYPE_V8SI_SI_COUNT:
28664 case V4DI_FTYPE_V4DI_V2DI_COUNT:
28665 case V4DI_FTYPE_V4DI_INT_COUNT:
28666 case V8HI_FTYPE_V8HI_V8HI_COUNT:
28667 case V8HI_FTYPE_V8HI_SI_COUNT:
28668 case V4SI_FTYPE_V4SI_V4SI_COUNT:
28669 case V4SI_FTYPE_V4SI_SI_COUNT:
28670 case V4HI_FTYPE_V4HI_V4HI_COUNT:
28671 case V4HI_FTYPE_V4HI_SI_COUNT:
28672 case V2DI_FTYPE_V2DI_V2DI_COUNT:
28673 case V2DI_FTYPE_V2DI_SI_COUNT:
28674 case V2SI_FTYPE_V2SI_V2SI_COUNT:
28675 case V2SI_FTYPE_V2SI_SI_COUNT:
28676 case V1DI_FTYPE_V1DI_V1DI_COUNT:
28677 case V1DI_FTYPE_V1DI_SI_COUNT:
28679 last_arg_count = true;
28681 case UINT64_FTYPE_UINT64_UINT64:
28682 case UINT_FTYPE_UINT_UINT:
28683 case UINT_FTYPE_UINT_USHORT:
28684 case UINT_FTYPE_UINT_UCHAR:
28685 case UINT16_FTYPE_UINT16_INT:
28686 case UINT8_FTYPE_UINT8_INT:
28689 case V2DI_FTYPE_V2DI_INT_CONVERT:
28692 nargs_constant = 1;
28694 case V4DI_FTYPE_V4DI_INT_CONVERT:
28697 nargs_constant = 1;
28699 case V8HI_FTYPE_V8HI_INT:
28700 case V8HI_FTYPE_V8SF_INT:
28701 case V8HI_FTYPE_V4SF_INT:
28702 case V8SF_FTYPE_V8SF_INT:
28703 case V4SI_FTYPE_V4SI_INT:
28704 case V4SI_FTYPE_V8SI_INT:
28705 case V4HI_FTYPE_V4HI_INT:
28706 case V4DF_FTYPE_V4DF_INT:
28707 case V4SF_FTYPE_V4SF_INT:
28708 case V4SF_FTYPE_V8SF_INT:
28709 case V2DI_FTYPE_V2DI_INT:
28710 case V2DF_FTYPE_V2DF_INT:
28711 case V2DF_FTYPE_V4DF_INT:
28712 case V16HI_FTYPE_V16HI_INT:
28713 case V8SI_FTYPE_V8SI_INT:
28714 case V4DI_FTYPE_V4DI_INT:
28715 case V2DI_FTYPE_V4DI_INT:
28717 nargs_constant = 1;
28719 case V16QI_FTYPE_V16QI_V16QI_V16QI:
28720 case V8SF_FTYPE_V8SF_V8SF_V8SF:
28721 case V4DF_FTYPE_V4DF_V4DF_V4DF:
28722 case V4SF_FTYPE_V4SF_V4SF_V4SF:
28723 case V2DF_FTYPE_V2DF_V2DF_V2DF:
28724 case V32QI_FTYPE_V32QI_V32QI_V32QI:
28727 case V32QI_FTYPE_V32QI_V32QI_INT:
28728 case V16HI_FTYPE_V16HI_V16HI_INT:
28729 case V16QI_FTYPE_V16QI_V16QI_INT:
28730 case V4DI_FTYPE_V4DI_V4DI_INT:
28731 case V8HI_FTYPE_V8HI_V8HI_INT:
28732 case V8SI_FTYPE_V8SI_V8SI_INT:
28733 case V8SI_FTYPE_V8SI_V4SI_INT:
28734 case V8SF_FTYPE_V8SF_V8SF_INT:
28735 case V8SF_FTYPE_V8SF_V4SF_INT:
28736 case V4SI_FTYPE_V4SI_V4SI_INT:
28737 case V4DF_FTYPE_V4DF_V4DF_INT:
28738 case V4DF_FTYPE_V4DF_V2DF_INT:
28739 case V4SF_FTYPE_V4SF_V4SF_INT:
28740 case V2DI_FTYPE_V2DI_V2DI_INT:
28741 case V4DI_FTYPE_V4DI_V2DI_INT:
28742 case V2DF_FTYPE_V2DF_V2DF_INT:
28744 nargs_constant = 1;
28746 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
28749 nargs_constant = 1;
28751 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
28754 nargs_constant = 1;
28756 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
28759 nargs_constant = 1;
28761 case V2DI_FTYPE_V2DI_UINT_UINT:
28763 nargs_constant = 2;
28765 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
28766 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
28767 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
28768 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
28770 nargs_constant = 1;
28772 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
28774 nargs_constant = 2;
28777 gcc_unreachable ();
28780 gcc_assert (nargs <= ARRAY_SIZE (args));
28782 if (comparison != UNKNOWN)
28784 gcc_assert (nargs == 2);
28785 return ix86_expand_sse_compare (d, exp, target, swap);
28788 if (rmode == VOIDmode || rmode == tmode)
28792 || GET_MODE (target) != tmode
28793 || !insn_p->operand[0].predicate (target, tmode))
28794 target = gen_reg_rtx (tmode);
28795 real_target = target;
28799 target = gen_reg_rtx (rmode);
28800 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
28803 for (i = 0; i < nargs; i++)
28805 tree arg = CALL_EXPR_ARG (exp, i);
28806 rtx op = expand_normal (arg);
28807 enum machine_mode mode = insn_p->operand[i + 1].mode;
28808 bool match = insn_p->operand[i + 1].predicate (op, mode);
28810 if (last_arg_count && (i + 1) == nargs)
28812 /* SIMD shift insns take either an 8-bit immediate or a
28813 register as the count, but the builtin functions take an int.
28814 If the count doesn't match, we put it in a register. */
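/* For instance (illustrative, assuming GCC's <emmintrin.h>):
   _mm_slli_epi16 (v, n) expands __builtin_ia32_psllwi128 with an int
   count; when N is not a constant, the count is narrowed to SImode
   below and copied into a register so the insn's count operand
   predicate accepts it.  */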
28817 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
28818 if (!insn_p->operand[i + 1].predicate (op, mode))
28819 op = copy_to_reg (op);
28822 else if ((nargs - i) <= nargs_constant)
28827 case CODE_FOR_avx2_inserti128:
28828 case CODE_FOR_avx2_extracti128:
28829 error ("the last argument must be a 1-bit immediate");
28832 case CODE_FOR_sse4_1_roundsd:
28833 case CODE_FOR_sse4_1_roundss:
28835 case CODE_FOR_sse4_1_roundpd:
28836 case CODE_FOR_sse4_1_roundps:
28837 case CODE_FOR_avx_roundpd256:
28838 case CODE_FOR_avx_roundps256:
28840 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
28841 case CODE_FOR_sse4_1_roundps_sfix:
28842 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
28843 case CODE_FOR_avx_roundps_sfix256:
28845 case CODE_FOR_sse4_1_blendps:
28846 case CODE_FOR_avx_blendpd256:
28847 case CODE_FOR_avx_vpermilv4df:
28848 error ("the last argument must be a 4-bit immediate");
28851 case CODE_FOR_sse4_1_blendpd:
28852 case CODE_FOR_avx_vpermilv2df:
28853 case CODE_FOR_xop_vpermil2v2df3:
28854 case CODE_FOR_xop_vpermil2v4sf3:
28855 case CODE_FOR_xop_vpermil2v4df3:
28856 case CODE_FOR_xop_vpermil2v8sf3:
28857 error ("the last argument must be a 2-bit immediate");
28860 case CODE_FOR_avx_vextractf128v4df:
28861 case CODE_FOR_avx_vextractf128v8sf:
28862 case CODE_FOR_avx_vextractf128v8si:
28863 case CODE_FOR_avx_vinsertf128v4df:
28864 case CODE_FOR_avx_vinsertf128v8sf:
28865 case CODE_FOR_avx_vinsertf128v8si:
28866 error ("the last argument must be a 1-bit immediate");
28869 case CODE_FOR_avx_vmcmpv2df3:
28870 case CODE_FOR_avx_vmcmpv4sf3:
28871 case CODE_FOR_avx_cmpv2df3:
28872 case CODE_FOR_avx_cmpv4sf3:
28873 case CODE_FOR_avx_cmpv4df3:
28874 case CODE_FOR_avx_cmpv8sf3:
28875 error ("the last argument must be a 5-bit immediate");
28879 switch (nargs_constant)
28882 if ((nargs - i) == nargs_constant)
28884 error ("the next to last argument must be an 8-bit immediate");
28888 error ("the last argument must be an 8-bit immediate");
28891 gcc_unreachable ();
28898 if (VECTOR_MODE_P (mode))
28899 op = safe_vector_operand (op, mode);
28901 /* If we aren't optimizing, only allow one memory operand to be generated. */
28903 if (memory_operand (op, mode))
28906 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
28908 if (optimize || !match || num_memory > 1)
28909 op = copy_to_mode_reg (mode, op);
28913 op = copy_to_reg (op);
28914 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
28919 args[i].mode = mode;
28925 pat = GEN_FCN (icode) (real_target, args[0].op);
28928 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
28931 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28935 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28936 args[2].op, args[3].op);
28939 gcc_unreachable ();
28949 /* Subroutine of ix86_expand_builtin to take care of special insns
28950 with variable number of operands. */
28953 ix86_expand_special_args_builtin (const struct builtin_description *d,
28954 tree exp, rtx target)
28958 unsigned int i, nargs, arg_adjust, memory;
28962 enum machine_mode mode;
28964 enum insn_code icode = d->icode;
28965 bool last_arg_constant = false;
28966 const struct insn_data_d *insn_p = &insn_data[icode];
28967 enum machine_mode tmode = insn_p->operand[0].mode;
28968 enum { load, store } klass;
28970 switch ((enum ix86_builtin_func_type) d->flag)
28972 case VOID_FTYPE_VOID:
28973 if (icode == CODE_FOR_avx_vzeroupper)
28974 target = GEN_INT (vzeroupper_intrinsic);
28975 emit_insn (GEN_FCN (icode) (target));
28977 case VOID_FTYPE_UINT64:
28978 case VOID_FTYPE_UNSIGNED:
28983 case UINT64_FTYPE_VOID:
28984 case UNSIGNED_FTYPE_VOID:
28989 case UINT64_FTYPE_PUNSIGNED:
28990 case V2DI_FTYPE_PV2DI:
28991 case V4DI_FTYPE_PV4DI:
28992 case V32QI_FTYPE_PCCHAR:
28993 case V16QI_FTYPE_PCCHAR:
28994 case V8SF_FTYPE_PCV4SF:
28995 case V8SF_FTYPE_PCFLOAT:
28996 case V4SF_FTYPE_PCFLOAT:
28997 case V4DF_FTYPE_PCV2DF:
28998 case V4DF_FTYPE_PCDOUBLE:
28999 case V2DF_FTYPE_PCDOUBLE:
29000 case VOID_FTYPE_PVOID:
29005 case VOID_FTYPE_PV2SF_V4SF:
29006 case VOID_FTYPE_PV4DI_V4DI:
29007 case VOID_FTYPE_PV2DI_V2DI:
29008 case VOID_FTYPE_PCHAR_V32QI:
29009 case VOID_FTYPE_PCHAR_V16QI:
29010 case VOID_FTYPE_PFLOAT_V8SF:
29011 case VOID_FTYPE_PFLOAT_V4SF:
29012 case VOID_FTYPE_PDOUBLE_V4DF:
29013 case VOID_FTYPE_PDOUBLE_V2DF:
29014 case VOID_FTYPE_PLONGLONG_LONGLONG:
29015 case VOID_FTYPE_PULONGLONG_ULONGLONG:
29016 case VOID_FTYPE_PINT_INT:
29019 /* Reserve memory operand for target. */
29020 memory = ARRAY_SIZE (args);
29022 case V4SF_FTYPE_V4SF_PCV2SF:
29023 case V2DF_FTYPE_V2DF_PCDOUBLE:
29028 case V8SF_FTYPE_PCV8SF_V8SI:
29029 case V4DF_FTYPE_PCV4DF_V4DI:
29030 case V4SF_FTYPE_PCV4SF_V4SI:
29031 case V2DF_FTYPE_PCV2DF_V2DI:
29032 case V8SI_FTYPE_PCV8SI_V8SI:
29033 case V4DI_FTYPE_PCV4DI_V4DI:
29034 case V4SI_FTYPE_PCV4SI_V4SI:
29035 case V2DI_FTYPE_PCV2DI_V2DI:
29040 case VOID_FTYPE_PV8SF_V8SI_V8SF:
29041 case VOID_FTYPE_PV4DF_V4DI_V4DF:
29042 case VOID_FTYPE_PV4SF_V4SI_V4SF:
29043 case VOID_FTYPE_PV2DF_V2DI_V2DF:
29044 case VOID_FTYPE_PV8SI_V8SI_V8SI:
29045 case VOID_FTYPE_PV4DI_V4DI_V4DI:
29046 case VOID_FTYPE_PV4SI_V4SI_V4SI:
29047 case VOID_FTYPE_PV2DI_V2DI_V2DI:
29050 /* Reserve memory operand for target. */
29051 memory = ARRAY_SIZE (args);
29053 case VOID_FTYPE_UINT_UINT_UINT:
29054 case VOID_FTYPE_UINT64_UINT_UINT:
29055 case UCHAR_FTYPE_UINT_UINT_UINT:
29056 case UCHAR_FTYPE_UINT64_UINT_UINT:
29059 memory = ARRAY_SIZE (args);
29060 last_arg_constant = true;
29063 gcc_unreachable ();
29066 gcc_assert (nargs <= ARRAY_SIZE (args));
29068 if (klass == store)
29070 arg = CALL_EXPR_ARG (exp, 0);
29071 op = expand_normal (arg);
29072 gcc_assert (target == 0);
29075 if (GET_MODE (op) != Pmode)
29076 op = convert_to_mode (Pmode, op, 1);
29077 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
29080 target = force_reg (tmode, op);
29088 || !register_operand (target, tmode)
29089 || GET_MODE (target) != tmode)
29090 target = gen_reg_rtx (tmode);
29093 for (i = 0; i < nargs; i++)
29095 enum machine_mode mode = insn_p->operand[i + 1].mode;
29098 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
29099 op = expand_normal (arg);
29100 match = insn_p->operand[i + 1].predicate (op, mode);
29102 if (last_arg_constant && (i + 1) == nargs)
29106 if (icode == CODE_FOR_lwp_lwpvalsi3
29107 || icode == CODE_FOR_lwp_lwpinssi3
29108 || icode == CODE_FOR_lwp_lwpvaldi3
29109 || icode == CODE_FOR_lwp_lwpinsdi3)
29110 error ("the last argument must be a 32-bit immediate");
29112 error ("the last argument must be an 8-bit immediate");
29120 /* This must be the memory operand. */
29121 if (GET_MODE (op) != Pmode)
29122 op = convert_to_mode (Pmode, op, 1);
29123 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
29124 gcc_assert (GET_MODE (op) == mode
29125 || GET_MODE (op) == VOIDmode);
29129 /* This must be a register. */
29130 if (VECTOR_MODE_P (mode))
29131 op = safe_vector_operand (op, mode);
29133 gcc_assert (GET_MODE (op) == mode
29134 || GET_MODE (op) == VOIDmode);
29135 op = copy_to_mode_reg (mode, op);
29140 args[i].mode = mode;
29146 pat = GEN_FCN (icode) (target);
29149 pat = GEN_FCN (icode) (target, args[0].op);
29152 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
29155 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
29158 gcc_unreachable ();
29164 return klass == store ? 0 : target;
29167 /* Return the integer constant in ARG. Constrain it to be in the range
29168 of the subparts of VEC_TYPE; issue an error if not. */
29171 get_element_number (tree vec_type, tree arg)
29173 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
29175 if (!host_integerp (arg, 1)
29176 || (elt = tree_low_cst (arg, 1), elt > max))
29178 error ("selector must be an integer constant in the range 0..%wi", max);
29185 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29186 ix86_expand_vector_init. We DO have language-level syntax for this, in
29187 the form of (type){ init-list }. Except that since we can't place emms
29188 instructions from inside the compiler, we can't allow the use of MMX
29189 registers unless the user explicitly asks for it. So we do *not* define
29190 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
29191 we have builtins invoked by mmintrin.h that give us license to emit
29192 these sorts of instructions. */
29195 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
29197 enum machine_mode tmode = TYPE_MODE (type);
29198 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
29199 int i, n_elt = GET_MODE_NUNITS (tmode);
29200 rtvec v = rtvec_alloc (n_elt);
29202 gcc_assert (VECTOR_MODE_P (tmode));
29203 gcc_assert (call_expr_nargs (exp) == n_elt);
29205 for (i = 0; i < n_elt; ++i)
29207 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
29208 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
29211 if (!target || !register_operand (target, tmode))
29212 target = gen_reg_rtx (tmode);
29214 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
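/* Illustrative sketch, not part of the original file: GCC's
   <mmintrin.h> reaches this expander through the vec_init builtins,
   e.g. (assuming that header's definitions)

     #include <mmintrin.h>

     __m64
     make_v4hi (short a, short b, short c, short d)
     {
       return _mm_set_pi16 (d, c, b, a);
     }

   where _mm_set_pi16 wraps __builtin_ia32_vec_init_v4hi.  */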
29218 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29219 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29220 had a language-level syntax for referencing vector elements. */
29223 ix86_expand_vec_ext_builtin (tree exp, rtx target)
29225 enum machine_mode tmode, mode0;
29230 arg0 = CALL_EXPR_ARG (exp, 0);
29231 arg1 = CALL_EXPR_ARG (exp, 1);
29233 op0 = expand_normal (arg0);
29234 elt = get_element_number (TREE_TYPE (arg0), arg1);
29236 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29237 mode0 = TYPE_MODE (TREE_TYPE (arg0));
29238 gcc_assert (VECTOR_MODE_P (mode0));
29240 op0 = force_reg (mode0, op0);
29242 if (optimize || !target || !register_operand (target, tmode))
29243 target = gen_reg_rtx (tmode);
29245 ix86_expand_vector_extract (true, target, op0, elt);
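/* Illustrative sketch, not part of the original file: e.g. (assuming
   GCC's <emmintrin.h>) _mm_extract_epi16 wraps
   __builtin_ia32_vec_ext_v8hi, so

     #include <emmintrin.h>

     int
     second_halfword (__m128i v)
     {
       return _mm_extract_epi16 (v, 1);
     }

   reaches this expander; get_element_number rejects selectors
   outside 0..7.  */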
29250 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29251 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
29252 a language-level syntax for referencing vector elements. */
29255 ix86_expand_vec_set_builtin (tree exp)
29257 enum machine_mode tmode, mode1;
29258 tree arg0, arg1, arg2;
29260 rtx op0, op1, target;
29262 arg0 = CALL_EXPR_ARG (exp, 0);
29263 arg1 = CALL_EXPR_ARG (exp, 1);
29264 arg2 = CALL_EXPR_ARG (exp, 2);
29266 tmode = TYPE_MODE (TREE_TYPE (arg0));
29267 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29268 gcc_assert (VECTOR_MODE_P (tmode));
29270 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
29271 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
29272 elt = get_element_number (TREE_TYPE (arg0), arg2);
29274 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
29275 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
29277 op0 = force_reg (tmode, op0);
29278 op1 = force_reg (mode1, op1);
29280 /* OP0 is the source of these builtin functions and shouldn't be
29281 modified. Create a copy, use it, and return it as the target. */
29282 target = gen_reg_rtx (tmode);
29283 emit_move_insn (target, op0);
29284 ix86_expand_vector_set (true, target, op1, elt);
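/* Illustrative sketch, not part of the original file: e.g. (assuming
   GCC's <emmintrin.h>) _mm_insert_epi16 wraps
   __builtin_ia32_vec_set_v8hi, so

     #include <emmintrin.h>

     __m128i
     set_first_halfword (__m128i v, int x)
     {
       return _mm_insert_epi16 (v, x, 0);
     }

   reaches this expander; note the source vector is copied rather than
   modified in place.  */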
29289 /* Expand an expression EXP that calls a built-in function,
29290 with result going to TARGET if that's convenient
29291 (and in mode MODE if that's convenient).
29292 SUBTARGET may be used as the target for computing one of EXP's operands.
29293 IGNORE is nonzero if the value is to be ignored. */
29296 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
29297 enum machine_mode mode ATTRIBUTE_UNUSED,
29298 int ignore ATTRIBUTE_UNUSED)
29300 const struct builtin_description *d;
29302 enum insn_code icode;
29303 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
29304 tree arg0, arg1, arg2, arg3, arg4;
29305 rtx op0, op1, op2, op3, op4, pat;
29306 enum machine_mode mode0, mode1, mode2, mode3, mode4;
29307 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
29309 /* Determine whether the builtin function is available under the current ISA.
29310 Originally the builtin was not created if it wasn't applicable to the
29311 current ISA based on the command line switches. With function specific
29312 options, we need to check in the context of the function making the call
29313 whether it is supported. */
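/* Illustrative sketch of the check below: with function-specific
   options, the same builtin call can be valid in one function and
   rejected in another in the same translation unit, e.g.

     typedef int __v4si __attribute__ ((__vector_size__ (16)));

     __attribute__ ((target ("sse4.1"))) __v4si
     ok (__v4si x, __v4si y) { return __builtin_ia32_pmaxsd128 (x, y); }

     __v4si
     bad (__v4si x, __v4si y) { return __builtin_ia32_pmaxsd128 (x, y); }

   Compiled without -msse4.1, expanding the call in BAD reaches the
   error path below.  */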
29314 if (ix86_builtins_isa[fcode].isa
29315 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
29317 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
29318 NULL, (enum fpmath_unit) 0, false);
29321 error ("%qE needs unknown isa option", fndecl);
29324 gcc_assert (opts != NULL);
29325 error ("%qE needs isa option %s", fndecl, opts);
29333 case IX86_BUILTIN_MASKMOVQ:
29334 case IX86_BUILTIN_MASKMOVDQU:
29335 icode = (fcode == IX86_BUILTIN_MASKMOVQ
29336 ? CODE_FOR_mmx_maskmovq
29337 : CODE_FOR_sse2_maskmovdqu);
29338 /* Note the arg order is different from the operand order. */
29339 arg1 = CALL_EXPR_ARG (exp, 0);
29340 arg2 = CALL_EXPR_ARG (exp, 1);
29341 arg0 = CALL_EXPR_ARG (exp, 2);
29342 op0 = expand_normal (arg0);
29343 op1 = expand_normal (arg1);
29344 op2 = expand_normal (arg2);
29345 mode0 = insn_data[icode].operand[0].mode;
29346 mode1 = insn_data[icode].operand[1].mode;
29347 mode2 = insn_data[icode].operand[2].mode;
29349 if (GET_MODE (op0) != Pmode)
29350 op0 = convert_to_mode (Pmode, op0, 1);
29351 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
29353 if (!insn_data[icode].operand[0].predicate (op0, mode0))
29354 op0 = copy_to_mode_reg (mode0, op0);
29355 if (!insn_data[icode].operand[1].predicate (op1, mode1))
29356 op1 = copy_to_mode_reg (mode1, op1);
29357 if (!insn_data[icode].operand[2].predicate (op2, mode2))
29358 op2 = copy_to_mode_reg (mode2, op2);
29359 pat = GEN_FCN (icode) (op0, op1, op2);
29365 case IX86_BUILTIN_LDMXCSR:
29366 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
29367 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29368 emit_move_insn (target, op0);
29369 emit_insn (gen_sse_ldmxcsr (target));
29372 case IX86_BUILTIN_STMXCSR:
29373 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29374 emit_insn (gen_sse_stmxcsr (target));
29375 return copy_to_mode_reg (SImode, target);
29377 case IX86_BUILTIN_CLFLUSH:
29378 arg0 = CALL_EXPR_ARG (exp, 0);
29379 op0 = expand_normal (arg0);
29380 icode = CODE_FOR_sse2_clflush;
29381 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29383 if (GET_MODE (op0) != Pmode)
29384 op0 = convert_to_mode (Pmode, op0, 1);
29385 op0 = force_reg (Pmode, op0);
29388 emit_insn (gen_sse2_clflush (op0));
29391 case IX86_BUILTIN_MONITOR:
29392 arg0 = CALL_EXPR_ARG (exp, 0);
29393 arg1 = CALL_EXPR_ARG (exp, 1);
29394 arg2 = CALL_EXPR_ARG (exp, 2);
29395 op0 = expand_normal (arg0);
29396 op1 = expand_normal (arg1);
29397 op2 = expand_normal (arg2);
29400 if (GET_MODE (op0) != Pmode)
29401 op0 = convert_to_mode (Pmode, op0, 1);
29402 op0 = force_reg (Pmode, op0);
29405 op1 = copy_to_mode_reg (SImode, op1);
29407 op2 = copy_to_mode_reg (SImode, op2);
29408 emit_insn (ix86_gen_monitor (op0, op1, op2));
29411 case IX86_BUILTIN_MWAIT:
29412 arg0 = CALL_EXPR_ARG (exp, 0);
29413 arg1 = CALL_EXPR_ARG (exp, 1);
29414 op0 = expand_normal (arg0);
29415 op1 = expand_normal (arg1);
29417 op0 = copy_to_mode_reg (SImode, op0);
29419 op1 = copy_to_mode_reg (SImode, op1);
29420 emit_insn (gen_sse3_mwait (op0, op1));
29423 case IX86_BUILTIN_VEC_INIT_V2SI:
29424 case IX86_BUILTIN_VEC_INIT_V4HI:
29425 case IX86_BUILTIN_VEC_INIT_V8QI:
29426 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
29428 case IX86_BUILTIN_VEC_EXT_V2DF:
29429 case IX86_BUILTIN_VEC_EXT_V2DI:
29430 case IX86_BUILTIN_VEC_EXT_V4SF:
29431 case IX86_BUILTIN_VEC_EXT_V4SI:
29432 case IX86_BUILTIN_VEC_EXT_V8HI:
29433 case IX86_BUILTIN_VEC_EXT_V2SI:
29434 case IX86_BUILTIN_VEC_EXT_V4HI:
29435 case IX86_BUILTIN_VEC_EXT_V16QI:
29436 return ix86_expand_vec_ext_builtin (exp, target);
29438 case IX86_BUILTIN_VEC_SET_V2DI:
29439 case IX86_BUILTIN_VEC_SET_V4SF:
29440 case IX86_BUILTIN_VEC_SET_V4SI:
29441 case IX86_BUILTIN_VEC_SET_V8HI:
29442 case IX86_BUILTIN_VEC_SET_V4HI:
29443 case IX86_BUILTIN_VEC_SET_V16QI:
29444 return ix86_expand_vec_set_builtin (exp);
29446 case IX86_BUILTIN_INFQ:
29447 case IX86_BUILTIN_HUGE_VALQ:
29449 REAL_VALUE_TYPE inf;
29453 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
29455 tmp = validize_mem (force_const_mem (mode, tmp));
29458 target = gen_reg_rtx (mode);
29460 emit_move_insn (target, tmp);
29464 case IX86_BUILTIN_LLWPCB:
29465 arg0 = CALL_EXPR_ARG (exp, 0);
29466 op0 = expand_normal (arg0);
29467 icode = CODE_FOR_lwp_llwpcb;
29468 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29470 if (GET_MODE (op0) != Pmode)
29471 op0 = convert_to_mode (Pmode, op0, 1);
29472 op0 = force_reg (Pmode, op0);
29474 emit_insn (gen_lwp_llwpcb (op0));
29477 case IX86_BUILTIN_SLWPCB:
29478 icode = CODE_FOR_lwp_slwpcb;
29480 || !insn_data[icode].operand[0].predicate (target, Pmode))
29481 target = gen_reg_rtx (Pmode);
29482 emit_insn (gen_lwp_slwpcb (target));
29485 case IX86_BUILTIN_BEXTRI32:
29486 case IX86_BUILTIN_BEXTRI64:
29487 arg0 = CALL_EXPR_ARG (exp, 0);
29488 arg1 = CALL_EXPR_ARG (exp, 1);
29489 op0 = expand_normal (arg0);
29490 op1 = expand_normal (arg1);
29491 icode = (fcode == IX86_BUILTIN_BEXTRI32
29492 ? CODE_FOR_tbm_bextri_si
29493 : CODE_FOR_tbm_bextri_di);
29494 if (!CONST_INT_P (op1))
29496 error ("the last argument must be an immediate");
29501 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
29502 unsigned char lsb_index = INTVAL (op1) & 0xFF;
29503 op1 = GEN_INT (length);
29504 op2 = GEN_INT (lsb_index);
29505 pat = GEN_FCN (icode) (target, op0, op1, op2);
29511 case IX86_BUILTIN_RDRAND16_STEP:
29512 icode = CODE_FOR_rdrandhi_1;
29516 case IX86_BUILTIN_RDRAND32_STEP:
29517 icode = CODE_FOR_rdrandsi_1;
29521 case IX86_BUILTIN_RDRAND64_STEP:
29522 icode = CODE_FOR_rdranddi_1;
29526 op0 = gen_reg_rtx (mode0);
29527 emit_insn (GEN_FCN (icode) (op0));
29529 arg0 = CALL_EXPR_ARG (exp, 0);
29530 op1 = expand_normal (arg0);
29531 if (!address_operand (op1, VOIDmode))
29533 op1 = convert_memory_address (Pmode, op1);
29534 op1 = copy_addr_to_reg (op1);
29536 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
29538 op1 = gen_reg_rtx (SImode);
29539 emit_move_insn (op1, CONST1_RTX (SImode));
29541 /* Emit SImode conditional move. */
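/* Assuming the standard RDRAND semantics: RDRAND sets CF on success
   and zeroes its destination register on failure, so the conditional
   move below selects the (zero) zero-extended output when CF is clear
   and the constant 1 when CF is set, producing the step builtin's 0/1
   status return.  */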
29542 if (mode0 == HImode)
29544 op2 = gen_reg_rtx (SImode);
29545 emit_insn (gen_zero_extendhisi2 (op2, op0));
29547 else if (mode0 == SImode)
29550 op2 = gen_rtx_SUBREG (SImode, op0, 0);
29553 target = gen_reg_rtx (SImode);
29555 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
29557 emit_insn (gen_rtx_SET (VOIDmode, target,
29558 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
29561 case IX86_BUILTIN_GATHERSIV2DF:
29562 icode = CODE_FOR_avx2_gathersiv2df;
29564 case IX86_BUILTIN_GATHERSIV4DF:
29565 icode = CODE_FOR_avx2_gathersiv4df;
29567 case IX86_BUILTIN_GATHERDIV2DF:
29568 icode = CODE_FOR_avx2_gatherdiv2df;
29570 case IX86_BUILTIN_GATHERDIV4DF:
29571 icode = CODE_FOR_avx2_gatherdiv4df;
29573 case IX86_BUILTIN_GATHERSIV4SF:
29574 icode = CODE_FOR_avx2_gathersiv4sf;
29576 case IX86_BUILTIN_GATHERSIV8SF:
29577 icode = CODE_FOR_avx2_gathersiv8sf;
29579 case IX86_BUILTIN_GATHERDIV4SF:
29580 icode = CODE_FOR_avx2_gatherdiv4sf;
29582 case IX86_BUILTIN_GATHERDIV8SF:
29583 icode = CODE_FOR_avx2_gatherdiv8sf;
29585 case IX86_BUILTIN_GATHERSIV2DI:
29586 icode = CODE_FOR_avx2_gathersiv2di;
29588 case IX86_BUILTIN_GATHERSIV4DI:
29589 icode = CODE_FOR_avx2_gathersiv4di;
29591 case IX86_BUILTIN_GATHERDIV2DI:
29592 icode = CODE_FOR_avx2_gatherdiv2di;
29594 case IX86_BUILTIN_GATHERDIV4DI:
29595 icode = CODE_FOR_avx2_gatherdiv4di;
29597 case IX86_BUILTIN_GATHERSIV4SI:
29598 icode = CODE_FOR_avx2_gathersiv4si;
29600 case IX86_BUILTIN_GATHERSIV8SI:
29601 icode = CODE_FOR_avx2_gathersiv8si;
29603 case IX86_BUILTIN_GATHERDIV4SI:
29604 icode = CODE_FOR_avx2_gatherdiv4si;
29606 case IX86_BUILTIN_GATHERDIV8SI:
29607 icode = CODE_FOR_avx2_gatherdiv8si;
29609 case IX86_BUILTIN_GATHERALTSIV4DF:
29610 icode = CODE_FOR_avx2_gathersiv4df;
29612 case IX86_BUILTIN_GATHERALTDIV8SF:
29613 icode = CODE_FOR_avx2_gatherdiv8sf;
29615 case IX86_BUILTIN_GATHERALTSIV4DI:
29616 icode = CODE_FOR_avx2_gathersiv4di;
29618 case IX86_BUILTIN_GATHERALTDIV8SI:
29619 icode = CODE_FOR_avx2_gatherdiv8si;
29623 arg0 = CALL_EXPR_ARG (exp, 0);
29624 arg1 = CALL_EXPR_ARG (exp, 1);
29625 arg2 = CALL_EXPR_ARG (exp, 2);
29626 arg3 = CALL_EXPR_ARG (exp, 3);
29627 arg4 = CALL_EXPR_ARG (exp, 4);
29628 op0 = expand_normal (arg0);
29629 op1 = expand_normal (arg1);
29630 op2 = expand_normal (arg2);
29631 op3 = expand_normal (arg3);
29632 op4 = expand_normal (arg4);
29633 /* Note the arg order is different from the operand order. */
29634 mode0 = insn_data[icode].operand[1].mode;
29635 mode2 = insn_data[icode].operand[3].mode;
29636 mode3 = insn_data[icode].operand[4].mode;
29637 mode4 = insn_data[icode].operand[5].mode;
29639 if (target == NULL_RTX
29640 || GET_MODE (target) != insn_data[icode].operand[0].mode)
29641 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
29643 subtarget = target;
29645 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
29646 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
29648 rtx half = gen_reg_rtx (V4SImode);
29649 if (!nonimmediate_operand (op2, V8SImode))
29650 op2 = copy_to_mode_reg (V8SImode, op2);
29651 emit_insn (gen_vec_extract_lo_v8si (half, op2));
29654 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
29655 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
29657 rtx (*gen) (rtx, rtx);
29658 rtx half = gen_reg_rtx (mode0);
29659 if (mode0 == V4SFmode)
29660 gen = gen_vec_extract_lo_v8sf;
29662 gen = gen_vec_extract_lo_v8si;
29663 if (!nonimmediate_operand (op0, GET_MODE (op0)))
29664 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
29665 emit_insn (gen (half, op0));
29667 if (!nonimmediate_operand (op3, GET_MODE (op3)))
29668 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
29669 emit_insn (gen (half, op3));
29673 /* Force memory operand only with base register here. But we
29674 don't want to do it on memory operands for other builtin functions. */
29676 if (GET_MODE (op1) != Pmode)
29677 op1 = convert_to_mode (Pmode, op1, 1);
29678 op1 = force_reg (Pmode, op1);
29680 if (!insn_data[icode].operand[1].predicate (op0, mode0))
29681 op0 = copy_to_mode_reg (mode0, op0);
29682 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
29683 op1 = copy_to_mode_reg (Pmode, op1);
29684 if (!insn_data[icode].operand[3].predicate (op2, mode2))
29685 op2 = copy_to_mode_reg (mode2, op2);
29686 if (!insn_data[icode].operand[4].predicate (op3, mode3))
29687 op3 = copy_to_mode_reg (mode3, op3);
29688 if (!insn_data[icode].operand[5].predicate (op4, mode4))
29690 error ("the last argument must be a scale of 1, 2, 4 or 8");
29694 /* Optimization: if the mask is known to have all high bits set,
29695 replace op0 with pc_rtx to signal that the instruction
29696 overwrites the whole destination and doesn't use its
29697 previous contents. */
29700 if (TREE_CODE (arg3) == VECTOR_CST)
29703 unsigned int negative = 0;
29704 for (elt = TREE_VECTOR_CST_ELTS (arg3);
29705 elt; elt = TREE_CHAIN (elt))
29707 tree cst = TREE_VALUE (elt);
29708 if (TREE_CODE (cst) == INTEGER_CST
29709 && tree_int_cst_sign_bit (cst))
29711 else if (TREE_CODE (cst) == REAL_CST
29712 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
29715 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
29718 else if (TREE_CODE (arg3) == SSA_NAME)
29720 /* Also recognize when the mask is like:
29721 __v2df src = _mm_setzero_pd ();
29722 __v2df mask = _mm_cmpeq_pd (src, src);
29724 __v8sf src = _mm256_setzero_ps ();
29725 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
29726 as that is a cheaper way to load all ones into
29727 a register than having to load a constant from memory. */
29729 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
29730 if (is_gimple_call (def_stmt))
29732 tree fndecl = gimple_call_fndecl (def_stmt);
29734 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
29735 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
29737 case IX86_BUILTIN_CMPPD:
29738 case IX86_BUILTIN_CMPPS:
29739 case IX86_BUILTIN_CMPPD256:
29740 case IX86_BUILTIN_CMPPS256:
29741 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
29744 case IX86_BUILTIN_CMPEQPD:
29745 case IX86_BUILTIN_CMPEQPS:
29746 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
29747 && initializer_zerop (gimple_call_arg (def_stmt,
29758 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
29763 if (fcode == IX86_BUILTIN_GATHERDIV8SF
29764 || fcode == IX86_BUILTIN_GATHERDIV8SI)
29766 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
29767 ? V4SFmode : V4SImode;
29768 if (target == NULL_RTX)
29769 target = gen_reg_rtx (tmode);
29770 if (tmode == V4SFmode)
29771 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
29773 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
29776 target = subtarget;
29784 for (i = 0, d = bdesc_special_args;
29785 i < ARRAY_SIZE (bdesc_special_args);
29787 if (d->code == fcode)
29788 return ix86_expand_special_args_builtin (d, exp, target);
29790 for (i = 0, d = bdesc_args;
29791 i < ARRAY_SIZE (bdesc_args);
29793 if (d->code == fcode)
29796 case IX86_BUILTIN_FABSQ:
29797 case IX86_BUILTIN_COPYSIGNQ:
29799 /* Emit a normal call if SSE2 isn't available. */
29800 return expand_call (exp, target, ignore);
29802 return ix86_expand_args_builtin (d, exp, target);
29805 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
29806 if (d->code == fcode)
29807 return ix86_expand_sse_comi (d, exp, target);
29809 for (i = 0, d = bdesc_pcmpestr;
29810 i < ARRAY_SIZE (bdesc_pcmpestr);
29812 if (d->code == fcode)
29813 return ix86_expand_sse_pcmpestr (d, exp, target);
29815 for (i = 0, d = bdesc_pcmpistr;
29816 i < ARRAY_SIZE (bdesc_pcmpistr);
29818 if (d->code == fcode)
29819 return ix86_expand_sse_pcmpistr (d, exp, target);
29821 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
29822 if (d->code == fcode)
29823 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
29824 (enum ix86_builtin_func_type)
29825 d->flag, d->comparison);
29827 gcc_unreachable ();
29830 /* Returns a function decl for a vectorized version of the builtin function
29831 with builtin function code FN and the result vector type TYPE, or NULL_TREE
29832 if it is not available. */
29835 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
29838 enum machine_mode in_mode, out_mode;
29840 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29842 if (TREE_CODE (type_out) != VECTOR_TYPE
29843 || TREE_CODE (type_in) != VECTOR_TYPE
29844 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
29847 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29848 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29849 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29850 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29854 case BUILT_IN_SQRT:
29855 if (out_mode == DFmode && in_mode == DFmode)
29857 if (out_n == 2 && in_n == 2)
29858 return ix86_builtins[IX86_BUILTIN_SQRTPD];
29859 else if (out_n == 4 && in_n == 4)
29860 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
29864 case BUILT_IN_SQRTF:
29865 if (out_mode == SFmode && in_mode == SFmode)
29867 if (out_n == 4 && in_n == 4)
29868 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
29869 else if (out_n == 8 && in_n == 8)
29870 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
29874 case BUILT_IN_IFLOOR:
29875 case BUILT_IN_LFLOOR:
29876 case BUILT_IN_LLFLOOR:
29877 /* The round insn does not trap on denormals. */
29878 if (flag_trapping_math || !TARGET_ROUND)
29881 if (out_mode == SImode && in_mode == DFmode)
29883 if (out_n == 4 && in_n == 2)
29884 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
29885 else if (out_n == 8 && in_n == 4)
29886 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
29890 case BUILT_IN_IFLOORF:
29891 case BUILT_IN_LFLOORF:
29892 case BUILT_IN_LLFLOORF:
29893 /* The round insn does not trap on denormals. */
29894 if (flag_trapping_math || !TARGET_ROUND)
29897 if (out_mode == SImode && in_mode == SFmode)
29899 if (out_n == 4 && in_n == 4)
29900 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
29901 else if (out_n == 8 && in_n == 8)
29902 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
29906 case BUILT_IN_ICEIL:
29907 case BUILT_IN_LCEIL:
29908 case BUILT_IN_LLCEIL:
29909 /* The round insn does not trap on denormals. */
29910 if (flag_trapping_math || !TARGET_ROUND)
29913 if (out_mode == SImode && in_mode == DFmode)
29915 if (out_n == 4 && in_n == 2)
29916 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
29917 else if (out_n == 8 && in_n == 4)
29918 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
29922 case BUILT_IN_ICEILF:
29923 case BUILT_IN_LCEILF:
29924 case BUILT_IN_LLCEILF:
29925 /* The round insn does not trap on denormals. */
29926 if (flag_trapping_math || !TARGET_ROUND)
29929 if (out_mode == SImode && in_mode == SFmode)
29931 if (out_n == 4 && in_n == 4)
29932 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
29933 else if (out_n == 8 && in_n == 8)
29934 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
29938 case BUILT_IN_IRINT:
29939 case BUILT_IN_LRINT:
29940 case BUILT_IN_LLRINT:
29941 if (out_mode == SImode && in_mode == DFmode)
29943 if (out_n == 4 && in_n == 2)
29944 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
29945 else if (out_n == 8 && in_n == 4)
29946 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
29950 case BUILT_IN_IRINTF:
29951 case BUILT_IN_LRINTF:
29952 case BUILT_IN_LLRINTF:
29953 if (out_mode == SImode && in_mode == SFmode)
29955 if (out_n == 4 && in_n == 4)
29956 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
29957 else if (out_n == 8 && in_n == 8)
29958 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
29962 case BUILT_IN_IROUND:
29963 case BUILT_IN_LROUND:
29964 case BUILT_IN_LLROUND:
29965 /* The round insn does not trap on denormals. */
29966 if (flag_trapping_math || !TARGET_ROUND)
29969 if (out_mode == SImode && in_mode == DFmode)
29971 if (out_n == 4 && in_n == 2)
29972 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
29973 else if (out_n == 8 && in_n == 4)
29974 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
29978 case BUILT_IN_IROUNDF:
29979 case BUILT_IN_LROUNDF:
29980 case BUILT_IN_LLROUNDF:
29981 /* The round insn does not trap on denormals. */
29982 if (flag_trapping_math || !TARGET_ROUND)
29985 if (out_mode == SImode && in_mode == SFmode)
29987 if (out_n == 4 && in_n == 4)
29988 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
29989 else if (out_n == 8 && in_n == 8)
29990 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
29994 case BUILT_IN_COPYSIGN:
29995 if (out_mode == DFmode && in_mode == DFmode)
29997 if (out_n == 2 && in_n == 2)
29998 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
29999 else if (out_n == 4 && in_n == 4)
30000 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
30004 case BUILT_IN_COPYSIGNF:
30005 if (out_mode == SFmode && in_mode == SFmode)
30007 if (out_n == 4 && in_n == 4)
30008 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
30009 else if (out_n == 8 && in_n == 8)
30010 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
30014 case BUILT_IN_FLOOR:
30015 /* The round insn does not trap on denormals. */
30016 if (flag_trapping_math || !TARGET_ROUND)
30019 if (out_mode == DFmode && in_mode == DFmode)
30021 if (out_n == 2 && in_n == 2)
30022 return ix86_builtins[IX86_BUILTIN_FLOORPD];
30023 else if (out_n == 4 && in_n == 4)
30024 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
30028 case BUILT_IN_FLOORF:
30029 /* The round insn does not trap on denormals. */
30030 if (flag_trapping_math || !TARGET_ROUND)
30033 if (out_mode == SFmode && in_mode == SFmode)
30035 if (out_n == 4 && in_n == 4)
30036 return ix86_builtins[IX86_BUILTIN_FLOORPS];
30037 else if (out_n == 8 && in_n == 8)
30038 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
30042 case BUILT_IN_CEIL:
30043 /* The round insn does not trap on denormals. */
30044 if (flag_trapping_math || !TARGET_ROUND)
30047 if (out_mode == DFmode && in_mode == DFmode)
30049 if (out_n == 2 && in_n == 2)
30050 return ix86_builtins[IX86_BUILTIN_CEILPD];
30051 else if (out_n == 4 && in_n == 4)
30052 return ix86_builtins[IX86_BUILTIN_CEILPD256];
30056 case BUILT_IN_CEILF:
30057 /* The round insn does not trap on denormals. */
30058 if (flag_trapping_math || !TARGET_ROUND)
30061 if (out_mode == SFmode && in_mode == SFmode)
30063 if (out_n == 4 && in_n == 4)
30064 return ix86_builtins[IX86_BUILTIN_CEILPS];
30065 else if (out_n == 8 && in_n == 8)
30066 return ix86_builtins[IX86_BUILTIN_CEILPS256];
30070 case BUILT_IN_TRUNC:
30071 /* The round insn does not trap on denormals. */
30072 if (flag_trapping_math || !TARGET_ROUND)
30075 if (out_mode == DFmode && in_mode == DFmode)
30077 if (out_n == 2 && in_n == 2)
30078 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
30079 else if (out_n == 4 && in_n == 4)
30080 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
30084 case BUILT_IN_TRUNCF:
30085 /* The round insn does not trap on denormals. */
30086 if (flag_trapping_math || !TARGET_ROUND)
30089 if (out_mode == SFmode && in_mode == SFmode)
30091 if (out_n == 4 && in_n == 4)
30092 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
30093 else if (out_n == 8 && in_n == 8)
30094 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
30098 case BUILT_IN_RINT:
30099 /* The round insn does not trap on denormals. */
30100 if (flag_trapping_math || !TARGET_ROUND)
30103 if (out_mode == DFmode && in_mode == DFmode)
30105 if (out_n == 2 && in_n == 2)
30106 return ix86_builtins[IX86_BUILTIN_RINTPD];
30107 else if (out_n == 4 && in_n == 4)
30108 return ix86_builtins[IX86_BUILTIN_RINTPD256];
30112 case BUILT_IN_RINTF:
30113 /* The round insn does not trap on denormals. */
30114 if (flag_trapping_math || !TARGET_ROUND)
30117 if (out_mode == SFmode && in_mode == SFmode)
30119 if (out_n == 4 && in_n == 4)
30120 return ix86_builtins[IX86_BUILTIN_RINTPS];
30121 else if (out_n == 8 && in_n == 8)
30122 return ix86_builtins[IX86_BUILTIN_RINTPS256];
30126 case BUILT_IN_ROUND:
30127 /* The round insn does not trap on denormals. */
30128 if (flag_trapping_math || !TARGET_ROUND)
30131 if (out_mode == DFmode && in_mode == DFmode)
30133 if (out_n == 2 && in_n == 2)
30134 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
30135 else if (out_n == 4 && in_n == 4)
30136 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
30140 case BUILT_IN_ROUNDF:
30141 /* The round insn does not trap on denormals. */
30142 if (flag_trapping_math || !TARGET_ROUND)
30145 if (out_mode == SFmode && in_mode == SFmode)
30147 if (out_n == 4 && in_n == 4)
30148 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
30149 else if (out_n == 8 && in_n == 8)
30150 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
30155 if (out_mode == DFmode && in_mode == DFmode)
30157 if (out_n == 2 && in_n == 2)
30158 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
30159 if (out_n == 4 && in_n == 4)
30160 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
30164 case BUILT_IN_FMAF:
30165 if (out_mode == SFmode && in_mode == SFmode)
30167 if (out_n == 4 && in_n == 4)
30168 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
30169 if (out_n == 8 && in_n == 8)
30170 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
30178 /* Dispatch to a handler for a vectorization library. */
30179 if (ix86_veclib_handler)
30180 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
30186 /* Handler for an SVML-style interface to
30187 a library with vectorized intrinsics. */
30190 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
30193 tree fntype, new_fndecl, args;
30196 enum machine_mode el_mode, in_mode;
30199 /* The SVML library is suitable for unsafe math only. */
30200 if (!flag_unsafe_math_optimizations)
30203 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30204 n = TYPE_VECTOR_SUBPARTS (type_out);
30205 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30206 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30207 if (el_mode != in_mode
30215 case BUILT_IN_LOG10:
30217 case BUILT_IN_TANH:
30219 case BUILT_IN_ATAN:
30220 case BUILT_IN_ATAN2:
30221 case BUILT_IN_ATANH:
30222 case BUILT_IN_CBRT:
30223 case BUILT_IN_SINH:
30225 case BUILT_IN_ASINH:
30226 case BUILT_IN_ASIN:
30227 case BUILT_IN_COSH:
30229 case BUILT_IN_ACOSH:
30230 case BUILT_IN_ACOS:
30231 if (el_mode != DFmode || n != 2)
30235 case BUILT_IN_EXPF:
30236 case BUILT_IN_LOGF:
30237 case BUILT_IN_LOG10F:
30238 case BUILT_IN_POWF:
30239 case BUILT_IN_TANHF:
30240 case BUILT_IN_TANF:
30241 case BUILT_IN_ATANF:
30242 case BUILT_IN_ATAN2F:
30243 case BUILT_IN_ATANHF:
30244 case BUILT_IN_CBRTF:
30245 case BUILT_IN_SINHF:
30246 case BUILT_IN_SINF:
30247 case BUILT_IN_ASINHF:
30248 case BUILT_IN_ASINF:
30249 case BUILT_IN_COSHF:
30250 case BUILT_IN_COSF:
30251 case BUILT_IN_ACOSHF:
30252 case BUILT_IN_ACOSF:
30253 if (el_mode != SFmode || n != 4)
30261 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30263 if (fn == BUILT_IN_LOGF)
30264 strcpy (name, "vmlsLn4");
30265 else if (fn == BUILT_IN_LOG)
30266 strcpy (name, "vmldLn2");
30269 sprintf (name, "vmls%s", bname+10);
30270 name[strlen (name)-1] = '4';
30273 sprintf (name, "vmld%s2", bname+10);
30275 /* Convert to uppercase. */
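/* For instance (illustrative): BUILT_IN_SINF ("__builtin_sinf") is
   mangled to "vmlsSin4", and BUILT_IN_SIN ("__builtin_sin") to
   "vmldSin2".  */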
30279 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30281 args = TREE_CHAIN (args))
30285 fntype = build_function_type_list (type_out, type_in, NULL);
30287 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30289 /* Build a function declaration for the vectorized function. */
30290 new_fndecl = build_decl (BUILTINS_LOCATION,
30291 FUNCTION_DECL, get_identifier (name), fntype);
30292 TREE_PUBLIC (new_fndecl) = 1;
30293 DECL_EXTERNAL (new_fndecl) = 1;
30294 DECL_IS_NOVOPS (new_fndecl) = 1;
30295 TREE_READONLY (new_fndecl) = 1;
30300 /* Handler for an ACML-style interface to
30301 a library with vectorized intrinsics. */
30304 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
30306 char name[20] = "__vr.._";
30307 tree fntype, new_fndecl, args;
30310 enum machine_mode el_mode, in_mode;
30313 /* The ACML is 64-bit only, and is suitable for unsafe math only,
30314 as it does not correctly support parts of IEEE with the required
30315 precision, such as denormals. */
30317 || !flag_unsafe_math_optimizations)
30320 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30321 n = TYPE_VECTOR_SUBPARTS (type_out);
30322 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30323 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30324 if (el_mode != in_mode
30334 case BUILT_IN_LOG2:
30335 case BUILT_IN_LOG10:
30338 if (el_mode != DFmode
30343 case BUILT_IN_SINF:
30344 case BUILT_IN_COSF:
30345 case BUILT_IN_EXPF:
30346 case BUILT_IN_POWF:
30347 case BUILT_IN_LOGF:
30348 case BUILT_IN_LOG2F:
30349 case BUILT_IN_LOG10F:
30352 if (el_mode != SFmode
30361 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30362 sprintf (name + 7, "%s", bname+10);
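/* For instance (illustrative): BUILT_IN_SINF yields "__vrs4_sinf"
   and BUILT_IN_SIN yields "__vrd2_sin".  */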
30365 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30367 args = TREE_CHAIN (args))
30371 fntype = build_function_type_list (type_out, type_in, NULL);
30373 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30375 /* Build a function declaration for the vectorized function. */
30376 new_fndecl = build_decl (BUILTINS_LOCATION,
30377 FUNCTION_DECL, get_identifier (name), fntype);
30378 TREE_PUBLIC (new_fndecl) = 1;
30379 DECL_EXTERNAL (new_fndecl) = 1;
30380 DECL_IS_NOVOPS (new_fndecl) = 1;
30381 TREE_READONLY (new_fndecl) = 1;
30386 /* Returns a decl of a function that implements a gather load with
30387 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
30388 Return NULL_TREE if it is not available. */
30391 ix86_vectorize_builtin_gather (const_tree mem_vectype,
30392 const_tree index_type, int scale)
30395 enum ix86_builtins code;
30400 if ((TREE_CODE (index_type) != INTEGER_TYPE
30401 && !POINTER_TYPE_P (index_type))
30402 || (TYPE_MODE (index_type) != SImode
30403 && TYPE_MODE (index_type) != DImode))
30406 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
30409 /* The v*gather* insns sign-extend the index to pointer mode. */
30410 if (TYPE_PRECISION (index_type) < POINTER_SIZE
30411 && TYPE_UNSIGNED (index_type))
30416 || (scale & (scale - 1)) != 0)
30419 si = TYPE_MODE (index_type) == SImode;
30420 switch (TYPE_MODE (mem_vectype))
30423 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
30426 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
30429 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
30432 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
30435 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
30438 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
30441 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
30444 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
30450 return ix86_builtins[code];
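/* Illustrative sketch, not part of the original file: with -mavx2 the
   vectorizer can use the decl returned above to implement an indexed
   load, e.g.

     void
     gather (double *out, const double *in, const int *idx, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         out[i] = in[idx[i]];
     }

   where the V4DFmode data / SImode index combination maps to
   IX86_BUILTIN_GATHERALTSIV4DF (vgatherdpd) with scale 8.  */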
30453 /* Returns a decl for a target-specific builtin that implements the
30454 reciprocal of the function, or NULL_TREE if not available. */
30457 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
30458 bool sqrt ATTRIBUTE_UNUSED)
30460 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
30461 && flag_finite_math_only && !flag_trapping_math
30462 && flag_unsafe_math_optimizations))
30466 /* Machine dependent builtins. */
30469 /* Vectorized version of sqrt to rsqrt conversion. */
30470 case IX86_BUILTIN_SQRTPS_NR:
30471 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
30473 case IX86_BUILTIN_SQRTPS_NR256:
30474 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
30480 /* Normal builtins. */
30483 /* Sqrt to rsqrt conversion. */
30484 case BUILT_IN_SQRTF:
30485 return ix86_builtins[IX86_BUILTIN_RSQRTF];
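/* For instance (illustrative): under the flags tested above,
   1.0f / sqrtf (x) can be rewritten to use IX86_BUILTIN_RSQRTF,
   i.e. rsqrtss plus a Newton-Raphson correction step, instead of a
   full-precision divide and square root.  */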
30492 /* Helper for avx_vpermilps256_operand et al. This is also used by
30493 the expansion functions to turn the parallel back into a mask.
30494 The return value is 0 for no match and the imm8+1 for a match. */
30497 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
30499 unsigned i, nelt = GET_MODE_NUNITS (mode);
30501 unsigned char ipar[8];
30503 if (XVECLEN (par, 0) != (int) nelt)
30506 /* Validate that all of the elements are constants, and not totally
30507 out of range. Copy the data into an integral array to make the
30508 subsequent checks easier. */
30509 for (i = 0; i < nelt; ++i)
30511 rtx er = XVECEXP (par, 0, i);
30512 unsigned HOST_WIDE_INT ei;
30514 if (!CONST_INT_P (er))
30525 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
30527 for (i = 0; i < 2; ++i)
30531 mask |= ipar[i] << i;
30533 for (i = 2; i < 4; ++i)
30537 mask |= (ipar[i] - 2) << i;
30542 /* In the 256-bit SFmode case, we have full freedom of movement
30543 within the low 128-bit lane, but the high 128-bit lane must
30544 mirror the exact same pattern. */
30545 for (i = 0; i < 4; ++i)
30546 if (ipar[i] + 4 != ipar[i + 4])
30553 /* In the 128-bit case, we have full freedom in the placement of
30554 the elements from the source operand. */
30555 for (i = 0; i < nelt; ++i)
30556 mask |= ipar[i] << (i * (nelt / 2));
30560 gcc_unreachable ();
30563 /* Make sure success has a non-zero value by adding one. */
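/* For instance (illustrative): for V4SFmode the parallel (2 3 0 1)
   gives mask = 2 | (3 << 2) | (0 << 4) | (1 << 6) = 0x4e, so the
   return value is 0x4f.  */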
30567 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
30568 the expansion functions to turn the parallel back into a mask.
30569 The return value is 0 for no match and the imm8+1 for a match. */
30572 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
30574 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
30576 unsigned char ipar[8];
30578 if (XVECLEN (par, 0) != (int) nelt)
30581 /* Validate that all of the elements are constants, and not totally
30582 out of range. Copy the data into an integral array to make the
30583 subsequent checks easier. */
30584 for (i = 0; i < nelt; ++i)
30586 rtx er = XVECEXP (par, 0, i);
30587 unsigned HOST_WIDE_INT ei;
30589 if (!CONST_INT_P (er))
30592 if (ei >= 2 * nelt)
/* Validate that each half of the permute selects a run of consecutive
elements, i.e. really is a half.  */
30598 for (i = 0; i < nelt2 - 1; ++i)
30599 if (ipar[i] + 1 != ipar[i + 1])
30601 for (i = nelt2; i < nelt - 1; ++i)
30602 if (ipar[i] + 1 != ipar[i + 1])
30605 /* Reconstruct the mask. */
30606 for (i = 0; i < 2; ++i)
30608 unsigned e = ipar[i * nelt2];
30612 mask |= e << (i * 4);
30615 /* Make sure success has a non-zero value by adding one. */
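/* Illustrative worked example (editor's sketch): for V4DFmode
(nelt == 4, nelt2 == 2), the parallel (2 3 4 5) selects the high half
of operand 0 and the low half of operand 1.  Assuming the elided checks
reduce each half's leading element to a lane number (e /= nelt2), the
low nibble of the mask gets lane 1 and the high nibble lane 2, so
mask == 0x21 and the function returns 0x22 (imm8 + 1).  */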
30619 /* Store OPERAND to the memory after reload is completed. This means
30620 that we can't easily use assign_stack_local. */
30622 ix86_force_to_memory (enum machine_mode mode, rtx operand)
30626 gcc_assert (reload_completed);
30627 if (ix86_using_red_zone ())
30629 result = gen_rtx_MEM (mode,
30630 gen_rtx_PLUS (Pmode,
30632 GEN_INT (-RED_ZONE_SIZE)));
30633 emit_move_insn (result, operand);
30635 else if (TARGET_64BIT)
30641 operand = gen_lowpart (DImode, operand);
30645 gen_rtx_SET (VOIDmode,
30646 gen_rtx_MEM (DImode,
30647 gen_rtx_PRE_DEC (DImode,
30648 stack_pointer_rtx)),
30652 gcc_unreachable ();
30654 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30663 split_double_mode (mode, &operand, 1, operands, operands + 1);
30665 gen_rtx_SET (VOIDmode,
30666 gen_rtx_MEM (SImode,
30667 gen_rtx_PRE_DEC (Pmode,
30668 stack_pointer_rtx)),
30671 gen_rtx_SET (VOIDmode,
30672 gen_rtx_MEM (SImode,
30673 gen_rtx_PRE_DEC (Pmode,
30674 stack_pointer_rtx)),
30679 /* Store HImodes as SImodes. */
30680 operand = gen_lowpart (SImode, operand);
30684 gen_rtx_SET (VOIDmode,
30685 gen_rtx_MEM (GET_MODE (operand),
30686 gen_rtx_PRE_DEC (SImode,
30687 stack_pointer_rtx)),
30691 gcc_unreachable ();
30693 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* Free the operand from memory.  */
30700 ix86_free_from_memory (enum machine_mode mode)
30702 if (!ix86_using_red_zone ())
30706 if (mode == DImode || TARGET_64BIT)
/* Use LEA to deallocate stack space.  In peephole2 it will be converted
to a pop or add instruction if registers are available.  */
30712 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
30713 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
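/* Usage sketch (editor's illustration, not part of the original file):
the two helpers above are meant to be used as a bracketed pair after
reload, e.g.

     rtx mem = ix86_force_to_memory (DImode, op);
     ... emit insns that need OP as a memory operand ...
     ix86_free_from_memory (DImode);

On targets with a red zone the store goes below the stack pointer and
nothing needs to be deallocated; otherwise the PRE_DEC pushes above are
undone by the LEA emitted in ix86_free_from_memory.  */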
30718 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
30720 Put float CONST_DOUBLE in the constant pool instead of fp regs.
30721 QImode must go into class Q_REGS.
30722 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
30723 movdf to do mem-to-mem moves through integer regs. */
30726 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
30728 enum machine_mode mode = GET_MODE (x);
30730 /* We're only allowed to return a subclass of CLASS. Many of the
30731 following checks fail for NO_REGS, so eliminate that early. */
30732 if (regclass == NO_REGS)
30735 /* All classes can load zeros. */
30736 if (x == CONST0_RTX (mode))
30739 /* Force constants into memory if we are loading a (nonzero) constant into
30740 an MMX or SSE register. This is because there are no MMX/SSE instructions
30741 to load from a constant. */
30743 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
30746 /* Prefer SSE regs only, if we can use them for math. */
30747 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
30748 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
30750 /* Floating-point constants need more complex checks. */
30751 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
30753 /* General regs can load everything. */
30754 if (reg_class_subset_p (regclass, GENERAL_REGS))
30757 /* Floats can load 0 and 1 plus some others. Note that we eliminated
30758 zero above. We only want to wind up preferring 80387 registers if
30759 we plan on doing computation with them. */
30761 && standard_80387_constant_p (x) > 0)
30763 /* Limit class to non-sse. */
30764 if (regclass == FLOAT_SSE_REGS)
30766 if (regclass == FP_TOP_SSE_REGS)
30768 if (regclass == FP_SECOND_SSE_REGS)
30769 return FP_SECOND_REG;
30770 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
/* Generally when we see PLUS here, it's the function invariant
(plus soft-fp const_int), which can only be computed into general
regs.  */
30780 if (GET_CODE (x) == PLUS)
30781 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
30783 /* QImode constants are easy to load, but non-constant QImode data
30784 must go into Q_REGS. */
30785 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
30787 if (reg_class_subset_p (regclass, Q_REGS))
30789 if (reg_class_subset_p (Q_REGS, regclass))
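/* Worked example (editor's sketch): reloading the DFmode constant 1.0
with REGCLASS == FLOAT_SSE_REGS and 80387 math enabled hits the
CONST_DOUBLE path above; standard_80387_constant_p returns a positive
value (fld1), so the class is narrowed to its x87 subset in the elided
return.  With SSE math, the earlier SSE_FLOAT_MODE_P check would
instead have rejected any non-SSE class.  */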
30797 /* Discourage putting floating-point values in SSE registers unless
30798 SSE math is being used, and likewise for the 387 registers. */
30800 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
30802 enum machine_mode mode = GET_MODE (x);
30804 /* Restrict the output reload class to the register bank that we are doing
30805 math on. If we would like not to return a subset of CLASS, reject this
30806 alternative: if reload cannot do this, it will still use its choice. */
30808 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
30809 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
30811 if (X87_FLOAT_MODE_P (mode))
30813 if (regclass == FP_TOP_SSE_REGS)
30815 else if (regclass == FP_SECOND_SSE_REGS)
30816 return FP_SECOND_REG;
30818 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
30825 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
30826 enum machine_mode mode, secondary_reload_info *sri)
30828 /* Double-word spills from general registers to non-offsettable memory
30829 references (zero-extended addresses) require special handling. */
30832 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
30833 && rclass == GENERAL_REGS
30834 && !offsettable_memref_p (x))
30837 ? CODE_FOR_reload_noff_load
30838 : CODE_FOR_reload_noff_store);
30839 /* Add the cost of moving address to a temporary. */
30840 sri->extra_cost = 1;
/* QImode spills from non-QI registers require an
intermediate register on 32-bit targets.  */
30848 && !in_p && mode == QImode
30849 && (rclass == GENERAL_REGS
30850 || rclass == LEGACY_REGS
30851 || rclass == INDEX_REGS))
30860 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
30861 regno = true_regnum (x);
30863 /* Return Q_REGS if the operand is in memory. */
/* This condition handles the corner case where an expression involving
30869 pointers gets vectorized. We're trying to use the address of a
30870 stack slot as a vector initializer.
30872 (set (reg:V2DI 74 [ vect_cst_.2 ])
30873 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
30875 Eventually frame gets turned into sp+offset like this:
30877 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30878 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30879 (const_int 392 [0x188]))))
30881 That later gets turned into:
30883 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30884 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30885 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
30887 We'll have the following reload recorded:
30889 Reload 0: reload_in (DI) =
30890 (plus:DI (reg/f:DI 7 sp)
30891 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
30892 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30893 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
30894 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
30895 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30896 reload_reg_rtx: (reg:V2DI 22 xmm1)
30898 Which isn't going to work since SSE instructions can't handle scalar
additions.  Returning GENERAL_REGS forces the addition into an integer
register, and reload can handle subsequent reloads without problems.  */
30902 if (in_p && GET_CODE (x) == PLUS
30903 && SSE_CLASS_P (rclass)
30904 && SCALAR_INT_MODE_P (mode))
30905 return GENERAL_REGS;
30910 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
30913 ix86_class_likely_spilled_p (reg_class_t rclass)
30924 case SSE_FIRST_REG:
30926 case FP_SECOND_REG:
30936 /* If we are copying between general and FP registers, we need a memory
30937 location. The same is true for SSE and MMX registers.
30939 To optimize register_move_cost performance, allow inline variant.
The macro can't work reliably when one of the CLASSES is a class containing
registers from multiple units (SSE, MMX, integer).  We avoid this by never
combining those units in a single alternative in the machine description.
30944 Ensure that this constraint holds to avoid unexpected surprises.
30946 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
30947 enforce these sanity checks. */
30950 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
30951 enum machine_mode mode, int strict)
30953 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
30954 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
30955 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
30956 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
30957 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
30958 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
30960 gcc_assert (!strict);
30964 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
30967 /* ??? This is a lie. We do have moves between mmx/general, and for
30968 mmx/sse2. But by saying we need secondary memory we discourage the
30969 register allocator from using the mmx registers unless needed. */
30970 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
30973 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
30975 /* SSE1 doesn't have any direct moves from other classes. */
30979 /* If the target says that inter-unit moves are more expensive
30980 than moving through memory, then don't generate them. */
30981 if (!TARGET_INTER_UNIT_MOVES)
30984 /* Between SSE and general, we have moves no larger than word size. */
30985 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
30993 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
30994 enum machine_mode mode, int strict)
30996 return inline_secondary_memory_needed (class1, class2, mode, strict);
30999 /* Implement the TARGET_CLASS_MAX_NREGS hook.
31001 On the 80386, this is the size of MODE in words,
31002 except in the FP regs, where a single reg is always enough. */
31004 static unsigned char
31005 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
31007 if (MAYBE_INTEGER_CLASS_P (rclass))
31009 if (mode == XFmode)
31010 return (TARGET_64BIT ? 2 : 3);
31011 else if (mode == XCmode)
31012 return (TARGET_64BIT ? 4 : 6);
31014 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
31018 if (COMPLEX_MODE_P (mode))
31025 /* Return true if the registers in CLASS cannot represent the change from
31026 modes FROM to TO. */
31029 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
31030 enum reg_class regclass)
31035 /* x87 registers can't do subreg at all, as all values are reformatted
31036 to extended precision. */
31037 if (MAYBE_FLOAT_CLASS_P (regclass))
31040 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
31042 /* Vector registers do not support QI or HImode loads. If we don't
31043 disallow a change to these modes, reload will assume it's ok to
31044 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31045 the vec_dupv4hi pattern. */
31046 if (GET_MODE_SIZE (from) < 4)
31049 /* Vector registers do not support subreg with nonzero offsets, which
31050 are otherwise valid for integer registers. Since we can't see
31051 whether we have a nonzero offset from here, prohibit all
31052 nonparadoxical subregs changing size. */
31053 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
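/* Worked example (editor's sketch): with REGCLASS == SSE_REGS,
FROM == HImode and TO == SImode, GET_MODE_SIZE (from) < 4 rejects the
change, so reload cannot simply drop the subreg from
(subreg:SI (reg:HI 100) 0) when the inner value lives in a vector
register.  */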
31060 /* Return the cost of moving data of mode M between a
31061 register and memory. A value of 2 is the default; this cost is
31062 relative to those in `REGISTER_MOVE_COST'.
This function is used extensively by register_move_cost, which is used to
build tables at startup.  Make it inline in this case.
When IN is 2, return the maximum of the in and out move cost.

If moving between registers and memory is more expensive than
between two registers, you should define this macro to express the
relative cost.

The increased cost of moving QImode registers in non-Q_REGS classes
is also modeled here.  */
31076 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
31080 if (FLOAT_CLASS_P (regclass))
31098 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
31099 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
31101 if (SSE_CLASS_P (regclass))
31104 switch (GET_MODE_SIZE (mode))
31119 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
31120 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
31122 if (MMX_CLASS_P (regclass))
31125 switch (GET_MODE_SIZE (mode))
31137 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
31138 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
31140 switch (GET_MODE_SIZE (mode))
31143 if (Q_CLASS_P (regclass) || TARGET_64BIT)
31146 return ix86_cost->int_store[0];
31147 if (TARGET_PARTIAL_REG_DEPENDENCY
31148 && optimize_function_for_speed_p (cfun))
31149 cost = ix86_cost->movzbl_load;
31151 cost = ix86_cost->int_load[0];
31153 return MAX (cost, ix86_cost->int_store[0]);
31159 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
31161 return ix86_cost->movzbl_load;
31163 return ix86_cost->int_store[0] + 4;
31168 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
31169 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
31171 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
31172 if (mode == TFmode)
31175 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
31177 cost = ix86_cost->int_load[2];
31179 cost = ix86_cost->int_store[2];
31180 return (cost * (((int) GET_MODE_SIZE (mode)
31181 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
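/* Worked example (editor's sketch): a TFmode move through a
general-purpose class is costed as an XFmode move, as the comment above
states (the mode switch itself is in an elided line): i.e. as
ceil (GET_MODE_SIZE (XFmode) / UNITS_PER_WORD) word-sized moves,
typically three int_load[2]/int_store[2] operations on a 32-bit
target.  */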
31186 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
31189 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
31193 /* Return the cost of moving data from a register in class CLASS1 to
31194 one in class CLASS2.
31196 It is not required that the cost always equal 2 when FROM is the same as TO;
31197 on some machines it is expensive to move between registers if they are not
31198 general registers. */
31201 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
31202 reg_class_t class2_i)
31204 enum reg_class class1 = (enum reg_class) class1_i;
31205 enum reg_class class2 = (enum reg_class) class2_i;
31207 /* In case we require secondary memory, compute cost of the store followed
31208 by load. In order to avoid bad register allocation choices, we need
31209 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31211 if (inline_secondary_memory_needed (class1, class2, mode, 0))
31215 cost += inline_memory_move_cost (mode, class1, 2);
31216 cost += inline_memory_move_cost (mode, class2, 2);
/* In case of copying from a general purpose register we may emit multiple
stores followed by a single load, causing a memory size mismatch stall.
Count this as an arbitrarily high cost of 20.
31221 if (targetm.class_max_nregs (class1, mode)
31222 > targetm.class_max_nregs (class2, mode))
31225 /* In the case of FP/MMX moves, the registers actually overlap, and we
31226 have to switch modes in order to treat them differently. */
31227 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
31228 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
31234 /* Moves between SSE/MMX and integer unit are expensive. */
31235 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
31236 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
/* ??? By keeping the returned value relatively high, we limit the number
of moves between integer and MMX/SSE registers for all targets.
Additionally, the high value prevents a problem with x86_modes_tieable_p(),
where integer modes in MMX/SSE registers are not tieable
because of missing QImode and HImode moves to, from or between
MMX/SSE registers.  */
31244 return MAX (8, ix86_cost->mmxsse_to_integer);
31246 if (MAYBE_FLOAT_CLASS_P (class1))
31247 return ix86_cost->fp_move;
31248 if (MAYBE_SSE_CLASS_P (class1))
31249 return ix86_cost->sse_move;
31250 if (MAYBE_MMX_CLASS_P (class1))
31251 return ix86_cost->mmx_move;
31255 /* Return TRUE if hard register REGNO can hold a value of machine-mode
31259 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
/* Flags, and only flags, can hold CCmode values.  */
31262 if (CC_REGNO_P (regno))
31263 return GET_MODE_CLASS (mode) == MODE_CC;
31264 if (GET_MODE_CLASS (mode) == MODE_CC
31265 || GET_MODE_CLASS (mode) == MODE_RANDOM
31266 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
31268 if (FP_REGNO_P (regno))
31269 return VALID_FP_MODE_P (mode);
31270 if (SSE_REGNO_P (regno))
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
are available.  OImode move is available only when AVX is
enabled.  */
31276 return ((TARGET_AVX && mode == OImode)
31277 || VALID_AVX256_REG_MODE (mode)
31278 || VALID_SSE_REG_MODE (mode)
31279 || VALID_SSE2_REG_MODE (mode)
31280 || VALID_MMX_REG_MODE (mode)
31281 || VALID_MMX_REG_MODE_3DNOW (mode));
31283 if (MMX_REGNO_P (regno))
31285 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31286 so if the register is available at all, then we can move data of
31287 the given mode into or out of it. */
31288 return (VALID_MMX_REG_MODE (mode)
31289 || VALID_MMX_REG_MODE_3DNOW (mode));
31292 if (mode == QImode)
/* Take care with QImode values - they can be in non-QI regs,
but then they do cause partial register stalls.  */
31296 if (regno <= BX_REG || TARGET_64BIT)
31298 if (!TARGET_PARTIAL_REG_STALL)
31300 return !can_create_pseudo_p ();
/* We handle both integers and floats in the general purpose registers.  */
31303 else if (VALID_INT_MODE_P (mode))
31305 else if (VALID_FP_MODE_P (mode))
31307 else if (VALID_DFP_MODE_P (mode))
31309 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31310 on to use that value in smaller contexts, this can easily force a
31311 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31312 supporting DImode, allow it. */
31313 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
31319 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31320 tieable integer mode. */
31323 ix86_tieable_integer_mode_p (enum machine_mode mode)
31332 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
31335 return TARGET_64BIT;
31342 /* Return true if MODE1 is accessible in a register that can hold MODE2
31343 without copying. That is, all register classes that can hold MODE2
31344 can also hold MODE1. */
31347 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
31349 if (mode1 == mode2)
31352 if (ix86_tieable_integer_mode_p (mode1)
31353 && ix86_tieable_integer_mode_p (mode2))
31356 /* MODE2 being XFmode implies fp stack or general regs, which means we
31357 can tie any smaller floating point modes to it. Note that we do not
31358 tie this with TFmode. */
31359 if (mode2 == XFmode)
31360 return mode1 == SFmode || mode1 == DFmode;
31362 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31363 that we can tie it with SFmode. */
31364 if (mode2 == DFmode)
31365 return mode1 == SFmode;
31367 /* If MODE2 is only appropriate for an SSE register, then tie with
31368 any other mode acceptable to SSE registers. */
31369 if (GET_MODE_SIZE (mode2) == 16
31370 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
31371 return (GET_MODE_SIZE (mode1) == 16
31372 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
31374 /* If MODE2 is appropriate for an MMX register, then tie
31375 with any other mode acceptable to MMX registers. */
31376 if (GET_MODE_SIZE (mode2) == 8
31377 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
31378 return (GET_MODE_SIZE (mode1) == 8
31379 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
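/* Worked examples (editor's sketch): ix86_modes_tieable_p (SFmode,
DFmode) is true - anything that holds DFmode also holds SFmode - and
two 16-byte vector modes such as V4SFmode and V2DImode tie with each
other whenever both are valid for the SSE registers.  DImode does not
tie with XFmode, which only ties with SFmode and DFmode.  */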
31384 /* Compute a (partial) cost for rtx X. Return true if the complete
31385 cost has been computed, and false if subexpressions should be
31386 scanned. In either case, *TOTAL contains the cost result. */
31389 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
31392 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
31393 enum machine_mode mode = GET_MODE (x);
31394 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
31402 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
31404 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
31406 else if (flag_pic && SYMBOLIC_CONST (x)
|| (GET_CODE (x) != LABEL_REF
31409 && (GET_CODE (x) != SYMBOL_REF
31410 || !SYMBOL_REF_LOCAL_P (x)))))
31417 if (mode == VOIDmode)
31420 switch (standard_80387_constant_p (x))
31425 default: /* Other constants */
31430 /* Start with (MEM (SYMBOL_REF)), since that's where
31431 it'll probably end up. Add a penalty for size. */
31432 *total = (COSTS_N_INSNS (1)
31433 + (flag_pic != 0 && !TARGET_64BIT)
31434 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
/* The zero extension is often completely free on x86_64, so make
it as cheap as possible.  */
31442 if (TARGET_64BIT && mode == DImode
31443 && GET_MODE (XEXP (x, 0)) == SImode)
31445 else if (TARGET_ZERO_EXTEND_WITH_AND)
31446 *total = cost->add;
31448 *total = cost->movzx;
31452 *total = cost->movsx;
31456 if (CONST_INT_P (XEXP (x, 1))
31457 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
31459 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31462 *total = cost->add;
31465 if ((value == 2 || value == 3)
31466 && cost->lea <= cost->shift_const)
31468 *total = cost->lea;
31478 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
31480 if (CONST_INT_P (XEXP (x, 1)))
31482 if (INTVAL (XEXP (x, 1)) > 32)
31483 *total = cost->shift_const + COSTS_N_INSNS (2);
31485 *total = cost->shift_const * 2;
31489 if (GET_CODE (XEXP (x, 1)) == AND)
31490 *total = cost->shift_var * 2;
31492 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
31497 if (CONST_INT_P (XEXP (x, 1)))
31498 *total = cost->shift_const;
31500 *total = cost->shift_var;
31508 gcc_assert (FLOAT_MODE_P (mode));
31509 gcc_assert (TARGET_FMA || TARGET_FMA4);
31511 /* ??? SSE scalar/vector cost should be used here. */
31512 /* ??? Bald assumption that fma has the same cost as fmul. */
31513 *total = cost->fmul;
31514 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
31516 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
31518 if (GET_CODE (sub) == NEG)
31519 sub = XEXP (sub, 0);
31520 *total += rtx_cost (sub, FMA, 0, speed);
31523 if (GET_CODE (sub) == NEG)
31524 sub = XEXP (sub, 0);
31525 *total += rtx_cost (sub, FMA, 2, speed);
31530 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31532 /* ??? SSE scalar cost should be used here. */
31533 *total = cost->fmul;
31536 else if (X87_FLOAT_MODE_P (mode))
31538 *total = cost->fmul;
31541 else if (FLOAT_MODE_P (mode))
31543 /* ??? SSE vector cost should be used here. */
31544 *total = cost->fmul;
31549 rtx op0 = XEXP (x, 0);
31550 rtx op1 = XEXP (x, 1);
31552 if (CONST_INT_P (XEXP (x, 1)))
31554 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31555 for (nbits = 0; value != 0; value &= value - 1)
31559 /* This is arbitrary. */
31562 /* Compute costs correctly for widening multiplication. */
31563 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
31564 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
31565 == GET_MODE_SIZE (mode))
31567 int is_mulwiden = 0;
31568 enum machine_mode inner_mode = GET_MODE (op0);
31570 if (GET_CODE (op0) == GET_CODE (op1))
31571 is_mulwiden = 1, op1 = XEXP (op1, 0);
31572 else if (CONST_INT_P (op1))
31574 if (GET_CODE (op0) == SIGN_EXTEND)
31575 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
31578 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
31582 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
31585 *total = (cost->mult_init[MODE_INDEX (mode)]
31586 + nbits * cost->mult_bit
31587 + rtx_cost (op0, outer_code, opno, speed)
31588 + rtx_cost (op1, outer_code, opno, speed));
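/* Worked example (editor's sketch): for a multiply of an SImode
register by the constant 10 (binary 1010), the loop above leaves
NBITS == 2 - one iteration per set bit, with the increment in the
elided loop body - so the estimate is mult_init for SImode plus
2 * mult_bit plus the costs of the two operands.  */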
31597 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31598 /* ??? SSE cost should be used here. */
31599 *total = cost->fdiv;
31600 else if (X87_FLOAT_MODE_P (mode))
31601 *total = cost->fdiv;
31602 else if (FLOAT_MODE_P (mode))
31603 /* ??? SSE vector cost should be used here. */
31604 *total = cost->fdiv;
31606 *total = cost->divide[MODE_INDEX (mode)];
31610 if (GET_MODE_CLASS (mode) == MODE_INT
31611 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
31613 if (GET_CODE (XEXP (x, 0)) == PLUS
31614 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
31615 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
31616 && CONSTANT_P (XEXP (x, 1)))
31618 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
31619 if (val == 2 || val == 4 || val == 8)
31621 *total = cost->lea;
31622 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31623 outer_code, opno, speed);
31624 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
31625 outer_code, opno, speed);
31626 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31630 else if (GET_CODE (XEXP (x, 0)) == MULT
31631 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
31633 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
31634 if (val == 2 || val == 4 || val == 8)
31636 *total = cost->lea;
31637 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31638 outer_code, opno, speed);
31639 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31643 else if (GET_CODE (XEXP (x, 0)) == PLUS)
31645 *total = cost->lea;
31646 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31647 outer_code, opno, speed);
31648 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31649 outer_code, opno, speed);
31650 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31657 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31659 /* ??? SSE cost should be used here. */
31660 *total = cost->fadd;
31663 else if (X87_FLOAT_MODE_P (mode))
31665 *total = cost->fadd;
31668 else if (FLOAT_MODE_P (mode))
31670 /* ??? SSE vector cost should be used here. */
31671 *total = cost->fadd;
31679 if (!TARGET_64BIT && mode == DImode)
31681 *total = (cost->add * 2
31682 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
31683 << (GET_MODE (XEXP (x, 0)) != DImode))
31684 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
31685 << (GET_MODE (XEXP (x, 1)) != DImode)));
31691 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31693 /* ??? SSE cost should be used here. */
31694 *total = cost->fchs;
31697 else if (X87_FLOAT_MODE_P (mode))
31699 *total = cost->fchs;
31702 else if (FLOAT_MODE_P (mode))
31704 /* ??? SSE vector cost should be used here. */
31705 *total = cost->fchs;
31711 if (!TARGET_64BIT && mode == DImode)
31712 *total = cost->add * 2;
31714 *total = cost->add;
31718 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
31719 && XEXP (XEXP (x, 0), 1) == const1_rtx
31720 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
31721 && XEXP (x, 1) == const0_rtx)
31723 /* This kind of construct is implemented using test[bwl].
31724 Treat it as if we had an AND. */
31725 *total = (cost->add
31726 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
31727 + rtx_cost (const1_rtx, outer_code, opno, speed));
31733 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
31738 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31739 /* ??? SSE cost should be used here. */
31740 *total = cost->fabs;
31741 else if (X87_FLOAT_MODE_P (mode))
31742 *total = cost->fabs;
31743 else if (FLOAT_MODE_P (mode))
31744 /* ??? SSE vector cost should be used here. */
31745 *total = cost->fabs;
31749 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31750 /* ??? SSE cost should be used here. */
31751 *total = cost->fsqrt;
31752 else if (X87_FLOAT_MODE_P (mode))
31753 *total = cost->fsqrt;
31754 else if (FLOAT_MODE_P (mode))
31755 /* ??? SSE vector cost should be used here. */
31756 *total = cost->fsqrt;
31760 if (XINT (x, 1) == UNSPEC_TP)
31767 case VEC_DUPLICATE:
/* ??? Assume all of these vector manipulation patterns are
recognizable.  In which case they all pretty much have the
same cost.  */
31771 *total = COSTS_N_INSNS (1);
31781 static int current_machopic_label_num;
31783 /* Given a symbol name and its associated stub, write out the
31784 definition of the stub. */
31787 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31789 unsigned int length;
31790 char *binder_name, *symbol_name, lazy_ptr_name[32];
31791 int label = ++current_machopic_label_num;
31793 /* For 64-bit we shouldn't get here. */
31794 gcc_assert (!TARGET_64BIT);
31796 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31797 symb = targetm.strip_name_encoding (symb);
31799 length = strlen (stub);
31800 binder_name = XALLOCAVEC (char, length + 32);
31801 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
31803 length = strlen (symb);
31804 symbol_name = XALLOCAVEC (char, length + 32);
31805 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31807 sprintf (lazy_ptr_name, "L%d$lz", label);
31809 if (MACHOPIC_ATT_STUB)
31810 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
31811 else if (MACHOPIC_PURE)
31812 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
31814 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
31816 fprintf (file, "%s:\n", stub);
31817 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31819 if (MACHOPIC_ATT_STUB)
31821 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
31823 else if (MACHOPIC_PURE)
31826 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31827 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
31828 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
31829 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
31830 label, lazy_ptr_name, label);
31831 fprintf (file, "\tjmp\t*%%ecx\n");
31834 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
31836 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
31837 it needs no stub-binding-helper. */
31838 if (MACHOPIC_ATT_STUB)
31841 fprintf (file, "%s:\n", binder_name);
31845 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
31846 fprintf (file, "\tpushl\t%%ecx\n");
31849 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
31851 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
31853 /* N.B. Keep the correspondence of these
31854 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
31855 old-pic/new-pic/non-pic stubs; altering this will break
31856 compatibility with existing dylibs. */
31859 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31860 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
31863 /* 16-byte -mdynamic-no-pic stub. */
31864 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
31866 fprintf (file, "%s:\n", lazy_ptr_name);
31867 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31868 fprintf (file, ASM_LONG "%s\n", binder_name);
31870 #endif /* TARGET_MACHO */
/* Order the registers for the register allocator.  */
31875 x86_order_regs_for_local_alloc (void)
31880 /* First allocate the local general purpose registers. */
31881 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31882 if (GENERAL_REGNO_P (i) && call_used_regs[i])
31883 reg_alloc_order [pos++] = i;
31885 /* Global general purpose registers. */
31886 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31887 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
31888 reg_alloc_order [pos++] = i;
/* x87 registers come first in case we are doing FP math
using them.  */
31892 if (!TARGET_SSE_MATH)
31893 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31894 reg_alloc_order [pos++] = i;
31896 /* SSE registers. */
31897 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
31898 reg_alloc_order [pos++] = i;
31899 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
31900 reg_alloc_order [pos++] = i;
31902 /* x87 registers. */
31903 if (TARGET_SSE_MATH)
31904 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31905 reg_alloc_order [pos++] = i;
31907 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
31908 reg_alloc_order [pos++] = i;
/* Initialize the rest of array as we do not allocate some registers
at all.  */
31912 while (pos < FIRST_PSEUDO_REGISTER)
31913 reg_alloc_order [pos++] = 0;
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
in struct attribute_spec.handler.  */
31919 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
31921 int flags ATTRIBUTE_UNUSED,
31922 bool *no_add_attrs)
31924 if (TREE_CODE (*node) != FUNCTION_TYPE
31925 && TREE_CODE (*node) != METHOD_TYPE
31926 && TREE_CODE (*node) != FIELD_DECL
31927 && TREE_CODE (*node) != TYPE_DECL)
31929 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31931 *no_add_attrs = true;
31936 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
31938 *no_add_attrs = true;
31941 if (is_attribute_p ("callee_pop_aggregate_return", name))
31945 cst = TREE_VALUE (args);
31946 if (TREE_CODE (cst) != INTEGER_CST)
31948 warning (OPT_Wattributes,
31949 "%qE attribute requires an integer constant argument",
31951 *no_add_attrs = true;
31953 else if (compare_tree_int (cst, 0) != 0
31954 && compare_tree_int (cst, 1) != 0)
31956 warning (OPT_Wattributes,
31957 "argument to %qE attribute is neither zero, nor one",
31959 *no_add_attrs = true;
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
struct attribute_spec.handler.  */
31971 ix86_handle_abi_attribute (tree *node, tree name,
31972 tree args ATTRIBUTE_UNUSED,
31973 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31975 if (TREE_CODE (*node) != FUNCTION_TYPE
31976 && TREE_CODE (*node) != METHOD_TYPE
31977 && TREE_CODE (*node) != FIELD_DECL
31978 && TREE_CODE (*node) != TYPE_DECL)
31980 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31982 *no_add_attrs = true;
31986 /* Can combine regparm with all attributes but fastcall. */
31987 if (is_attribute_p ("ms_abi", name))
31989 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
31991 error ("ms_abi and sysv_abi attributes are not compatible");
31996 else if (is_attribute_p ("sysv_abi", name))
31998 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
32000 error ("ms_abi and sysv_abi attributes are not compatible");
32009 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32010 struct attribute_spec.handler. */
32012 ix86_handle_struct_attribute (tree *node, tree name,
32013 tree args ATTRIBUTE_UNUSED,
32014 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32017 if (DECL_P (*node))
32019 if (TREE_CODE (*node) == TYPE_DECL)
32020 type = &TREE_TYPE (*node);
32025 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
32027 warning (OPT_Wattributes, "%qE attribute ignored",
32029 *no_add_attrs = true;
32032 else if ((is_attribute_p ("ms_struct", name)
32033 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32034 || ((is_attribute_p ("gcc_struct", name)
32035 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32037 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32039 *no_add_attrs = true;
32046 ix86_handle_fndecl_attribute (tree *node, tree name,
32047 tree args ATTRIBUTE_UNUSED,
32048 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32050 if (TREE_CODE (*node) != FUNCTION_DECL)
32052 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32054 *no_add_attrs = true;
32060 ix86_ms_bitfield_layout_p (const_tree record_type)
32062 return ((TARGET_MS_BITFIELD_LAYOUT
32063 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32064 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
32067 /* Returns an expression indicating where the this parameter is
32068 located on entry to the FUNCTION. */
32071 x86_this_parameter (tree function)
32073 tree type = TREE_TYPE (function);
32074 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
32079 const int *parm_regs;
32081 if (ix86_function_type_abi (type) == MS_ABI)
32082 parm_regs = x86_64_ms_abi_int_parameter_registers;
32084 parm_regs = x86_64_int_parameter_registers;
32085 return gen_rtx_REG (DImode, parm_regs[aggr]);
32088 nregs = ix86_function_regparm (type, function);
32090 if (nregs > 0 && !stdarg_p (type))
32093 unsigned int ccvt = ix86_get_callcvt (type);
32095 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
32096 regno = aggr ? DX_REG : CX_REG;
32097 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
32101 return gen_rtx_MEM (SImode,
32102 plus_constant (stack_pointer_rtx, 4));
32111 return gen_rtx_MEM (SImode,
32112 plus_constant (stack_pointer_rtx, 4));
32115 return gen_rtx_REG (SImode, regno);
32118 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
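/* Worked example (editor's sketch): for a plain 32-bit cdecl method,
THIS is the first stack argument, i.e. (mem:SI (plus sp 4)) on entry;
when the function returns an aggregate in memory, the hidden return
pointer occupies that slot and THIS moves to sp + 8.  With fastcall it
is instead passed in %ecx (%edx if there is a hidden aggregate return
pointer).  */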
32121 /* Determine whether x86_output_mi_thunk can succeed. */
32124 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
32125 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
32126 HOST_WIDE_INT vcall_offset, const_tree function)
32128 /* 64-bit can handle anything. */
32132 /* For 32-bit, everything's fine if we have one free register. */
32133 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
32136 /* Need a free register for vcall_offset. */
32140 /* Need a free register for GOT references. */
32141 if (flag_pic && !targetm.binds_local_p (function))
32144 /* Otherwise ok. */
32148 /* Output the assembler code for a thunk function. THUNK_DECL is the
32149 declaration for the thunk function itself, FUNCTION is the decl for
32150 the target function. DELTA is an immediate constant offset to be
32151 added to THIS. If VCALL_OFFSET is nonzero, the word at
32152 *(*this + vcall_offset) should be added to THIS. */
32155 x86_output_mi_thunk (FILE *file,
32156 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
32157 HOST_WIDE_INT vcall_offset, tree function)
32159 rtx this_param = x86_this_parameter (function);
32160 rtx this_reg, tmp, fnaddr;
32161 unsigned int tmp_regno;
32164 tmp_regno = R10_REG;
32167 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
32168 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
32169 tmp_regno = AX_REG;
32171 tmp_regno = CX_REG;
32174 emit_note (NOTE_INSN_PROLOGUE_END);
32176 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
32177 pull it in now and let DELTA benefit. */
32178 if (REG_P (this_param))
32179 this_reg = this_param;
32180 else if (vcall_offset)
32182 /* Put the this parameter into %eax. */
32183 this_reg = gen_rtx_REG (Pmode, AX_REG);
32184 emit_move_insn (this_reg, this_param);
32187 this_reg = NULL_RTX;
32189 /* Adjust the this parameter by a fixed constant. */
32192 rtx delta_rtx = GEN_INT (delta);
32193 rtx delta_dst = this_reg ? this_reg : this_param;
32197 if (!x86_64_general_operand (delta_rtx, Pmode))
32199 tmp = gen_rtx_REG (Pmode, tmp_regno);
32200 emit_move_insn (tmp, delta_rtx);
32205 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
32208 /* Adjust the this parameter by a value stored in the vtable. */
32211 rtx vcall_addr, vcall_mem, this_mem;
32213 tmp = gen_rtx_REG (Pmode, tmp_regno);
32215 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
32216 if (Pmode != ptr_mode)
32217 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
32218 emit_move_insn (tmp, this_mem);
32220 /* Adjust the this parameter. */
32221 vcall_addr = plus_constant (tmp, vcall_offset);
32223 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
32225 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
32226 emit_move_insn (tmp2, GEN_INT (vcall_offset));
32227 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
32230 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
32231 if (Pmode != ptr_mode)
32232 emit_insn (gen_addsi_1_zext (this_reg,
32233 gen_rtx_REG (ptr_mode,
32237 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
32240 /* If necessary, drop THIS back to its stack slot. */
32241 if (this_reg && this_reg != this_param)
32242 emit_move_insn (this_param, this_reg);
32244 fnaddr = XEXP (DECL_RTL (function), 0);
32247 if (!flag_pic || targetm.binds_local_p (function)
32248 || cfun->machine->call_abi == MS_ABI)
32252 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
32253 tmp = gen_rtx_CONST (Pmode, tmp);
32254 fnaddr = gen_rtx_MEM (Pmode, tmp);
32259 if (!flag_pic || targetm.binds_local_p (function))
32262 else if (TARGET_MACHO)
32264 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
32265 fnaddr = XEXP (fnaddr, 0);
32267 #endif /* TARGET_MACHO */
32270 tmp = gen_rtx_REG (Pmode, CX_REG);
32271 output_set_got (tmp, NULL_RTX);
32273 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
32274 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
32275 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
32279 /* Our sibling call patterns do not allow memories, because we have no
32280 predicate that can distinguish between frame and non-frame memory.
32281 For our purposes here, we can get away with (ab)using a jump pattern,
32282 because we're going to do no optimization. */
32283 if (MEM_P (fnaddr))
32284 emit_jump_insn (gen_indirect_jump (fnaddr));
32287 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
32288 fnaddr = legitimize_pic_address (fnaddr,
32289 gen_rtx_REG (Pmode, tmp_regno));
32291 if (!sibcall_insn_operand (fnaddr, Pmode))
32293 tmp = gen_rtx_REG (Pmode, tmp_regno);
32294 if (GET_MODE (fnaddr) != Pmode)
32295 fnaddr = gen_rtx_ZERO_EXTEND (Pmode, fnaddr);
32296 emit_move_insn (tmp, fnaddr);
32300 tmp = gen_rtx_MEM (QImode, fnaddr);
32301 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
32302 tmp = emit_call_insn (tmp);
32303 SIBLING_CALL_P (tmp) = 1;
32307 /* Emit just enough of rest_of_compilation to get the insns emitted.
32308 Note that use_thunk calls assemble_start_function et al. */
32309 tmp = get_insns ();
32310 insn_locators_alloc ();
32311 shorten_branches (tmp);
32312 final_start_function (tmp, file, 1);
32313 final (tmp, file, 1);
32314 final_end_function ();
32318 x86_file_start (void)
32320 default_file_start ();
32322 darwin_file_start ();
32324 if (X86_FILE_START_VERSION_DIRECTIVE)
32325 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
32326 if (X86_FILE_START_FLTUSED)
32327 fputs ("\t.global\t__fltused\n", asm_out_file);
32328 if (ix86_asm_dialect == ASM_INTEL)
32329 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
32333 x86_field_alignment (tree field, int computed)
32335 enum machine_mode mode;
32336 tree type = TREE_TYPE (field);
32338 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
32340 mode = TYPE_MODE (strip_array_types (type));
32341 if (mode == DFmode || mode == DCmode
32342 || GET_MODE_CLASS (mode) == MODE_INT
32343 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
32344 return MIN (32, computed);
32348 /* Output assembler code to FILE to increment profiler label # LABELNO
32349 for profiling a function entry. */
32351 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
32353 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
32358 #ifndef NO_PROFILE_COUNTERS
32359 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
32362 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
32363 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
32365 fprintf (file, "\tcall\t%s\n", mcount_name);
32369 #ifndef NO_PROFILE_COUNTERS
32370 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
32373 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
32377 #ifndef NO_PROFILE_COUNTERS
32378 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
32381 fprintf (file, "\tcall\t%s\n", mcount_name);
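/* Example output (editor's sketch): on 64-bit SysV with -fpic the code
above emits
     call *mcount_name@GOTPCREL(%rip)
and in 32-bit PIC code
     call *mcount_name@GOT(%ebx)
where mcount_name expands to MCOUNT_NAME, or to
MCOUNT_NAME_BEFORE_PROLOGUE under -mfentry.  */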
/* We don't have exact information about the insn sizes, but we may assume
quite safely that we are informed about all 1 byte insns and memory
address sizes.  This is enough to eliminate unnecessary padding in
95% of cases.  */
32391 min_insn_size (rtx insn)
32395 if (!INSN_P (insn) || !active_insn_p (insn))
/* Discard alignments we've emitted, and jump instructions.  */
32399 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
32400 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
32402 if (JUMP_TABLE_DATA_P (insn))
/* Important case - calls are always 5 bytes.
It is common to have many calls in a row.  */
32408 && symbolic_reference_mentioned_p (PATTERN (insn))
32409 && !SIBLING_CALL_P (insn))
32411 len = get_attr_length (insn);
32415 /* For normal instructions we rely on get_attr_length being exact,
32416 with a few exceptions. */
32417 if (!JUMP_P (insn))
32419 enum attr_type type = get_attr_type (insn);
32424 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
32425 || asm_noperands (PATTERN (insn)) >= 0)
32432 /* Otherwise trust get_attr_length. */
32436 l = get_attr_length_address (insn);
32437 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
32446 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
window.  */
32452 ix86_avoid_jump_mispredicts (void)
32454 rtx insn, start = get_insns ();
32455 int nbytes = 0, njumps = 0;
32458 /* Look for all minimal intervals of instructions containing 4 jumps.
32459 The intervals are bounded by START and INSN. NBYTES is the total
32460 size of instructions in the interval including INSN and not including
START.  When NBYTES is smaller than 16 bytes, it is possible
32462 that the end of START and INSN ends up in the same 16byte page.
The smallest offset in the page at which INSN can start is the case where
START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
32468 for (insn = start; insn; insn = NEXT_INSN (insn))
32472 if (LABEL_P (insn))
32474 int align = label_to_alignment (insn);
32475 int max_skip = label_to_max_skip (insn);
32479 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
32480 already in the current 16 byte page, because otherwise
32481 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
32482 bytes to reach 16 byte boundary. */
32484 || (align <= 3 && max_skip != (1 << align) - 1))
32487 fprintf (dump_file, "Label %i with max_skip %i\n",
32488 INSN_UID (insn), max_skip);
32491 while (nbytes + max_skip >= 16)
32493 start = NEXT_INSN (start);
32494 if ((JUMP_P (start)
32495 && GET_CODE (PATTERN (start)) != ADDR_VEC
32496 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32498 njumps--, isjump = 1;
32501 nbytes -= min_insn_size (start);
32507 min_size = min_insn_size (insn);
32508 nbytes += min_size;
32510 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
32511 INSN_UID (insn), min_size);
32513 && GET_CODE (PATTERN (insn)) != ADDR_VEC
32514 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
32522 start = NEXT_INSN (start);
32523 if ((JUMP_P (start)
32524 && GET_CODE (PATTERN (start)) != ADDR_VEC
32525 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32527 njumps--, isjump = 1;
32530 nbytes -= min_insn_size (start);
32532 gcc_assert (njumps >= 0);
32534 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
32535 INSN_UID (start), INSN_UID (insn), nbytes);
32537 if (njumps == 3 && isjump && nbytes < 16)
32539 int padsize = 15 - nbytes + min_insn_size (insn);
32542 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
32543 INSN_UID (insn), padsize);
32544 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
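/* Worked example (editor's sketch): if INSN is the fourth jump in a
window and the interval holds NBYTES == 12 with min_insn_size (insn)
== 2, then PADSIZE == 15 - 12 + 2 == 5, enough padding to push INSN's
start past the 16-byte boundary it would otherwise share with
START.  */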
/* AMD Athlon works faster
when RET is not the destination of a conditional jump or directly preceded
by another jump instruction.  We avoid the penalty by inserting a NOP just
before the RET instructions in such cases.  */
32555 ix86_pad_returns (void)
32560 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32562 basic_block bb = e->src;
32563 rtx ret = BB_END (bb);
32565 bool replace = false;
32567 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
32568 || optimize_bb_for_size_p (bb))
32570 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
32571 if (active_insn_p (prev) || LABEL_P (prev))
32573 if (prev && LABEL_P (prev))
32578 FOR_EACH_EDGE (e, ei, bb->preds)
32579 if (EDGE_FREQUENCY (e) && e->src->index >= 0
32580 && !(e->flags & EDGE_FALLTHRU))
32585 prev = prev_active_insn (ret);
32587 && ((JUMP_P (prev) && any_condjump_p (prev))
/* Empty functions get a branch mispredict even when
the jump destination is not visible to us.  */
32592 if (!prev && !optimize_function_for_size_p (cfun))
32597 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
32603 /* Count the minimum number of instructions in BB. Return 4 if the
32604 number of instructions >= 4. */
32607 ix86_count_insn_bb (basic_block bb)
32610 int insn_count = 0;
32612 /* Count number of instructions in this block. Return 4 if the number
32613 of instructions >= 4. */
32614 FOR_BB_INSNS (bb, insn)
/* Only happens in exit blocks.  */
32618 && ANY_RETURN_P (PATTERN (insn)))
32621 if (NONDEBUG_INSN_P (insn)
32622 && GET_CODE (PATTERN (insn)) != USE
32623 && GET_CODE (PATTERN (insn)) != CLOBBER)
32626 if (insn_count >= 4)
32635 /* Count the minimum number of instructions in code path in BB.
32636 Return 4 if the number of instructions >= 4. */
32639 ix86_count_insn (basic_block bb)
32643 int min_prev_count;
32645 /* Only bother counting instructions along paths with no
32646 more than 2 basic blocks between entry and exit. Given
32647 that BB has an edge to exit, determine if a predecessor
32648 of BB has an edge from entry. If so, compute the number
32649 of instructions in the predecessor block. If there
32650 happen to be multiple such blocks, compute the minimum. */
32651 min_prev_count = 4;
32652 FOR_EACH_EDGE (e, ei, bb->preds)
32655 edge_iterator prev_ei;
32657 if (e->src == ENTRY_BLOCK_PTR)
32659 min_prev_count = 0;
32662 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
32664 if (prev_e->src == ENTRY_BLOCK_PTR)
32666 int count = ix86_count_insn_bb (e->src);
32667 if (count < min_prev_count)
32668 min_prev_count = count;
32674 if (min_prev_count < 4)
32675 min_prev_count += ix86_count_insn_bb (bb);
32677 return min_prev_count;
/* Pad short function to 4 instructions.  */
32683 ix86_pad_short_function (void)
32688 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32690 rtx ret = BB_END (e->src);
32691 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
32693 int insn_count = ix86_count_insn (e->src);
32695 /* Pad short function. */
32696 if (insn_count < 4)
32700 /* Find epilogue. */
32703 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
32704 insn = PREV_INSN (insn);
32709 /* Two NOPs count as one instruction. */
32710 insn_count = 2 * (4 - insn_count);
32711 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
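/* Worked example (editor's sketch): a two-instruction function gets
insn_count = 2 * (4 - 2) == 4 NOPs emitted before the epilogue; since
two NOPs are counted as one instruction, that brings the path up to
the required four.  */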
/* Implement machine specific optimizations.  We implement padding of returns
for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
32722 /* We are freeing block_for_insn in the toplev to keep compatibility
32723 with old MDEP_REORGS that are not CFG based. Recompute it now. */
32724 compute_bb_for_insn ();
32726 /* Run the vzeroupper optimization if needed. */
32727 if (TARGET_VZEROUPPER)
32728 move_or_delete_vzeroupper ();
32730 if (optimize && optimize_function_for_speed_p (cfun))
32732 if (TARGET_PAD_SHORT_FUNCTION)
32733 ix86_pad_short_function ();
32734 else if (TARGET_PAD_RETURNS)
32735 ix86_pad_returns ();
32736 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32737 if (TARGET_FOUR_JUMP_LIMIT)
32738 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via a REX
prefix is used.  */
32746 x86_extended_QIreg_mentioned_p (rtx insn)
32749 extract_insn_cached (insn);
32750 for (i = 0; i < recog_data.n_operands; i++)
32751 if (REG_P (recog_data.operand[i])
32752 && REGNO (recog_data.operand[i]) > BX_REG)
/* Return nonzero when P points to a register encoded via a REX prefix.
Called via for_each_rtx.  */
32760 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
32762 unsigned int regno;
32765 regno = REGNO (*p);
32766 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
32769 /* Return true when INSN mentions register that must be encoded using REX
32772 x86_extended_reg_mentioned_p (rtx insn)
32774 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
32775 extended_reg_mentioned_1, NULL);
32778 /* If profitable, negate (without causing overflow) integer constant
32779 of mode MODE at location LOC. Return true in this case. */
32781 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
32785 if (!CONST_INT_P (*loc))
32791 /* DImode x86_64 constants must fit in 32 bits. */
32792 gcc_assert (x86_64_immediate_operand (*loc, mode));
32803 gcc_unreachable ();
32806 /* Avoid overflows. */
32807 if (mode_signbit_p (mode, *loc))
32810 val = INTVAL (*loc);
32812 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
32813 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
32814 if ((val < 0 && val != -128)
32817 *loc = GEN_INT (-val);
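/* Worked example (editor's sketch): (plus (reg) (const_int -4)) is
rewritten so the caller can emit "subl $4, %eax" instead of
"addl $-4, %eax".  -128 is deliberately excluded: it fits in a
sign-extended 8-bit immediate while +128 does not, so negating it
would lengthen the encoding.  */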
32824 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
32825 optabs would emit if we didn't have TFmode patterns. */
32828 x86_emit_floatuns (rtx operands[2])
32830 rtx neglab, donelab, i0, i1, f0, in, out;
32831 enum machine_mode mode, inmode;
32833 inmode = GET_MODE (operands[1]);
32834 gcc_assert (inmode == SImode || inmode == DImode);
32837 in = force_reg (inmode, operands[1]);
32838 mode = GET_MODE (out);
32839 neglab = gen_label_rtx ();
32840 donelab = gen_label_rtx ();
32841 f0 = gen_reg_rtx (mode);
32843 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
32845 expand_float (out, in, 0);
32847 emit_jump_insn (gen_jump (donelab));
32850 emit_label (neglab);
32852 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
32854 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
32856 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
32858 expand_float (f0, i0, 0);
32860 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
32862 emit_label (donelab);
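/* Worked example (editor's sketch): for a DImode input with the sign
bit set, say x = 2^63 + 3, the negative path computes
i0 = (x >> 1) | (x & 1), converts that now-nonnegative signed value
(about 2^62) to FP, and doubles it with the final PLUS; OR-ing the low
bit back in keeps round-to-nearest-even rounding intact.  */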
32865 /* AVX2 does support 32-byte integer vector operations,
32866 thus the longest vector we are faced with is V32QImode. */
32867 #define MAX_VECT_LEN 32
32869 struct expand_vec_perm_d
32871 rtx target, op0, op1;
32872 unsigned char perm[MAX_VECT_LEN];
32873 enum machine_mode vmode;
32874 unsigned char nelt;
32878 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
32879 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
32881 /* Get a vector mode of the same size as the original but with elements
32882 twice as wide. This is only guaranteed to apply to integral vectors. */
32884 static inline enum machine_mode
32885 get_mode_wider_vector (enum machine_mode o)
32887 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
32888 enum machine_mode n = GET_MODE_WIDER_MODE (o);
32889 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
32890 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
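/* Example (editor's sketch): get_mode_wider_vector (V16QImode) yields
V8HImode - the same 16-byte vector, half as many units, each twice as
wide - relying on the mode ordering produced by genmodes.c.  */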
32894 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32895 with all elements equal to VAR. Return true if successful. */
32898 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
32899 rtx target, rtx val)
32922 /* First attempt to recognize VAL as-is. */
32923 dup = gen_rtx_VEC_DUPLICATE (mode, val);
32924 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
32925 if (recog_memoized (insn) < 0)
32928 /* If that fails, force VAL into a register. */
32931 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
32932 seq = get_insns ();
32935 emit_insn_before (seq, insn);
32937 ok = recog_memoized (insn) >= 0;
32946 if (TARGET_SSE || TARGET_3DNOW_A)
32950 val = gen_lowpart (SImode, val);
32951 x = gen_rtx_TRUNCATE (HImode, val);
32952 x = gen_rtx_VEC_DUPLICATE (mode, x);
32953 emit_insn (gen_rtx_SET (VOIDmode, target, x));
32966 struct expand_vec_perm_d dperm;
32970 memset (&dperm, 0, sizeof (dperm));
32971 dperm.target = target;
32972 dperm.vmode = mode;
32973 dperm.nelt = GET_MODE_NUNITS (mode);
32974 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
32976 /* Extend to SImode using a paradoxical SUBREG. */
32977 tmp1 = gen_reg_rtx (SImode);
32978 emit_move_insn (tmp1, gen_lowpart (SImode, val));
32980 /* Insert the SImode value as low element of a V4SImode vector. */
32981 tmp2 = gen_lowpart (V4SImode, dperm.op0);
32982 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
32984 ok = (expand_vec_perm_1 (&dperm)
32985 || expand_vec_perm_broadcast_1 (&dperm));
32997 /* Replicate the value once into the next wider mode and recurse. */
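/* For example (QImode -> HImode, illustrative): to broadcast byte B,
   form W = B | (B << 8) and broadcast W across the wider vector; the
   original-mode view of the result then has B in every element.  */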
32999 enum machine_mode smode, wsmode, wvmode;
33002 smode = GET_MODE_INNER (mode);
33003 wvmode = get_mode_wider_vector (mode);
33004 wsmode = GET_MODE_INNER (wvmode);
33006 val = convert_modes (wsmode, smode, val, true);
33007 x = expand_simple_binop (wsmode, ASHIFT, val,
33008 GEN_INT (GET_MODE_BITSIZE (smode)),
33009 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33010 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
33012 x = gen_lowpart (wvmode, target);
33013 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
33021 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
33022 rtx x = gen_reg_rtx (hvmode);
33024 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
33027 x = gen_rtx_VEC_CONCAT (mode, x, x);
33028 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33037 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33038 whose ONE_VAR element is VAR, and other elements are zero. Return true
33039 if successful. */
33042 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
33043 rtx target, rtx var, int one_var)
33045 enum machine_mode vsimode;
33048 bool use_vector_set = false;
33053 /* For SSE4.1, we normally use vector set. But if the second
33054 element is zero and inter-unit moves are OK, we use movq
33055 instead. */
33056 use_vector_set = (TARGET_64BIT
33058 && !(TARGET_INTER_UNIT_MOVES
33064 use_vector_set = TARGET_SSE4_1;
33067 use_vector_set = TARGET_SSE2;
33070 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
33077 use_vector_set = TARGET_AVX;
33080 /* Use ix86_expand_vector_set in 64bit mode only. */
33081 use_vector_set = TARGET_AVX && TARGET_64BIT;
33087 if (use_vector_set)
33089 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
33090 var = force_reg (GET_MODE_INNER (mode), var);
33091 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33107 var = force_reg (GET_MODE_INNER (mode), var);
33108 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
33109 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33114 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
33115 new_target = gen_reg_rtx (mode);
33117 new_target = target;
33118 var = force_reg (GET_MODE_INNER (mode), var);
33119 x = gen_rtx_VEC_DUPLICATE (mode, var);
33120 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
33121 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
33124 /* We need to shuffle the value to the correct position, so
33125 create a new pseudo to store the intermediate result. */
33127 /* With SSE2, we can use the integer shuffle insns. */
33128 if (mode != V4SFmode && TARGET_SSE2)
33130 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
33132 GEN_INT (one_var == 1 ? 0 : 1),
33133 GEN_INT (one_var == 2 ? 0 : 1),
33134 GEN_INT (one_var == 3 ? 0 : 1)));
33135 if (target != new_target)
33136 emit_move_insn (target, new_target);
33140 /* Otherwise convert the intermediate result to V4SFmode and
33141 use the SSE1 shuffle instructions. */
33142 if (mode != V4SFmode)
33144 tmp = gen_reg_rtx (V4SFmode);
33145 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
33150 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
33152 GEN_INT (one_var == 1 ? 0 : 1),
33153 GEN_INT (one_var == 2 ? 0+4 : 1+4),
33154 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
33156 if (mode != V4SFmode)
33157 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
33158 else if (tmp != target)
33159 emit_move_insn (target, tmp);
33161 else if (target != new_target)
33162 emit_move_insn (target, new_target);
33167 vsimode = V4SImode;
33173 vsimode = V2SImode;
33179 /* Zero extend the variable element to SImode and recurse. */
33180 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
33182 x = gen_reg_rtx (vsimode);
33183 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
33185 gcc_unreachable ();
33187 emit_move_insn (target, gen_lowpart (mode, x));
33195 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33196 consisting of the values in VALS. It is known that all elements
33197 except ONE_VAR are constants. Return true if successful. */
33200 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
33201 rtx target, rtx vals, int one_var)
33203 rtx var = XVECEXP (vals, 0, one_var);
33204 enum machine_mode wmode;
33207 const_vec = copy_rtx (vals);
33208 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
33209 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
33217 /* For the two element vectors, it's just as easy to use
33218 the general case. */
33222 /* Use ix86_expand_vector_set in 64bit mode only. */
33245 /* There's no way to set one QImode entry easily. Combine
33246 the variable value with its adjacent constant value, and
33247 promote to an HImode set. */
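/* Illustratively (a sketch of the two cases below): with ONE_VAR odd
   the combined word is (var << 8) | (const & 0xff); with ONE_VAR even
   it is (const << 8) | var.  The word is then stored at element
   ONE_VAR / 2 of the wider-element vector.  */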
33248 x = XVECEXP (vals, 0, one_var ^ 1);
33251 var = convert_modes (HImode, QImode, var, true);
33252 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
33253 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33254 x = GEN_INT (INTVAL (x) & 0xff);
33258 var = convert_modes (HImode, QImode, var, true);
33259 x = gen_int_mode (INTVAL (x) << 8, HImode);
33261 if (x != const0_rtx)
33262 var = expand_simple_binop (HImode, IOR, var, x, var,
33263 1, OPTAB_LIB_WIDEN);
33265 x = gen_reg_rtx (wmode);
33266 emit_move_insn (x, gen_lowpart (wmode, const_vec));
33267 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
33269 emit_move_insn (target, gen_lowpart (mode, x));
33276 emit_move_insn (target, const_vec);
33277 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33281 /* A subroutine of ix86_expand_vector_init_general. Use vector
33282 concatenate to handle the most general case: all values variable,
33283 and none identical. */
33286 ix86_expand_vector_init_concat (enum machine_mode mode,
33287 rtx target, rtx *ops, int n)
33289 enum machine_mode cmode, hmode = VOIDmode;
33290 rtx first[8], second[4];
33330 gcc_unreachable ();
33333 if (!register_operand (ops[1], cmode))
33334 ops[1] = force_reg (cmode, ops[1]);
33335 if (!register_operand (ops[0], cmode))
33336 ops[0] = force_reg (cmode, ops[0]);
33337 emit_insn (gen_rtx_SET (VOIDmode, target,
33338 gen_rtx_VEC_CONCAT (mode, ops[0],
33358 gcc_unreachable ();
33374 gcc_unreachable ();
33379 /* FIXME: We process inputs backward to help RA. PR 36222. */
33382 for (; i > 0; i -= 2, j--)
33384 first[j] = gen_reg_rtx (cmode);
33385 v = gen_rtvec (2, ops[i - 1], ops[i]);
33386 ix86_expand_vector_init (false, first[j],
33387 gen_rtx_PARALLEL (cmode, v));
33393 gcc_assert (hmode != VOIDmode);
33394 for (i = j = 0; i < n; i += 2, j++)
33396 second[j] = gen_reg_rtx (hmode);
33397 ix86_expand_vector_init_concat (hmode, second [j],
33401 ix86_expand_vector_init_concat (mode, target, second, n);
33404 ix86_expand_vector_init_concat (mode, target, first, n);
33408 gcc_unreachable ();
33412 /* A subroutine of ix86_expand_vector_init_general. Use vector
33413 interleave to handle the most general case: all values variable,
33414 and none identical. */
33417 ix86_expand_vector_init_interleave (enum machine_mode mode,
33418 rtx target, rtx *ops, int n)
33420 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
33423 rtx (*gen_load_even) (rtx, rtx, rtx);
33424 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
33425 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
33430 gen_load_even = gen_vec_setv8hi;
33431 gen_interleave_first_low = gen_vec_interleave_lowv4si;
33432 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33433 inner_mode = HImode;
33434 first_imode = V4SImode;
33435 second_imode = V2DImode;
33436 third_imode = VOIDmode;
33439 gen_load_even = gen_vec_setv16qi;
33440 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
33441 gen_interleave_second_low = gen_vec_interleave_lowv4si;
33442 inner_mode = QImode;
33443 first_imode = V8HImode;
33444 second_imode = V4SImode;
33445 third_imode = V2DImode;
33448 gcc_unreachable ();
33451 for (i = 0; i < n; i++)
33453 /* Extend the odd element to SImode using a paradoxical SUBREG. */
33454 op0 = gen_reg_rtx (SImode);
33455 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
33457 /* Insert the SImode value as low element of V4SImode vector. */
33458 op1 = gen_reg_rtx (V4SImode);
33459 op0 = gen_rtx_VEC_MERGE (V4SImode,
33460 gen_rtx_VEC_DUPLICATE (V4SImode,
33462 CONST0_RTX (V4SImode),
33464 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
33466 /* Cast the V4SImode vector back to a vector in the original mode. */
33467 op0 = gen_reg_rtx (mode);
33468 emit_move_insn (op0, gen_lowpart (mode, op1));
33470 /* Load even elements into the second position. */
33471 emit_insn (gen_load_even (op0,
33472 force_reg (inner_mode,
33476 /* Cast vector to FIRST_IMODE vector. */
33477 ops[i] = gen_reg_rtx (first_imode);
33478 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
33481 /* Interleave low FIRST_IMODE vectors. */
33482 for (i = j = 0; i < n; i += 2, j++)
33484 op0 = gen_reg_rtx (first_imode);
33485 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
33487 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
33488 ops[j] = gen_reg_rtx (second_imode);
33489 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
33492 /* Interleave low SECOND_IMODE vectors. */
33493 switch (second_imode)
33496 for (i = j = 0; i < n / 2; i += 2, j++)
33498 op0 = gen_reg_rtx (second_imode);
33499 emit_insn (gen_interleave_second_low (op0, ops[i],
33502 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
33503 vector. */
33504 ops[j] = gen_reg_rtx (third_imode);
33505 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
33507 second_imode = V2DImode;
33508 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33512 op0 = gen_reg_rtx (second_imode);
33513 emit_insn (gen_interleave_second_low (op0, ops[0],
33516 /* Cast the SECOND_IMODE vector back to a vector of the original
33517 mode. */
33518 emit_insn (gen_rtx_SET (VOIDmode, target,
33519 gen_lowpart (mode, op0)));
33523 gcc_unreachable ();
33527 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
33528 all values variable, and none identical. */
33531 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
33532 rtx target, rtx vals)
33534 rtx ops[32], op0, op1;
33535 enum machine_mode half_mode = VOIDmode;
33542 if (!mmx_ok && !TARGET_SSE)
33554 n = GET_MODE_NUNITS (mode);
33555 for (i = 0; i < n; i++)
33556 ops[i] = XVECEXP (vals, 0, i);
33557 ix86_expand_vector_init_concat (mode, target, ops, n);
33561 half_mode = V16QImode;
33565 half_mode = V8HImode;
33569 n = GET_MODE_NUNITS (mode);
33570 for (i = 0; i < n; i++)
33571 ops[i] = XVECEXP (vals, 0, i);
33572 op0 = gen_reg_rtx (half_mode);
33573 op1 = gen_reg_rtx (half_mode);
33574 ix86_expand_vector_init_interleave (half_mode, op0, ops,
33576 ix86_expand_vector_init_interleave (half_mode, op1,
33577 &ops [n >> 1], n >> 2);
33578 emit_insn (gen_rtx_SET (VOIDmode, target,
33579 gen_rtx_VEC_CONCAT (mode, op0, op1)));
33583 if (!TARGET_SSE4_1)
33591 /* Don't use ix86_expand_vector_init_interleave if we can't
33592 move from GPR to SSE register directly. */
33593 if (!TARGET_INTER_UNIT_MOVES)
33596 n = GET_MODE_NUNITS (mode);
33597 for (i = 0; i < n; i++)
33598 ops[i] = XVECEXP (vals, 0, i);
33599 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
33607 gcc_unreachable ();
33611 int i, j, n_elts, n_words, n_elt_per_word;
33612 enum machine_mode inner_mode;
33613 rtx words[4], shift;
33615 inner_mode = GET_MODE_INNER (mode);
33616 n_elts = GET_MODE_NUNITS (mode);
33617 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
33618 n_elt_per_word = n_elts / n_words;
33619 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
33621 for (i = 0; i < n_words; ++i)
33623 rtx word = NULL_RTX;
33625 for (j = 0; j < n_elt_per_word; ++j)
33627 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
33628 elt = convert_modes (word_mode, inner_mode, elt, true);
33634 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
33635 word, 1, OPTAB_LIB_WIDEN);
33636 word = expand_simple_binop (word_mode, IOR, word, elt,
33637 word, 1, OPTAB_LIB_WIDEN);
33645 emit_move_insn (target, gen_lowpart (mode, words[0]));
33646 else if (n_words == 2)
33648 rtx tmp = gen_reg_rtx (mode);
33649 emit_clobber (tmp);
33650 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
33651 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
33652 emit_move_insn (target, tmp);
33654 else if (n_words == 4)
33656 rtx tmp = gen_reg_rtx (V4SImode);
33657 gcc_assert (word_mode == SImode);
33658 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
33659 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
33660 emit_move_insn (target, gen_lowpart (mode, tmp));
33663 gcc_unreachable ();
33667 /* Initialize vector TARGET via VALS. Suppress the use of MMX
33668 instructions unless MMX_OK is true. */
33671 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
33673 enum machine_mode mode = GET_MODE (target);
33674 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33675 int n_elts = GET_MODE_NUNITS (mode);
33676 int n_var = 0, one_var = -1;
33677 bool all_same = true, all_const_zero = true;
33681 for (i = 0; i < n_elts; ++i)
33683 x = XVECEXP (vals, 0, i);
33684 if (!(CONST_INT_P (x)
33685 || GET_CODE (x) == CONST_DOUBLE
33686 || GET_CODE (x) == CONST_FIXED))
33687 n_var++, one_var = i;
33688 else if (x != CONST0_RTX (inner_mode))
33689 all_const_zero = false;
33690 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
33694 /* Constants are best loaded from the constant pool. */
33697 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
33701 /* If all values are identical, broadcast the value. */
33703 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
33704 XVECEXP (vals, 0, 0)))
33707 /* Values where only one field is non-constant are best loaded from
33708 the pool and overwritten via move later. */
33712 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
33713 XVECEXP (vals, 0, one_var),
33717 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
33721 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
33725 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
33727 enum machine_mode mode = GET_MODE (target);
33728 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33729 enum machine_mode half_mode;
33730 bool use_vec_merge = false;
33732 static rtx (*gen_extract[6][2]) (rtx, rtx)
33734 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
33735 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
33736 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
33737 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
33738 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
33739 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
33741 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
33743 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
33744 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
33745 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
33746 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
33747 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
33748 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
33758 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33759 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
33761 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33763 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33764 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33770 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
33774 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33775 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
33777 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33779 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33780 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33787 /* For the two element vectors, we implement a VEC_CONCAT with
33788 the extraction of the other element. */
33790 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
33791 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
33794 op0 = val, op1 = tmp;
33796 op0 = tmp, op1 = val;
33798 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
33799 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33804 use_vec_merge = TARGET_SSE4_1;
33811 use_vec_merge = true;
33815 /* tmp = target = A B C D */
33816 tmp = copy_to_reg (target);
33817 /* target = A A B B */
33818 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
33819 /* target = X A B B */
33820 ix86_expand_vector_set (false, target, val, 0);
33821 /* target = A X C D */
33822 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33823 const1_rtx, const0_rtx,
33824 GEN_INT (2+4), GEN_INT (3+4)));
33828 /* tmp = target = A B C D */
33829 tmp = copy_to_reg (target);
33830 /* tmp = X B C D */
33831 ix86_expand_vector_set (false, tmp, val, 0);
33832 /* target = A B X D */
33833 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33834 const0_rtx, const1_rtx,
33835 GEN_INT (0+4), GEN_INT (3+4)));
33839 /* tmp = target = A B C D */
33840 tmp = copy_to_reg (target);
33841 /* tmp = X B C D */
33842 ix86_expand_vector_set (false, tmp, val, 0);
33843 /* target = A B X D */
33844 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33845 const0_rtx, const1_rtx,
33846 GEN_INT (2+4), GEN_INT (0+4)));
33850 gcc_unreachable ();
33855 use_vec_merge = TARGET_SSE4_1;
33859 /* Element 0 handled by vec_merge below. */
33862 use_vec_merge = true;
33868 /* With SSE2, use integer shuffles to swap element 0 and ELT,
33869 store into element 0, then shuffle them back. */
33873 order[0] = GEN_INT (elt);
33874 order[1] = const1_rtx;
33875 order[2] = const2_rtx;
33876 order[3] = GEN_INT (3);
33877 order[elt] = const0_rtx;
33879 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33880 order[1], order[2], order[3]));
33882 ix86_expand_vector_set (false, target, val, 0);
33884 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33885 order[1], order[2], order[3]));
33889 /* For SSE1, we have to reuse the V4SF code. */
33890 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
33891 gen_lowpart (SFmode, val), elt);
33896 use_vec_merge = TARGET_SSE2;
33899 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
33903 use_vec_merge = TARGET_SSE4_1;
33910 half_mode = V16QImode;
33916 half_mode = V8HImode;
33922 half_mode = V4SImode;
33928 half_mode = V2DImode;
33934 half_mode = V4SFmode;
33940 half_mode = V2DFmode;
33946 /* Compute offset. */
33950 gcc_assert (i <= 1);
33952 /* Extract the half. */
33953 tmp = gen_reg_rtx (half_mode);
33954 emit_insn (gen_extract[j][i] (tmp, target));
33956 /* Put val in tmp at elt. */
33957 ix86_expand_vector_set (false, tmp, val, elt);
33960 emit_insn (gen_insert[j][i] (target, target, tmp));
33969 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
33970 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
33971 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33975 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
33977 emit_move_insn (mem, target);
33979 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
33980 emit_move_insn (tmp, val);
33982 emit_move_insn (target, mem);
33987 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
33989 enum machine_mode mode = GET_MODE (vec);
33990 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33991 bool use_vec_extr = false;
34004 use_vec_extr = true;
34008 use_vec_extr = TARGET_SSE4_1;
34020 tmp = gen_reg_rtx (mode);
34021 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
34022 GEN_INT (elt), GEN_INT (elt),
34023 GEN_INT (elt+4), GEN_INT (elt+4)));
34027 tmp = gen_reg_rtx (mode);
34028 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
34032 gcc_unreachable ();
34035 use_vec_extr = true;
34040 use_vec_extr = TARGET_SSE4_1;
34054 tmp = gen_reg_rtx (mode);
34055 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
34056 GEN_INT (elt), GEN_INT (elt),
34057 GEN_INT (elt), GEN_INT (elt)));
34061 tmp = gen_reg_rtx (mode);
34062 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
34066 gcc_unreachable ();
34069 use_vec_extr = true;
34074 /* For SSE1, we have to reuse the V4SF code. */
34075 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
34076 gen_lowpart (V4SFmode, vec), elt);
34082 use_vec_extr = TARGET_SSE2;
34085 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
34089 use_vec_extr = TARGET_SSE4_1;
34095 tmp = gen_reg_rtx (V4SFmode);
34097 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
34099 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
34100 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34108 tmp = gen_reg_rtx (V2DFmode);
34110 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
34112 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
34113 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34121 tmp = gen_reg_rtx (V16QImode);
34123 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
34125 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
34126 ix86_expand_vector_extract (false, target, tmp, elt & 15);
34134 tmp = gen_reg_rtx (V8HImode);
34136 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
34138 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
34139 ix86_expand_vector_extract (false, target, tmp, elt & 7);
34147 tmp = gen_reg_rtx (V4SImode);
34149 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
34151 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
34152 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34160 tmp = gen_reg_rtx (V2DImode);
34162 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
34164 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
34165 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34171 /* ??? Could extract the appropriate HImode element and shift. */
34178 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
34179 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
34181 /* Let the rtl optimizers know about the zero extension performed. */
34182 if (inner_mode == QImode || inner_mode == HImode)
34184 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
34185 target = gen_lowpart (SImode, target);
34188 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34192 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34194 emit_move_insn (mem, vec);
34196 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34197 emit_move_insn (target, tmp);
34201 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
34202 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
34203 The upper bits of DEST are undefined, though they shouldn't cause
34204 exceptions (some bits from src or all zeros are ok). */
34207 emit_reduc_half (rtx dest, rtx src, int i)
34210 switch (GET_MODE (src))
34214 tem = gen_sse_movhlps (dest, src, src);
34216 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
34217 GEN_INT (1 + 4), GEN_INT (1 + 4));
34220 tem = gen_vec_interleave_highv2df (dest, src, src);
34226 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
34227 gen_lowpart (V1TImode, src),
34232 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
34234 tem = gen_avx_shufps256 (dest, src, src,
34235 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
34239 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
34241 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
34248 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
34249 gen_lowpart (V4DImode, src),
34250 gen_lowpart (V4DImode, src),
34253 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
34254 gen_lowpart (V2TImode, src),
34258 gcc_unreachable ();
34263 /* Expand a vector reduction. FN is the binary pattern to reduce;
34264 DEST is the destination; IN is the input vector. */
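/* Illustration (FN = plus on a 4-element vector V = [a b c d]):
      i = 128: HALF = [c d . .],   V = V FN HALF = [a+c b+d . .]
      i =  64: HALF = [b+d . . .], V = V FN HALF = [a+b+c+d . . .]
   leaving the reduction in element 0, with the final step writing
   DEST directly.  */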
34267 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
34269 rtx half, dst, vec = in;
34270 enum machine_mode mode = GET_MODE (in);
34273 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
34275 && mode == V8HImode
34276 && fn == gen_uminv8hi3)
34278 emit_insn (gen_sse4_1_phminposuw (dest, in));
34282 for (i = GET_MODE_BITSIZE (mode);
34283 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
34286 half = gen_reg_rtx (mode);
34287 emit_reduc_half (half, vec, i);
34288 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
34291 dst = gen_reg_rtx (mode);
34292 emit_insn (fn (dst, half, vec));
34297 /* Target hook for scalar_mode_supported_p. */
34299 ix86_scalar_mode_supported_p (enum machine_mode mode)
34301 if (DECIMAL_FLOAT_MODE_P (mode))
34302 return default_decimal_float_supported_p ();
34303 else if (mode == TFmode)
34306 return default_scalar_mode_supported_p (mode);
34309 /* Implements target hook vector_mode_supported_p. */
34311 ix86_vector_mode_supported_p (enum machine_mode mode)
34313 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34315 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34317 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34319 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
34321 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
34326 /* Target hook for c_mode_for_suffix. */
34327 static enum machine_mode
34328 ix86_c_mode_for_suffix (char suffix)
34338 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34340 We do this in the new i386 backend to maintain source compatibility
34341 with the old cc0-based compiler. */
34344 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
34345 tree inputs ATTRIBUTE_UNUSED,
34348 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
34350 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
34355 /* Implements the target hook targetm.encode_section_info. */
34357 static void ATTRIBUTE_UNUSED
34358 ix86_encode_section_info (tree decl, rtx rtl, int first)
34360 default_encode_section_info (decl, rtl, first);
34362 if (TREE_CODE (decl) == VAR_DECL
34363 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
34364 && ix86_in_large_data_p (decl))
34365 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
34368 /* Worker function for REVERSE_CONDITION. */
34371 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
34373 return (mode != CCFPmode && mode != CCFPUmode
34374 ? reverse_condition (code)
34375 : reverse_condition_maybe_unordered (code));
34378 /* Output code to perform an x87 FP register move, from OPERANDS[1]
34379 to OPERANDS[0]. */
34382 output_387_reg_move (rtx insn, rtx *operands)
34384 if (REG_P (operands[0]))
34386 if (REG_P (operands[1])
34387 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34389 if (REGNO (operands[0]) == FIRST_STACK_REG)
34390 return output_387_ffreep (operands, 0);
34391 return "fstp\t%y0";
34393 if (STACK_TOP_P (operands[0]))
34394 return "fld%Z1\t%y1";
34397 else if (MEM_P (operands[0]))
34399 gcc_assert (REG_P (operands[1]));
34400 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34401 return "fstp%Z0\t%y0";
34404 /* There is no non-popping store to memory for XFmode.
34405 So if we need one, follow the store with a load. */
34406 if (GET_MODE (operands[0]) == XFmode)
34407 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34409 return "fst%Z0\t%y0";
34416 /* Output code to perform a conditional jump to LABEL, if C2 flag in
34417 FP status register is set. */
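/* The fnstsw result is tested via its upper byte: C2 is bit 10 of the
   FPU status word, i.e. bit 2 of that byte, hence the 0x04 mask used
   on the non-SAHF path below.  */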
34420 ix86_emit_fp_unordered_jump (rtx label)
34422 rtx reg = gen_reg_rtx (HImode);
34425 emit_insn (gen_x86_fnstsw_1 (reg));
34427 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
34429 emit_insn (gen_x86_sahf_1 (reg));
34431 temp = gen_rtx_REG (CCmode, FLAGS_REG);
34432 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
34436 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
34438 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
34439 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
34442 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
34443 gen_rtx_LABEL_REF (VOIDmode, label),
34445 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
34447 emit_jump_insn (temp);
34448 predict_jump (REG_BR_PROB_BASE * 10 / 100);
34451 /* Output code to perform a log1p XFmode calculation. */
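/* fyl2xp1 computes y * log2(x + 1) but is only specified for
   |x| < 1 - sqrt(2)/2 (approximately 0.2929, the constant tested
   below).  Outside that range, 1 + x is formed explicitly and fyl2x
   (y * log2(x)) is used instead.  In both cases y = ln(2), loaded by
   fldln2, which turns the base-2 log into a natural log.  */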
34453 void ix86_emit_i387_log1p (rtx op0, rtx op1)
34455 rtx label1 = gen_label_rtx ();
34456 rtx label2 = gen_label_rtx ();
34458 rtx tmp = gen_reg_rtx (XFmode);
34459 rtx tmp2 = gen_reg_rtx (XFmode);
34462 emit_insn (gen_absxf2 (tmp, op1));
34463 test = gen_rtx_GE (VOIDmode, tmp,
34464 CONST_DOUBLE_FROM_REAL_VALUE (
34465 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
34467 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
34469 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34470 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
34471 emit_jump (label2);
34473 emit_label (label1);
34474 emit_move_insn (tmp, CONST1_RTX (XFmode));
34475 emit_insn (gen_addxf3 (tmp, op1, tmp));
34476 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34477 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
34479 emit_label (label2);
34482 /* Emit code for round calculation. */
34483 void ix86_emit_i387_round (rtx op0, rtx op1)
34485 enum machine_mode inmode = GET_MODE (op1);
34486 enum machine_mode outmode = GET_MODE (op0);
34487 rtx e1, e2, res, tmp, tmp1, half;
34488 rtx scratch = gen_reg_rtx (HImode);
34489 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
34490 rtx jump_label = gen_label_rtx ();
34492 rtx (*gen_abs) (rtx, rtx);
34493 rtx (*gen_neg) (rtx, rtx);
34498 gen_abs = gen_abssf2;
34501 gen_abs = gen_absdf2;
34504 gen_abs = gen_absxf2;
34507 gcc_unreachable ();
34513 gen_neg = gen_negsf2;
34516 gen_neg = gen_negdf2;
34519 gen_neg = gen_negxf2;
34522 gen_neg = gen_neghi2;
34525 gen_neg = gen_negsi2;
34528 gen_neg = gen_negdi2;
34531 gcc_unreachable ();
34534 e1 = gen_reg_rtx (inmode);
34535 e2 = gen_reg_rtx (inmode);
34536 res = gen_reg_rtx (outmode);
34538 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
34540 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
34542 /* scratch = fxam(op1) */
34543 emit_insn (gen_rtx_SET (VOIDmode, scratch,
34544 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
34546 /* e1 = fabs(op1) */
34547 emit_insn (gen_abs (e1, op1));
34549 /* e2 = e1 + 0.5 */
34550 half = force_reg (inmode, half);
34551 emit_insn (gen_rtx_SET (VOIDmode, e2,
34552 gen_rtx_PLUS (inmode, e1, half)));
34554 /* res = floor(e2) */
34555 if (inmode != XFmode)
34557 tmp1 = gen_reg_rtx (XFmode);
34559 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
34560 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
34570 rtx tmp0 = gen_reg_rtx (XFmode);
34572 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
34574 emit_insn (gen_rtx_SET (VOIDmode, res,
34575 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
34576 UNSPEC_TRUNC_NOOP)));
34580 emit_insn (gen_frndintxf2_floor (res, tmp1));
34583 emit_insn (gen_lfloorxfhi2 (res, tmp1));
34586 emit_insn (gen_lfloorxfsi2 (res, tmp1));
34589 emit_insn (gen_lfloorxfdi2 (res, tmp1));
34592 gcc_unreachable ();
34595 /* flags = signbit(a); fxam stored its condition codes in the FPU
status word, and C1, the operand's sign, is bit 1 of the word's
upper byte, hence the 0x02 test. */
34596 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
34598 /* if (flags) then res = -res */
34599 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
34600 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
34601 gen_rtx_LABEL_REF (VOIDmode, jump_label),
34603 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34604 predict_jump (REG_BR_PROB_BASE * 50 / 100);
34605 JUMP_LABEL (insn) = jump_label;
34607 emit_insn (gen_neg (res, res));
34609 emit_label (jump_label);
34610 LABEL_NUSES (jump_label) = 1;
34612 emit_move_insn (op0, res);
34615 /* Output code to perform a Newton-Raphson approximation of a single precision
34616 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
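/* Newton's iteration for the reciprocal: given an estimate x0 ~ 1/b,
      x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0),
   which is the (e1 - e0) form emitted below.  One step roughly
   doubles the ~12 bits of accuracy of the hardware rcp estimate.  */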
34618 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
34620 rtx x0, x1, e0, e1;
34622 x0 = gen_reg_rtx (mode);
34623 e0 = gen_reg_rtx (mode);
34624 e1 = gen_reg_rtx (mode);
34625 x1 = gen_reg_rtx (mode);
34627 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
34629 b = force_reg (mode, b);
34631 /* x0 = rcp(b) estimate */
34632 emit_insn (gen_rtx_SET (VOIDmode, x0,
34633 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
34636 emit_insn (gen_rtx_SET (VOIDmode, e0,
34637 gen_rtx_MULT (mode, x0, b)));
34640 emit_insn (gen_rtx_SET (VOIDmode, e0,
34641 gen_rtx_MULT (mode, x0, e0)));
34644 emit_insn (gen_rtx_SET (VOIDmode, e1,
34645 gen_rtx_PLUS (mode, x0, x0)));
34648 emit_insn (gen_rtx_SET (VOIDmode, x1,
34649 gen_rtx_MINUS (mode, e1, e0)));
34652 emit_insn (gen_rtx_SET (VOIDmode, res,
34653 gen_rtx_MULT (mode, a, x1)));
34656 /* Output code to perform a Newton-Raphson approximation of a
34657 single precision floating point [reciprocal] square root. */
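/* Newton's iteration for the reciprocal square root: given
   x0 ~ 1/sqrt(a),
      x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3),
   matching the formula quoted below; using a * x0 in place of x0 in
   the final multiply yields sqrt(a) = a / sqrt(a).  */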
34659 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
34662 rtx x0, e0, e1, e2, e3, mthree, mhalf;
34665 x0 = gen_reg_rtx (mode);
34666 e0 = gen_reg_rtx (mode);
34667 e1 = gen_reg_rtx (mode);
34668 e2 = gen_reg_rtx (mode);
34669 e3 = gen_reg_rtx (mode);
34671 real_from_integer (&r, VOIDmode, -3, -1, 0);
34672 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34674 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
34675 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34677 if (VECTOR_MODE_P (mode))
34679 mthree = ix86_build_const_vector (mode, true, mthree);
34680 mhalf = ix86_build_const_vector (mode, true, mhalf);
34683 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
34684 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
34686 a = force_reg (mode, a);
34688 /* x0 = rsqrt(a) estimate */
34689 emit_insn (gen_rtx_SET (VOIDmode, x0,
34690 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
34693 /* If a == 0.0, mask the infinite rsqrt estimate to zero so that
sqrt(0.0) computes to 0.0 rather than NaN. */
34698 zero = gen_reg_rtx (mode);
34699 mask = gen_reg_rtx (mode);
34701 zero = force_reg (mode, CONST0_RTX(mode));
34702 emit_insn (gen_rtx_SET (VOIDmode, mask,
34703 gen_rtx_NE (mode, zero, a)));
34705 emit_insn (gen_rtx_SET (VOIDmode, x0,
34706 gen_rtx_AND (mode, x0, mask)));
34710 emit_insn (gen_rtx_SET (VOIDmode, e0,
34711 gen_rtx_MULT (mode, x0, a)));
34713 emit_insn (gen_rtx_SET (VOIDmode, e1,
34714 gen_rtx_MULT (mode, e0, x0)));
34717 mthree = force_reg (mode, mthree);
34718 emit_insn (gen_rtx_SET (VOIDmode, e2,
34719 gen_rtx_PLUS (mode, e1, mthree)));
34721 mhalf = force_reg (mode, mhalf);
34723 /* e3 = -.5 * x0 */
34724 emit_insn (gen_rtx_SET (VOIDmode, e3,
34725 gen_rtx_MULT (mode, x0, mhalf)));
34727 /* e3 = -.5 * e0 */
34728 emit_insn (gen_rtx_SET (VOIDmode, e3,
34729 gen_rtx_MULT (mode, e0, mhalf)));
34730 /* ret = e2 * e3 */
34731 emit_insn (gen_rtx_SET (VOIDmode, res,
34732 gen_rtx_MULT (mode, e2, e3)));
34735 #ifdef TARGET_SOLARIS
34736 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
34739 i386_solaris_elf_named_section (const char *name, unsigned int flags,
34742 /* With Binutils 2.15, the "@unwind" marker must be specified on
34743 every occurrence of the ".eh_frame" section, not just the first
34744 one. */
34746 && strcmp (name, ".eh_frame") == 0)
34748 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
34749 flags & SECTION_WRITE ? "aw" : "a");
34754 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
34756 solaris_elf_asm_comdat_section (name, flags, decl);
34761 default_elf_asm_named_section (name, flags, decl);
34763 #endif /* TARGET_SOLARIS */
34765 /* Return the mangling of TYPE if it is an extended fundamental type. */
34767 static const char *
34768 ix86_mangle_type (const_tree type)
34770 type = TYPE_MAIN_VARIANT (type);
34772 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
34773 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
34776 switch (TYPE_MODE (type))
34779 /* __float128 is "g". */
34782 /* "long double" or __float80 is "e". */
34789 /* For 32-bit code we can save PIC register setup by using
34790 __stack_chk_fail_local hidden function instead of calling
34791 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
34792 register, so it is better to call __stack_chk_fail directly. */
34794 static tree ATTRIBUTE_UNUSED
34795 ix86_stack_protect_fail (void)
34797 return TARGET_64BIT
34798 ? default_external_stack_protect_fail ()
34799 : default_hidden_stack_protect_fail ();
34802 /* Select a format to encode pointers in exception handling data. CODE
34803 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
34804 true if the symbol may be affected by dynamic relocations.
34806 ??? All x86 object file formats are capable of representing this.
34807 After all, the relocation needed is the same as for the call insn.
34808 Whether or not a particular assembler allows us to enter such, I
34809 guess we'll have to see. */
34811 asm_preferred_eh_data_format (int code, int global)
34815 int type = DW_EH_PE_sdata8;
34817 || ix86_cmodel == CM_SMALL_PIC
34818 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
34819 type = DW_EH_PE_sdata4;
34820 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
34822 if (ix86_cmodel == CM_SMALL
34823 || (ix86_cmodel == CM_MEDIUM && code))
34824 return DW_EH_PE_udata4;
34825 return DW_EH_PE_absptr;
34828 /* Expand copysign from SIGN to the positive value ABS_VALUE
34829 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
34830 the sign bit. */
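/* In effect: result = abs_value | (sign & signbit-mask), i.e. the
   sign bit of SIGN is OR-ed onto the (positive) ABS_VALUE.  */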
34832 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
34834 enum machine_mode mode = GET_MODE (sign);
34835 rtx sgn = gen_reg_rtx (mode);
34836 if (mask == NULL_RTX)
34838 enum machine_mode vmode;
34840 if (mode == SFmode)
34842 else if (mode == DFmode)
34847 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
34848 if (!VECTOR_MODE_P (mode))
34850 /* We need to generate a scalar mode mask in this case. */
34851 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34852 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34853 mask = gen_reg_rtx (mode);
34854 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34858 mask = gen_rtx_NOT (mode, mask);
34859 emit_insn (gen_rtx_SET (VOIDmode, sgn,
34860 gen_rtx_AND (mode, mask, sign)));
34861 emit_insn (gen_rtx_SET (VOIDmode, result,
34862 gen_rtx_IOR (mode, abs_value, sgn)));
34865 /* Expand fabs (OP0) and return a new rtx that holds the result. The
34866 mask for masking out the sign-bit is stored in *SMASK, if that is
34867 non-null. */
34869 ix86_expand_sse_fabs (rtx op0, rtx *smask)
34871 enum machine_mode vmode, mode = GET_MODE (op0);
34874 xa = gen_reg_rtx (mode);
34875 if (mode == SFmode)
34877 else if (mode == DFmode)
34881 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
34882 if (!VECTOR_MODE_P (mode))
34884 /* We need to generate a scalar mode mask in this case. */
34885 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34886 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34887 mask = gen_reg_rtx (mode);
34888 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34890 emit_insn (gen_rtx_SET (VOIDmode, xa,
34891 gen_rtx_AND (mode, op0, mask)));
34899 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
34900 swapping the operands if SWAP_OPERANDS is true. The expanded
34901 code is a forward jump to a newly created label in case the
34902 comparison is true. The generated label rtx is returned. */
34904 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
34905 bool swap_operands)
34916 label = gen_label_rtx ();
34917 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
34918 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34919 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
34920 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
34921 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
34922 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
34923 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34924 JUMP_LABEL (tmp) = label;
34929 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
34930 using comparison code CODE. Operands are swapped for the comparison if
34931 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
34933 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
34934 bool swap_operands)
34936 rtx (*insn)(rtx, rtx, rtx, rtx);
34937 enum machine_mode mode = GET_MODE (op0);
34938 rtx mask = gen_reg_rtx (mode);
34947 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
34949 emit_insn (insn (mask, op0, op1,
34950 gen_rtx_fmt_ee (code, mode, op0, op1)));
34954 /* Generate and return a rtx of mode MODE for 2**n where n is the number
34955 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
34957 ix86_gen_TWO52 (enum machine_mode mode)
34959 REAL_VALUE_TYPE TWO52r;
34962 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
34963 TWO52 = const_double_from_real_value (TWO52r, mode);
34964 TWO52 = force_reg (mode, TWO52);
34969 /* Expand SSE sequence for computing lround from OP1 storing
34970 into OP0. */
34972 ix86_expand_lround (rtx op0, rtx op1)
34974 /* C code for the stuff we're doing below:
34975 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
34976 return (long) tmp;
34977 */
34978 enum machine_mode mode = GET_MODE (op1);
34979 const struct real_format *fmt;
34980 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34983 /* load nextafter (0.5, 0.0) */
34984 fmt = REAL_MODE_FORMAT (mode);
34985 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34986 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
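/* Using the largest FP value strictly below 0.5 matters for inputs
   just under 0.5: e.g. with x = nextafter (0.5, 0.0), x + 0.5 would
   round up to 1.0 and lround would wrongly return 1 instead of 0.  */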
34988 /* adj = copysign (0.5, op1) */
34989 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
34990 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
34992 /* adj = op1 + adj */
34993 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
34995 /* op0 = (imode)adj */
34996 expand_fix (op0, adj, 0);
34999 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
35000 DO_FLOOR) from OPERAND1, storing into OPERAND0. */
35002 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
35004 /* C code for the stuff we're doing below (for do_floor):
35005 xi = (long) op1;
35006 xi -= (double) xi > op1 ? 1 : 0;
35007 return xi;
35008 */
35009 enum machine_mode fmode = GET_MODE (op1);
35010 enum machine_mode imode = GET_MODE (op0);
35011 rtx ireg, freg, label, tmp;
35013 /* reg = (long)op1 */
35014 ireg = gen_reg_rtx (imode);
35015 expand_fix (ireg, op1, 0);
35017 /* freg = (double)reg */
35018 freg = gen_reg_rtx (fmode);
35019 expand_float (freg, ireg, 0);
35021 /* ireg = (freg > op1) ? ireg - 1 : ireg */
35022 label = ix86_expand_sse_compare_and_jump (UNLE,
35023 freg, op1, !do_floor);
35024 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
35025 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
35026 emit_move_insn (ireg, tmp);
35028 emit_label (label);
35029 LABEL_NUSES (label) = 1;
35031 emit_move_insn (op0, ireg);
35034 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
35035 result in OPERAND0. */
35037 ix86_expand_rint (rtx operand0, rtx operand1)
35039 /* C code for the stuff we're doing below:
35040 xa = fabs (operand1);
35041 if (!isless (xa, 2**52))
35042 return operand1;
35043 xa = xa + 2**52 - 2**52;
35044 return copysign (xa, operand1);
35045 */
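/* The TWO52 trick: for |xa| < 2**52 the sum xa + 2**52 has a unit
   ulp, so the addition itself rounds away the fractional bits (in
   the current rounding mode); subtracting 2**52 back then leaves xa
   rounded to an integer.  */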
35046 enum machine_mode mode = GET_MODE (operand0);
35047 rtx res, xa, label, TWO52, mask;
35049 res = gen_reg_rtx (mode);
35050 emit_move_insn (res, operand1);
35052 /* xa = abs (operand1) */
35053 xa = ix86_expand_sse_fabs (res, &mask);
35055 /* if (!isless (xa, TWO52)) goto label; */
35056 TWO52 = ix86_gen_TWO52 (mode);
35057 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35059 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35060 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35062 ix86_sse_copysign_to_positive (res, xa, res, mask);
35064 emit_label (label);
35065 LABEL_NUSES (label) = 1;
35067 emit_move_insn (operand0, res);
35070 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35071 into OPERAND0. */
35073 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
35075 /* C code for the stuff we expand below.
35076 double xa = fabs (x), x2;
35077 if (!isless (xa, TWO52))
35078 return x;
35079 xa = xa + TWO52 - TWO52;
35080 x2 = copysign (xa, x);
35081 Compensate. Floor:
35082 if (x2 > x)
35083 x2 -= 1;
35084 Compensate. Ceil:
35085 if (x2 < x)
35086 x2 += 1;
35087 return x2;
35088 */
35089 enum machine_mode mode = GET_MODE (operand0);
35090 rtx xa, TWO52, tmp, label, one, res, mask;
35092 TWO52 = ix86_gen_TWO52 (mode);
35094 /* Temporary for holding the result, initialized to the input
35095 operand to ease control flow. */
35096 res = gen_reg_rtx (mode);
35097 emit_move_insn (res, operand1);
35099 /* xa = abs (operand1) */
35100 xa = ix86_expand_sse_fabs (res, &mask);
35102 /* if (!isless (xa, TWO52)) goto label; */
35103 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35105 /* xa = xa + TWO52 - TWO52; */
35106 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35107 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35109 /* xa = copysign (xa, operand1) */
35110 ix86_sse_copysign_to_positive (xa, xa, res, mask);
35112 /* generate 1.0 or -1.0 */
35113 one = force_reg (mode,
35114 const_double_from_real_value (do_floor
35115 ? dconst1 : dconstm1, mode));
35117 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35118 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35119 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35120 gen_rtx_AND (mode, one, tmp)));
35121 /* We always need to subtract here to preserve signed zero. */
35122 tmp = expand_simple_binop (mode, MINUS,
35123 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35124 emit_move_insn (res, tmp);
35126 emit_label (label);
35127 LABEL_NUSES (label) = 1;
35129 emit_move_insn (operand0, res);
35132 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35133 into OPERAND0. */
35135 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
35137 /* C code for the stuff we expand below.
35138 double xa = fabs (x), x2;
35139 if (!isless (xa, TWO52))
35140 return x;
35141 x2 = (double)(long)x;
35142 Compensate. Floor:
35143 if (x2 > x)
35144 x2 -= 1;
35145 Compensate. Ceil:
35146 if (x2 < x)
35147 x2 += 1;
35148 if (HONOR_SIGNED_ZEROS (mode))
35149 return copysign (x2, x);
35150 return x2;
35151 */
35152 enum machine_mode mode = GET_MODE (operand0);
35153 rtx xa, xi, TWO52, tmp, label, one, res, mask;
35155 TWO52 = ix86_gen_TWO52 (mode);
35157 /* Temporary for holding the result, initialized to the input
35158 operand to ease control flow. */
35159 res = gen_reg_rtx (mode);
35160 emit_move_insn (res, operand1);
35162 /* xa = abs (operand1) */
35163 xa = ix86_expand_sse_fabs (res, &mask);
35165 /* if (!isless (xa, TWO52)) goto label; */
35166 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35168 /* xa = (double)(long)x */
35169 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35170 expand_fix (xi, res, 0);
35171 expand_float (xa, xi, 0);
35174 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35176 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35177 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35178 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35179 gen_rtx_AND (mode, one, tmp)));
35180 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
35181 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35182 emit_move_insn (res, tmp);
35184 if (HONOR_SIGNED_ZEROS (mode))
35185 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35187 emit_label (label);
35188 LABEL_NUSES (label) = 1;
35190 emit_move_insn (operand0, res);
35193 /* Expand SSE sequence for computing round from OPERAND1 storing
35194 into OPERAND0, using a sequence that works without relying on DImode
35195 truncation via cvttsd2siq, which is only available on 64-bit targets. */
35197 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
35199 /* C code for the stuff we expand below.
35200 double xa = fabs (x), xa2, x2;
35201 if (!isless (xa, TWO52))
35202 return x;
35203 Using the absolute value and copying back sign makes
35204 -0.0 -> -0.0 correct.
35205 xa2 = xa + TWO52 - TWO52;
35206 Compensate.
35207 dxa = xa2 - xa;
35208 if (dxa <= -0.5)
35209 xa2 += 1;
35210 else if (dxa > 0.5)
35211 xa2 -= 1;
35212 x2 = copysign (xa2, x);
35213 return x2;
35214 */
35215 enum machine_mode mode = GET_MODE (operand0);
35216 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
35218 TWO52 = ix86_gen_TWO52 (mode);
35220 /* Temporary for holding the result, initialized to the input
35221 operand to ease control flow. */
35222 res = gen_reg_rtx (mode);
35223 emit_move_insn (res, operand1);
35225 /* xa = abs (operand1) */
35226 xa = ix86_expand_sse_fabs (res, &mask);
35228 /* if (!isless (xa, TWO52)) goto label; */
35229 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35231 /* xa2 = xa + TWO52 - TWO52; */
35232 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35233 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
35235 /* dxa = xa2 - xa; */
35236 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
35238 /* generate 0.5, 1.0 and -0.5 */
35239 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
35240 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
35241 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
35245 tmp = gen_reg_rtx (mode);
35246 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35247 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
35248 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35249 gen_rtx_AND (mode, one, tmp)));
35250 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35251 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35252 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
35253 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35254 gen_rtx_AND (mode, one, tmp)));
35255 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35257 /* res = copysign (xa2, operand1) */
35258 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
35260 emit_label (label);
35261 LABEL_NUSES (label) = 1;
35263 emit_move_insn (operand0, res);
35266 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35267 into OPERAND0. */
35269 ix86_expand_trunc (rtx operand0, rtx operand1)
35271 /* C code for SSE variant we expand below.
35272 double xa = fabs (x), x2;
35273 if (!isless (xa, TWO52))
35274 return x;
35275 x2 = (double)(long)x;
35276 if (HONOR_SIGNED_ZEROS (mode))
35277 return copysign (x2, x);
35278 return x2;
35279 */
35280 enum machine_mode mode = GET_MODE (operand0);
35281 rtx xa, xi, TWO52, label, res, mask;
35283 TWO52 = ix86_gen_TWO52 (mode);
35285 /* Temporary for holding the result, initialized to the input
35286 operand to ease control flow. */
35287 res = gen_reg_rtx (mode);
35288 emit_move_insn (res, operand1);
35290 /* xa = abs (operand1) */
35291 xa = ix86_expand_sse_fabs (res, &mask);
35293 /* if (!isless (xa, TWO52)) goto label; */
35294 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35296 /* x = (double)(long)x */
35297 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35298 expand_fix (xi, res, 0);
35299 expand_float (res, xi, 0);
35301 if (HONOR_SIGNED_ZEROS (mode))
35302 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35304 emit_label (label);
35305 LABEL_NUSES (label) = 1;
35307 emit_move_insn (operand0, res);
35310 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35311 into OPERAND0. */
35313 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
35315 enum machine_mode mode = GET_MODE (operand0);
35316 rtx xa, mask, TWO52, label, one, res, smask, tmp;
35318 /* C code for SSE variant we expand below.
35319 double xa = fabs (x), xa2, x2;
35320 if (!isless (xa, TWO52))
35321 return x;
35322 xa2 = xa + TWO52 - TWO52;
35323 Compensate:
35324 if (xa2 > xa)
35325 xa2 -= 1.0;
35326 x2 = copysign (xa2, x);
35327 return x2;
35328 */
35330 TWO52 = ix86_gen_TWO52 (mode);
35332 /* Temporary for holding the result, initialized to the input
35333 operand to ease control flow. */
35334 res = gen_reg_rtx (mode);
35335 emit_move_insn (res, operand1);
35337 /* xa = abs (operand1) */
35338 xa = ix86_expand_sse_fabs (res, &smask);
35340 /* if (!isless (xa, TWO52)) goto label; */
35341 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35343 /* res = xa + TWO52 - TWO52; */
35344 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35345 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
35346 emit_move_insn (res, tmp);
35349 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35351 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35352 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
35353 emit_insn (gen_rtx_SET (VOIDmode, mask,
35354 gen_rtx_AND (mode, mask, one)));
35355 tmp = expand_simple_binop (mode, MINUS,
35356 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
35357 emit_move_insn (res, tmp);
35359 /* res = copysign (res, operand1) */
35360 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
35362 emit_label (label);
35363 LABEL_NUSES (label) = 1;
35365 emit_move_insn (operand0, res);
35368 /* Expand SSE sequence for computing round from OPERAND1 storing
35369 into OPERAND0. */
35371 ix86_expand_round (rtx operand0, rtx operand1)
35373 /* C code for the stuff we're doing below:
35374 double xa = fabs (x);
35375 if (!isless (xa, TWO52))
35376 return x;
35377 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35378 return copysign (xa, x);
35379 */
35380 enum machine_mode mode = GET_MODE (operand0);
35381 rtx res, TWO52, xa, label, xi, half, mask;
35382 const struct real_format *fmt;
35383 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35385 /* Temporary for holding the result, initialized to the input
35386 operand to ease control flow. */
35387 res = gen_reg_rtx (mode);
35388 emit_move_insn (res, operand1);
35390 TWO52 = ix86_gen_TWO52 (mode);
35391 xa = ix86_expand_sse_fabs (res, &mask);
35392 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35394 /* load nextafter (0.5, 0.0) */
35395 fmt = REAL_MODE_FORMAT (mode);
35396 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35397 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35399 /* xa = xa + 0.5 */
35400 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
35401 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
35403 /* xa = (double)(int64_t)xa */
35404 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35405 expand_fix (xi, xa, 0);
35406 expand_float (xa, xi, 0);
35408 /* res = copysign (xa, operand1) */
35409 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
35411 emit_label (label);
35412 LABEL_NUSES (label) = 1;
35414 emit_move_insn (operand0, res);
35417 /* Expand SSE sequence for computing round
35418 from OP1 storing into OP0 using sse4 round insn. */
35420 ix86_expand_round_sse4 (rtx op0, rtx op1)
35422 enum machine_mode mode = GET_MODE (op0);
35423 rtx e1, e2, res, half;
35424 const struct real_format *fmt;
35425 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35426 rtx (*gen_copysign) (rtx, rtx, rtx);
35427 rtx (*gen_round) (rtx, rtx, rtx);
35432 gen_copysign = gen_copysignsf3;
35433 gen_round = gen_sse4_1_roundsf2;
35436 gen_copysign = gen_copysigndf3;
35437 gen_round = gen_sse4_1_rounddf2;
35440 gcc_unreachable ();
35443 /* round (a) = trunc (a + copysign (0.5, a)) */
35445 /* load nextafter (0.5, 0.0) */
35446 fmt = REAL_MODE_FORMAT (mode);
35447 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35448 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35449 half = const_double_from_real_value (pred_half, mode);
35451 /* e1 = copysign (0.5, op1) */
35452 e1 = gen_reg_rtx (mode);
35453 emit_insn (gen_copysign (e1, half, op1));
35455 /* e2 = op1 + e1 */
35456 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
35458 /* res = trunc (e2) */
35459 res = gen_reg_rtx (mode);
35460 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
35462 emit_move_insn (op0, res);
35466 /* Table of valid machine attributes. */
35467 static const struct attribute_spec ix86_attribute_table[] =
35469 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
35470 affects_type_identity } */
35471 /* Stdcall attribute says callee is responsible for popping arguments
35472 if they are not variable. */
35473 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35475 /* Fastcall attribute says callee is responsible for popping arguments
35476 if they are not variable. */
35477 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35479 /* Thiscall attribute says callee is responsible for popping arguments
35480 if they are not variable. */
35481 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35483 /* Cdecl attribute says the callee is a normal C declaration */
35484 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35486 /* Regparm attribute specifies how many integer arguments are to be
35487 passed in registers. */
35488 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
35490 /* Sseregparm attribute says we are using x86_64 calling conventions
35491 for FP arguments. */
35492 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35494 /* The transactional memory builtins are implicitly regparm or fastcall
35495 depending on the ABI. Override the generic do-nothing attribute that
35496 these builtins were declared with. */
35497 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
35499 /* force_align_arg_pointer says this function realigns the stack at entry. */
35500 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
35501 false, true, true, ix86_handle_cconv_attribute, false },
35502 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35503 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
35504 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
35505 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
35508 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35510 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35512 #ifdef SUBTARGET_ATTRIBUTE_TABLE
35513 SUBTARGET_ATTRIBUTE_TABLE,
35515 /* ms_abi and sysv_abi calling convention function attributes. */
35516 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35517 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35518 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
35520 { "callee_pop_aggregate_return", 1, 1, false, true, true,
35521 ix86_handle_callee_pop_aggregate_return, true },
35523 { NULL, 0, 0, false, false, false, NULL, false }
35526 /* Implement targetm.vectorize.builtin_vectorization_cost. */
35528 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
35529 tree vectype ATTRIBUTE_UNUSED,
35530 int misalign ATTRIBUTE_UNUSED)
35532 switch (type_of_cost)
35535 return ix86_cost->scalar_stmt_cost;
35538 return ix86_cost->scalar_load_cost;
35541 return ix86_cost->scalar_store_cost;
35544 return ix86_cost->vec_stmt_cost;
35547 return ix86_cost->vec_align_load_cost;
35550 return ix86_cost->vec_store_cost;
35552 case vec_to_scalar:
35553 return ix86_cost->vec_to_scalar_cost;
35555 case scalar_to_vec:
35556 return ix86_cost->scalar_to_vec_cost;
35558 case unaligned_load:
35559 case unaligned_store:
35560 return ix86_cost->vec_unalign_load_cost;
35562 case cond_branch_taken:
35563 return ix86_cost->cond_taken_branch_cost;
35565 case cond_branch_not_taken:
35566 return ix86_cost->cond_not_taken_branch_cost;
35569 case vec_promote_demote:
35570 return ix86_cost->vec_stmt_cost;
35573 gcc_unreachable ();
35577 /* Construct (set target (vec_select op0 (parallel perm))) and
35578 return true if that's a valid instruction in the active ISA. */
35581 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
35583 rtx rperm[MAX_VECT_LEN], x;
35586 for (i = 0; i < nelt; ++i)
35587 rperm[i] = GEN_INT (perm[i]);
35589 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
35590 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
35591 x = gen_rtx_SET (VOIDmode, target, x);
35594 if (recog_memoized (x) < 0)
35602 /* Similar, but generate a vec_concat from op0 and op1 as well. */
35605 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
35606 const unsigned char *perm, unsigned nelt)
35608 enum machine_mode v2mode;
35611 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
35612 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
35613 return expand_vselect (target, x, perm, nelt);
35616 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35617 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
35620 expand_vec_perm_blend (struct expand_vec_perm_d *d)
35622 enum machine_mode vmode = d->vmode;
35623 unsigned i, mask, nelt = d->nelt;
35624 rtx target, op0, op1, x;
35625 rtx rperm[32], vperm;
35627 if (d->op0 == d->op1)
35629 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
35631 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
35633 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
35638 /* This is a blend, not a permute. Elements must stay in their
35639 respective lanes. */
35640 for (i = 0; i < nelt; ++i)
35642 unsigned e = d->perm[i];
35643 if (!(e == i || e == i + nelt))
35650 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
35651 decision should be extracted elsewhere, so that we only try that
35652 sequence once all budget==3 options have been tried. */
35653 target = d->target;
35666 for (i = 0; i < nelt; ++i)
35667 mask |= (d->perm[i] >= nelt) << i;
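/* E.g. a V8HImode selection { 0, 9, 2, 11, 4, 13, 6, 15 } sets bits
1, 3, 5 and 7, giving mask 0xaa: the odd words are taken from OP1. */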
35671 for (i = 0; i < 2; ++i)
35672 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
35677 for (i = 0; i < 4; ++i)
35678 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
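/* Each V4SImode element contributes two adjacent mask bits, since the
blend is emitted as a word-granular (pblendw-style) operation on a
V8HImode view of the operands. */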
35683 /* See if bytes move in pairs so we can use pblendw with
35684 an immediate argument, rather than pblendvb with a vector
35686 for (i = 0; i < 16; i += 2)
35687 if (d->perm[i] + 1 != d->perm[i + 1])
35690 for (i = 0; i < nelt; ++i)
35691 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
35694 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
35695 vperm = force_reg (vmode, vperm);
35697 if (GET_MODE_SIZE (vmode) == 16)
35698 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
35700 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
35704 for (i = 0; i < 8; ++i)
35705 mask |= (d->perm[i * 2] >= 16) << i;
35710 target = gen_lowpart (vmode, target);
35711 op0 = gen_lowpart (vmode, op0);
35712 op1 = gen_lowpart (vmode, op1);
35716 /* See if bytes move in pairs. If not, vpblendvb must be used. */
35717 for (i = 0; i < 32; i += 2)
35718 if (d->perm[i] + 1 != d->perm[i + 1])
35720 /* See if bytes move in quadruplets. If yes, vpblendd
35721 with immediate can be used. */
35722 for (i = 0; i < 32; i += 4)
35723 if (d->perm[i] + 2 != d->perm[i + 2])
35727 /* See if bytes move the same in both lanes. If yes,
35728 vpblendw with immediate can be used. */
35729 for (i = 0; i < 16; i += 2)
35730 if (d->perm[i] + 16 != d->perm[i + 16])
35733 /* Use vpblendw. */
35734 for (i = 0; i < 16; ++i)
35735 mask |= (d->perm[i * 2] >= 32) << i;
35740 /* Use vpblendd. */
35741 for (i = 0; i < 8; ++i)
35742 mask |= (d->perm[i * 4] >= 32) << i;
35747 /* See if words move in pairs. If yes, vpblendd can be used. */
35748 for (i = 0; i < 16; i += 2)
35749 if (d->perm[i] + 1 != d->perm[i + 1])
35753 /* See if words move the same in both lanes. If not,
35754 vpblendvb must be used. */
35755 for (i = 0; i < 8; i++)
35756 if (d->perm[i] + 8 != d->perm[i + 8])
35758 /* Use vpblendvb. */
35759 for (i = 0; i < 32; ++i)
35760 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
35764 target = gen_lowpart (vmode, target);
35765 op0 = gen_lowpart (vmode, op0);
35766 op1 = gen_lowpart (vmode, op1);
35767 goto finish_pblendvb;
35770 /* Use vpblendw. */
35771 for (i = 0; i < 16; ++i)
35772 mask |= (d->perm[i] >= 16) << i;
35776 /* Use vpblendd. */
35777 for (i = 0; i < 8; ++i)
35778 mask |= (d->perm[i * 2] >= 16) << i;
35783 /* Use vpblendd. */
35784 for (i = 0; i < 4; ++i)
35785 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35790 gcc_unreachable ();
35793 /* This matches five different patterns with the different modes. */
35794 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
35795 x = gen_rtx_SET (VOIDmode, target, x);
35801 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35802 in terms of the variable form of vpermilps.
35804 Note that we will have already failed the immediate input vpermilps,
35805 which requires that the high and low part shuffle be identical; the
35806 variable form doesn't require that. */
35809 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
35811 rtx rperm[8], vperm;
35814 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
35817 /* We can only permute within the 128-bit lane. */
35818 for (i = 0; i < 8; ++i)
35820 unsigned e = d->perm[i];
35821 if (i < 4 ? e >= 4 : e < 4)
35828 for (i = 0; i < 8; ++i)
35830 unsigned e = d->perm[i];
35832 /* Within each 128-bit lane, the elements of op0 are numbered
35833 from 0 and the elements of op1 are numbered from 4. */
35839 rperm[i] = GEN_INT (e);
35842 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
35843 vperm = force_reg (V8SImode, vperm);
35844 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
35849 /* Return true if permutation D can be performed as VMODE permutation
instead of the specified D->VMODE permutation. */
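/* E.g. a V16QImode permutation can be performed as a V4SImode one
(chunk of 4) only if every group of four selected bytes starts at a
multiple of four and is consecutive, as in
{ 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 }. */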
35853 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
35855 unsigned int i, j, chunk;
35857 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
35858 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
35859 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
35862 if (GET_MODE_NUNITS (vmode) >= d->nelt)
35865 chunk = d->nelt / GET_MODE_NUNITS (vmode);
35866 for (i = 0; i < d->nelt; i += chunk)
35867 if (d->perm[i] & (chunk - 1))
35870 for (j = 1; j < chunk; ++j)
35871 if (d->perm[i] + j != d->perm[i + j])
35877 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35878 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
35881 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
35883 unsigned i, nelt, eltsz, mask;
35884 unsigned char perm[32];
35885 enum machine_mode vmode = V16QImode;
35886 rtx rperm[32], vperm, target, op0, op1;
35890 if (d->op0 != d->op1)
35892 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
35895 && valid_perm_using_mode_p (V2TImode, d))
35900 /* Use vperm2i128 insn. The pattern uses
35901 V4DImode instead of V2TImode. */
35902 target = gen_lowpart (V4DImode, d->target);
35903 op0 = gen_lowpart (V4DImode, d->op0);
35904 op1 = gen_lowpart (V4DImode, d->op1);
35906 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
35907 | ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
35908 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
35916 if (GET_MODE_SIZE (d->vmode) == 16)
35921 else if (GET_MODE_SIZE (d->vmode) == 32)
35926 /* V4DImode should be already handled through
35927 expand_vselect by vpermq instruction. */
35928 gcc_assert (d->vmode != V4DImode);
35931 if (d->vmode == V8SImode
35932 || d->vmode == V16HImode
35933 || d->vmode == V32QImode)
35935 /* First see if vpermq can be used for
35936 V8SImode/V16HImode/V32QImode. */
35937 if (valid_perm_using_mode_p (V4DImode, d))
35939 for (i = 0; i < 4; i++)
35940 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
35943 return expand_vselect (gen_lowpart (V4DImode, d->target),
35944 gen_lowpart (V4DImode, d->op0),
35948 /* Next see if vpermd can be used. */
35949 if (valid_perm_using_mode_p (V8SImode, d))
35953 if (vmode == V32QImode)
35955 /* vpshufb only works intra lanes, it is not
35956 possible to shuffle bytes in between the lanes. */
35957 for (i = 0; i < nelt; ++i)
35958 if ((d->perm[i] ^ i) & (nelt / 2))
35969 if (vmode == V8SImode)
35970 for (i = 0; i < 8; ++i)
35971 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
35974 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
35975 if (d->op0 != d->op1)
35976 mask = 2 * nelt - 1;
35977 else if (vmode == V16QImode)
mask = nelt - 1;
else
35980 mask = nelt / 2 - 1;
35982 for (i = 0; i < nelt; ++i)
35984 unsigned j, e = d->perm[i] & mask;
35985 for (j = 0; j < eltsz; ++j)
35986 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
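/* The loop above widens each element index E to ELTSZ consecutive
byte selectors E*ELTSZ .. E*ELTSZ+ELTSZ-1; e.g. with ELTSZ == 2,
word index 5 becomes the byte pair { 10, 11 }. */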
35990 vperm = gen_rtx_CONST_VECTOR (vmode,
35991 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
35992 vperm = force_reg (vmode, vperm);
35994 target = gen_lowpart (vmode, d->target);
35995 op0 = gen_lowpart (vmode, d->op0);
35996 if (d->op0 == d->op1)
35998 if (vmode == V16QImode)
35999 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
36000 else if (vmode == V32QImode)
36001 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
36003 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
36007 op1 = gen_lowpart (vmode, d->op1);
36008 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
36014 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36015 in a single instruction. */
36018 expand_vec_perm_1 (struct expand_vec_perm_d *d)
36020 unsigned i, nelt = d->nelt;
36021 unsigned char perm2[MAX_VECT_LEN];
36023 /* Check plain VEC_SELECT first, because AVX has instructions that could
36024 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36025 input where SEL+CONCAT may not. */
36026 if (d->op0 == d->op1)
36028 int mask = nelt - 1;
36029 bool identity_perm = true;
36030 bool broadcast_perm = true;
36032 for (i = 0; i < nelt; i++)
36034 perm2[i] = d->perm[i] & mask;
36036 identity_perm = false;
36038 broadcast_perm = false;
36044 emit_move_insn (d->target, d->op0);
36047 else if (broadcast_perm && TARGET_AVX2)
36049 /* Use vpbroadcast{b,w,d}. */
36050 rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
36054 op = gen_lowpart (V16QImode, op);
36055 gen = gen_avx2_pbroadcastv32qi;
36058 op = gen_lowpart (V8HImode, op);
36059 gen = gen_avx2_pbroadcastv16hi;
36062 op = gen_lowpart (V4SImode, op);
36063 gen = gen_avx2_pbroadcastv8si;
36066 gen = gen_avx2_pbroadcastv16qi;
36069 gen = gen_avx2_pbroadcastv8hi;
36071 /* For other modes prefer other shuffles this function creates. */
36077 emit_insn (gen (d->target, op));
36082 if (expand_vselect (d->target, d->op0, perm2, nelt))
36085 /* There are plenty of patterns in sse.md that are written for
36086 SEL+CONCAT and are not replicated for a single op. Perhaps
36087 that should be changed, to avoid the nastiness here. */
36089 /* Recognize interleave style patterns, which means incrementing
36090 every other permutation operand. */
36091 for (i = 0; i < nelt; i += 2)
36093 perm2[i] = d->perm[i] & mask;
36094 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
36096 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36099 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
36102 for (i = 0; i < nelt; i += 4)
36104 perm2[i + 0] = d->perm[i + 0] & mask;
36105 perm2[i + 1] = d->perm[i + 1] & mask;
36106 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
36107 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
36110 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36115 /* Finally, try the fully general two operand permute. */
36116 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
36119 /* Recognize interleave style patterns with reversed operands. */
36120 if (d->op0 != d->op1)
36122 for (i = 0; i < nelt; ++i)
36124 unsigned e = d->perm[i];
36132 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
36136 /* Try the SSE4.1 blend variable merge instructions. */
36137 if (expand_vec_perm_blend (d))
36140 /* Try one of the AVX vpermil variable permutations. */
36141 if (expand_vec_perm_vpermil (d))
36144 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
36145 vpshufb, vpermd or vpermq variable permutation. */
36146 if (expand_vec_perm_pshufb (d))
36152 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36153 in terms of a pair of pshuflw + pshufhw instructions. */
36156 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
36158 unsigned char perm2[MAX_VECT_LEN];
36162 if (d->vmode != V8HImode || d->op0 != d->op1)
36165 /* The two permutations only operate in 64-bit lanes. */
36166 for (i = 0; i < 4; ++i)
36167 if (d->perm[i] >= 4)
36169 for (i = 4; i < 8; ++i)
36170 if (d->perm[i] < 4)
36176 /* Emit the pshuflw. */
36177 memcpy (perm2, d->perm, 4);
36178 for (i = 4; i < 8; ++i)
36180 ok = expand_vselect (d->target, d->op0, perm2, 8);
36183 /* Emit the pshufhw. */
36184 memcpy (perm2 + 4, d->perm + 4, 4);
36185 for (i = 0; i < 4; ++i)
36187 ok = expand_vselect (d->target, d->target, perm2, 8);
36193 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36194 the permutation using the SSSE3 palignr instruction. This succeeds
36195 when all of the elements in PERM fit within one vector and we merely
36196 need to shift them down so that a single vector permutation has a
36197 chance to succeed. */
36200 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
36202 unsigned i, nelt = d->nelt;
36207 /* Even with AVX, palignr only operates on 128-bit vectors. */
36208 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36211 min = nelt, max = 0;
36212 for (i = 0; i < nelt; ++i)
36214 unsigned e = d->perm[i];
36220 if (min == 0 || max - min >= nelt)
36223 /* Given that we have SSSE3, we know we'll be able to implement the
36224 single operand permutation after the palignr with pshufb. */
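/* palignr shifts the OP1:OP0 concatenation right by a whole number of
elements, so shifting by MIN makes the lowest referenced element
become element 0; the indices are rebased by -MIN below. */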
36228 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
36229 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
36230 gen_lowpart (TImode, d->op1),
36231 gen_lowpart (TImode, d->op0), shift));
36233 d->op0 = d->op1 = d->target;
36236 for (i = 0; i < nelt; ++i)
36238 unsigned e = d->perm[i] - min;
36244 /* Test for the degenerate case where the alignment by itself
36245 produces the desired permutation. */
36249 ok = expand_vec_perm_1 (d);
36255 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
36257 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36258 a two vector permutation into a single vector permutation by using
36259 an interleave operation to merge the vectors. */
36262 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
36264 struct expand_vec_perm_d dremap, dfinal;
36265 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
36266 unsigned HOST_WIDE_INT contents;
36267 unsigned char remap[2 * MAX_VECT_LEN];
36269 bool ok, same_halves = false;
36271 if (GET_MODE_SIZE (d->vmode) == 16)
36273 if (d->op0 == d->op1)
36276 else if (GET_MODE_SIZE (d->vmode) == 32)
36280 /* For 32-byte modes allow even d->op0 == d->op1.
36281 The lack of cross-lane shuffling in some instructions
36282 might prevent a single insn shuffle. */
36284 dfinal.testing_p = true;
36285 /* If expand_vec_perm_interleave3 can expand this into
36286 a 3 insn sequence, give up and let it be expanded as
36287 a 3 insn sequence. While that is one insn longer,
36288 it doesn't need a memory operand, and in the common
36289 case that both the interleave low and high permutations
36290 with the same operands are adjacent, the pair needs
36291 only 4 insns after CSE. */
36292 if (expand_vec_perm_interleave3 (&dfinal))
36298 /* Examine from whence the elements come. */
36300 for (i = 0; i < nelt; ++i)
36301 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
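/* CONTENTS now has one bit per referenced input element: bits 0 to
NELT-1 for OP0, bits NELT to 2*NELT-1 for OP1. */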
36303 memset (remap, 0xff, sizeof (remap));
36306 if (GET_MODE_SIZE (d->vmode) == 16)
36308 unsigned HOST_WIDE_INT h1, h2, h3, h4;
36310 /* Split the two input vectors into 4 halves. */
36311 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
36316 /* If the elements are from the low halves, use interleave low; use
36317 interleave high similarly. If the elements are from mis-matched
36318 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
36319 if ((contents & (h1 | h3)) == contents)
36322 for (i = 0; i < nelt2; ++i)
36325 remap[i + nelt] = i * 2 + 1;
36326 dremap.perm[i * 2] = i;
36327 dremap.perm[i * 2 + 1] = i + nelt;
36329 if (!TARGET_SSE2 && d->vmode == V4SImode)
36330 dremap.vmode = V4SFmode;
36332 else if ((contents & (h2 | h4)) == contents)
36335 for (i = 0; i < nelt2; ++i)
36337 remap[i + nelt2] = i * 2;
36338 remap[i + nelt + nelt2] = i * 2 + 1;
36339 dremap.perm[i * 2] = i + nelt2;
36340 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
36342 if (!TARGET_SSE2 && d->vmode == V4SImode)
36343 dremap.vmode = V4SFmode;
36345 else if ((contents & (h1 | h4)) == contents)
36348 for (i = 0; i < nelt2; ++i)
36351 remap[i + nelt + nelt2] = i + nelt2;
36352 dremap.perm[i] = i;
36353 dremap.perm[i + nelt2] = i + nelt + nelt2;
36358 dremap.vmode = V2DImode;
36360 dremap.perm[0] = 0;
36361 dremap.perm[1] = 3;
36364 else if ((contents & (h2 | h3)) == contents)
36367 for (i = 0; i < nelt2; ++i)
36369 remap[i + nelt2] = i;
36370 remap[i + nelt] = i + nelt2;
36371 dremap.perm[i] = i + nelt2;
36372 dremap.perm[i + nelt2] = i + nelt;
36377 dremap.vmode = V2DImode;
36379 dremap.perm[0] = 1;
36380 dremap.perm[1] = 2;
36388 unsigned int nelt4 = nelt / 4, nzcnt = 0;
36389 unsigned HOST_WIDE_INT q[8];
36390 unsigned int nonzero_halves[4];
36392 /* Split the two input vectors into 8 quarters. */
36393 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
36394 for (i = 1; i < 8; ++i)
36395 q[i] = q[0] << (nelt4 * i);
36396 for (i = 0; i < 4; ++i)
36397 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
36399 nonzero_halves[nzcnt] = i;
36405 gcc_assert (d->op0 == d->op1);
36406 nonzero_halves[1] = nonzero_halves[0];
36407 same_halves = true;
36409 else if (d->op0 == d->op1)
36411 gcc_assert (nonzero_halves[0] == 0);
36412 gcc_assert (nonzero_halves[1] == 1);
36417 if (d->perm[0] / nelt2 == nonzero_halves[1])
36419 /* Attempt to increase the likelihood that dfinal
36420 shuffle will be intra-lane. */
36421 char tmph = nonzero_halves[0];
36422 nonzero_halves[0] = nonzero_halves[1];
36423 nonzero_halves[1] = tmph;
36426 /* vperm2f128 or vperm2i128. */
36427 for (i = 0; i < nelt2; ++i)
36429 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
36430 remap[i + nonzero_halves[0] * nelt2] = i;
36431 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
36432 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
36435 if (d->vmode != V8SFmode
36436 && d->vmode != V4DFmode
36437 && d->vmode != V8SImode)
36439 dremap.vmode = V8SImode;
36441 for (i = 0; i < 4; ++i)
36443 dremap.perm[i] = i + nonzero_halves[0] * 4;
36444 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
36448 else if (d->op0 == d->op1)
36450 else if (TARGET_AVX2
36451 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
36454 for (i = 0; i < nelt4; ++i)
36457 remap[i + nelt] = i * 2 + 1;
36458 remap[i + nelt2] = i * 2 + nelt2;
36459 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
36460 dremap.perm[i * 2] = i;
36461 dremap.perm[i * 2 + 1] = i + nelt;
36462 dremap.perm[i * 2 + nelt2] = i + nelt2;
36463 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
36466 else if (TARGET_AVX2
36467 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
36470 for (i = 0; i < nelt4; ++i)
36472 remap[i + nelt4] = i * 2;
36473 remap[i + nelt + nelt4] = i * 2 + 1;
36474 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
36475 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
36476 dremap.perm[i * 2] = i + nelt4;
36477 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
36478 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
36479 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
36486 /* Use the remapping array set up above to move the elements from their
36487 swizzled locations into their final destinations. */
36489 for (i = 0; i < nelt; ++i)
36491 unsigned e = remap[d->perm[i]];
36492 gcc_assert (e < nelt);
36493 /* If same_halves is true, both halves of the remapped vector are the
36494 same. Avoid cross-lane accesses if possible. */
36495 if (same_halves && i >= nelt2)
36497 gcc_assert (e < nelt2);
36498 dfinal.perm[i] = e + nelt2;
36501 dfinal.perm[i] = e;
36503 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
36504 dfinal.op1 = dfinal.op0;
36505 dremap.target = dfinal.op0;
36507 /* Test if the final remap can be done with a single insn. For V4SFmode or
36508 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
36510 ok = expand_vec_perm_1 (&dfinal);
36511 seq = get_insns ();
36520 if (dremap.vmode != dfinal.vmode)
36522 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
36523 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
36524 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
36527 ok = expand_vec_perm_1 (&dremap);
36534 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36535 a single vector cross-lane permutation into vpermq followed
36536 by any of the single insn permutations. */
36539 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
36541 struct expand_vec_perm_d dremap, dfinal;
36542 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
36543 unsigned contents[2];
36547 && (d->vmode == V32QImode || d->vmode == V16HImode)
36548 && d->op0 == d->op1))
36553 for (i = 0; i < nelt2; ++i)
36555 contents[0] |= 1u << (d->perm[i] / nelt4);
36556 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
36559 for (i = 0; i < 2; ++i)
36561 unsigned int cnt = 0;
36562 for (j = 0; j < 4; ++j)
36563 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
36571 dremap.vmode = V4DImode;
36573 dremap.target = gen_reg_rtx (V4DImode);
36574 dremap.op0 = gen_lowpart (V4DImode, d->op0);
36575 dremap.op1 = dremap.op0;
36576 for (i = 0; i < 2; ++i)
36578 unsigned int cnt = 0;
36579 for (j = 0; j < 4; ++j)
36580 if ((contents[i] & (1u << j)) != 0)
36581 dremap.perm[2 * i + cnt++] = j;
36582 for (; cnt < 2; ++cnt)
36583 dremap.perm[2 * i + cnt] = 0;
36587 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
36588 dfinal.op1 = dfinal.op0;
36589 for (i = 0, j = 0; i < nelt; ++i)
36593 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
36594 if ((d->perm[i] / nelt4) == dremap.perm[j])
36596 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
36597 dfinal.perm[i] |= nelt4;
36599 gcc_unreachable ();
36602 ok = expand_vec_perm_1 (&dremap);
36605 ok = expand_vec_perm_1 (&dfinal);
36611 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36612 a two vector permutation using 2 intra-lane interleave insns
36613 and cross-lane shuffle for 32-byte vectors. */
36616 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
36619 rtx (*gen) (rtx, rtx, rtx);
36621 if (d->op0 == d->op1)
36623 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
36625 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
36631 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
36633 for (i = 0; i < nelt; i += 2)
36634 if (d->perm[i] != d->perm[0] + i / 2
36635 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
36645 gen = gen_vec_interleave_highv32qi;
36647 gen = gen_vec_interleave_lowv32qi;
36651 gen = gen_vec_interleave_highv16hi;
36653 gen = gen_vec_interleave_lowv16hi;
36657 gen = gen_vec_interleave_highv8si;
36659 gen = gen_vec_interleave_lowv8si;
36663 gen = gen_vec_interleave_highv4di;
36665 gen = gen_vec_interleave_lowv4di;
36669 gen = gen_vec_interleave_highv8sf;
36671 gen = gen_vec_interleave_lowv8sf;
36675 gen = gen_vec_interleave_highv4df;
36677 gen = gen_vec_interleave_lowv4df;
36680 gcc_unreachable ();
36683 emit_insn (gen (d->target, d->op0, d->op1));
36687 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
36688 permutation with two pshufb insns and an ior. We should have already
36689 failed all two instruction sequences. */
36692 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
36694 rtx rperm[2][16], vperm, l, h, op, m128;
36695 unsigned int i, nelt, eltsz;
36697 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36699 gcc_assert (d->op0 != d->op1);
36702 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36704 /* Generate two permutation masks. If the required element is within
36705 the given vector it is shuffled into the proper lane. If the required
36706 element is in the other vector, force a zero into the lane by setting
36707 bit 7 in the permutation mask. */
36708 m128 = GEN_INT (-128);
36709 for (i = 0; i < nelt; ++i)
36711 unsigned j, e = d->perm[i];
36712 unsigned which = (e >= nelt);
36716 for (j = 0; j < eltsz; ++j)
36718 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
36719 rperm[1-which][i*eltsz + j] = m128;
36723 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
36724 vperm = force_reg (V16QImode, vperm);
36726 l = gen_reg_rtx (V16QImode);
36727 op = gen_lowpart (V16QImode, d->op0);
36728 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
36730 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
36731 vperm = force_reg (V16QImode, vperm);
36733 h = gen_reg_rtx (V16QImode);
36734 op = gen_lowpart (V16QImode, d->op1);
36735 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
36737 op = gen_lowpart (V16QImode, d->target);
36738 emit_insn (gen_iorv16qi3 (op, l, h));
36743 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
36744 with two vpshufb insns, vpermq and vpor. We should have already failed
36745 all two or three instruction sequences. */
36748 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
36750 rtx rperm[2][32], vperm, l, h, hp, op, m128;
36751 unsigned int i, nelt, eltsz;
36754 || d->op0 != d->op1
36755 || (d->vmode != V32QImode && d->vmode != V16HImode))
36762 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36764 /* Generate two permutation masks. If the required element comes from
36765 the same lane, it is shuffled in directly. If it comes from the
36766 other lane, force a zero by setting bit 7 in the permutation mask.
36767 In the second mask, an element is non-negative only when it is
36768 requested from the other lane; it is also moved to the other lane,
36769 so that the result of vpshufb can have the two V2TImode halves
36771 m128 = GEN_INT (-128);
36772 for (i = 0; i < nelt; ++i)
36774 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36775 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
36777 for (j = 0; j < eltsz; ++j)
36779 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
36780 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
36784 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36785 vperm = force_reg (V32QImode, vperm);
36787 h = gen_reg_rtx (V32QImode);
36788 op = gen_lowpart (V32QImode, d->op0);
36789 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36791 /* Swap the 128-bit lanes of h into hp. */
36792 hp = gen_reg_rtx (V4DImode);
36793 op = gen_lowpart (V4DImode, h);
36794 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
36797 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36798 vperm = force_reg (V32QImode, vperm);
36800 l = gen_reg_rtx (V32QImode);
36801 op = gen_lowpart (V32QImode, d->op0);
36802 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36804 op = gen_lowpart (V32QImode, d->target);
36805 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
36810 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
36811 and extract-odd permutations of two V32QImode and V16QImode operands
36812 with two vpshufb insns, vpor and vpermq. We should have already
36813 failed all two or three instruction sequences. */
36816 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
36818 rtx rperm[2][32], vperm, l, h, ior, op, m128;
36819 unsigned int i, nelt, eltsz;
36822 || d->op0 == d->op1
36823 || (d->vmode != V32QImode && d->vmode != V16HImode))
36826 for (i = 0; i < d->nelt; ++i)
36827 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
36834 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36836 /* Generate two permutation masks. In the first permutation mask
36837 the first quarter will contain indexes for the first half
36838 of the op0, the second quarter will contain bit 7 set, third quarter
36839 will contain indexes for the second half of the op0 and the
36840 last quarter bit 7 set. In the second permutation mask
36841 the first quarter will contain bit 7 set, the second quarter
36842 indexes for the first half of the op1, the third quarter bit 7 set
36843 and last quarter indexes for the second half of the op1.
36844 I.e. the first mask e.g. for V32QImode extract even will be:
36845 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
36846 (all values masked with 0xf except for -128) and second mask
36847 for extract even will be
36848 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
36849 m128 = GEN_INT (-128);
36850 for (i = 0; i < nelt; ++i)
36852 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36853 unsigned which = d->perm[i] >= nelt;
36854 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
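/* XORing the byte index with 24 swaps bytes 8-15 with bytes 16-23,
i.e. the two middle quarters, so that the final { 0, 2, 1, 3 }
V4DImode permute below puts everything back in order. */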
36856 for (j = 0; j < eltsz; ++j)
36858 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
36859 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
36863 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36864 vperm = force_reg (V32QImode, vperm);
36866 l = gen_reg_rtx (V32QImode);
36867 op = gen_lowpart (V32QImode, d->op0);
36868 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36870 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36871 vperm = force_reg (V32QImode, vperm);
36873 h = gen_reg_rtx (V32QImode);
36874 op = gen_lowpart (V32QImode, d->op1);
36875 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36877 ior = gen_reg_rtx (V32QImode);
36878 emit_insn (gen_iorv32qi3 (ior, l, h));
36880 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
36881 op = gen_lowpart (V4DImode, d->target);
36882 ior = gen_lowpart (V4DImode, ior);
36883 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
36884 const1_rtx, GEN_INT (3)));
36889 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
36890 and extract-odd permutations. */
36893 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
36900 t1 = gen_reg_rtx (V4DFmode);
36901 t2 = gen_reg_rtx (V4DFmode);
36903 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36904 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
36905 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
36907 /* Now an unpck[lh]pd will produce the result required. */
36909 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
36911 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
36917 int mask = odd ? 0xdd : 0x88;
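/* shufps takes two mask bits per destination element: 0x88 selects
elements { 0, 2 } from each source (extract even), 0xdd selects
{ 1, 3 } (extract odd). */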
36919 t1 = gen_reg_rtx (V8SFmode);
36920 t2 = gen_reg_rtx (V8SFmode);
36921 t3 = gen_reg_rtx (V8SFmode);
36923 /* Shuffle within the 128-bit lanes to produce:
36924 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
36925 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
36928 /* Shuffle the lanes around to produce:
36929 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
36930 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
36933 /* Shuffle within the 128-bit lanes to produce:
36934 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
36935 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
36937 /* Shuffle within the 128-bit lanes to produce:
36938 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
36939 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
36941 /* Shuffle the lanes around to produce:
36942 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
36943 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
36952 /* These are always directly implementable by expand_vec_perm_1. */
36953 gcc_unreachable ();
36957 return expand_vec_perm_pshufb2 (d);
36960 /* We need 2*log2(N)-1 operations to achieve odd/even
36961 with interleave. */
36962 t1 = gen_reg_rtx (V8HImode);
36963 t2 = gen_reg_rtx (V8HImode);
36964 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
36965 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
36966 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
36967 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
36969 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
36971 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
36978 return expand_vec_perm_pshufb2 (d);
36981 t1 = gen_reg_rtx (V16QImode);
36982 t2 = gen_reg_rtx (V16QImode);
36983 t3 = gen_reg_rtx (V16QImode);
36984 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
36985 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
36986 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
36987 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
36988 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
36989 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
36991 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
36993 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
37000 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
37005 struct expand_vec_perm_d d_copy = *d;
37006 d_copy.vmode = V4DFmode;
37007 d_copy.target = gen_lowpart (V4DFmode, d->target);
37008 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
37009 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
37010 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37013 t1 = gen_reg_rtx (V4DImode);
37014 t2 = gen_reg_rtx (V4DImode);
37016 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37017 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
37018 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
37020 /* Now a vpunpck[lh]qdq will produce the result required. */
37022 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
37024 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
37031 struct expand_vec_perm_d d_copy = *d;
37032 d_copy.vmode = V8SFmode;
37033 d_copy.target = gen_lowpart (V8SFmode, d->target);
37034 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
37035 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
37036 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37039 t1 = gen_reg_rtx (V8SImode);
37040 t2 = gen_reg_rtx (V8SImode);
37042 /* Shuffle the lanes around into
37043 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
37044 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
37045 gen_lowpart (V4DImode, d->op0),
37046 gen_lowpart (V4DImode, d->op1),
37048 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
37049 gen_lowpart (V4DImode, d->op0),
37050 gen_lowpart (V4DImode, d->op1),
37053 /* Swap the 2nd and 3rd position in each lane into
37054 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
37055 emit_insn (gen_avx2_pshufdv3 (t1, t1,
37056 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37057 emit_insn (gen_avx2_pshufdv3 (t2, t2,
37058 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
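/* The immediate 2*4 + 1*16 + 3*64 == 0xd8 encodes, two bits per dword
position, the in-lane selection { 0, 2, 1, 3 }. */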
37060 /* Now a vpunpck[lh]qdq will produce
37061 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
37063 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
37064 gen_lowpart (V4DImode, t1),
37065 gen_lowpart (V4DImode, t2));
37067 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
37068 gen_lowpart (V4DImode, t1),
37069 gen_lowpart (V4DImode, t2));
37074 gcc_unreachable ();
37080 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37081 extract-even and extract-odd permutations. */
37084 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
37086 unsigned i, odd, nelt = d->nelt;
37089 if (odd != 0 && odd != 1)
37092 for (i = 1; i < nelt; ++i)
37093 if (d->perm[i] != 2 * i + odd)
37096 return expand_vec_perm_even_odd_1 (d, odd);
37099 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
37100 permutations. We assume that expand_vec_perm_1 has already failed. */
37103 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
37105 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
37106 enum machine_mode vmode = d->vmode;
37107 unsigned char perm2[4];
37115 /* These are special-cased in sse.md so that we can optionally
37116 use the vbroadcast instruction. They expand to two insns
37117 if the input happens to be in a register. */
37118 gcc_unreachable ();
37124 /* These are always implementable using standard shuffle patterns. */
37125 gcc_unreachable ();
37129 /* These can be implemented via interleave. We save one insn by
37130 stopping once we have promoted to V4SImode and then use pshufd. */
37134 rtx (*gen) (rtx, rtx, rtx)
37135 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
37136 : gen_vec_interleave_lowv8hi;
37140 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
37141 : gen_vec_interleave_highv8hi;
37146 dest = gen_reg_rtx (vmode);
37147 emit_insn (gen (dest, op0, op0));
37148 vmode = get_mode_wider_vector (vmode);
37149 op0 = gen_lowpart (vmode, dest);
37151 while (vmode != V4SImode);
37153 memset (perm2, elt, 4);
37154 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
37162 /* For AVX2 broadcasts of the first element vpbroadcast* or
37163 vpermq should be used by expand_vec_perm_1. */
37164 gcc_assert (!TARGET_AVX2 || d->perm[0]);
37168 gcc_unreachable ();
37172 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37173 broadcast permutations. */
37176 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
37178 unsigned i, elt, nelt = d->nelt;
37180 if (d->op0 != d->op1)
37184 for (i = 1; i < nelt; ++i)
37185 if (d->perm[i] != elt)
37188 return expand_vec_perm_broadcast_1 (d);
37191 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
37192 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
37193 all the shorter instruction sequences. */
37196 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
37198 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
37199 unsigned int i, nelt, eltsz;
37203 || d->op0 == d->op1
37204 || (d->vmode != V32QImode && d->vmode != V16HImode))
37211 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
37213 /* Generate 4 permutation masks. If the required element comes from
37214 the same lane, it is shuffled in directly. If it comes from the
37215 other lane, force a zero by setting bit 7 in the permutation mask.
37216 In the cross-lane masks, an element is non-negative only when it is
37217 requested from the other lane; it is also moved to the other lane,
37218 so that the result of vpshufb can have the two V2TImode halves
37220 m128 = GEN_INT (-128);
37221 for (i = 0; i < 32; ++i)
37223 rperm[0][i] = m128;
37224 rperm[1][i] = m128;
37225 rperm[2][i] = m128;
37226 rperm[3][i] = m128;
37232 for (i = 0; i < nelt; ++i)
37234 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37235 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
37236 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
37238 for (j = 0; j < eltsz; ++j)
37239 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
37240 used[which] = true;
37243 for (i = 0; i < 2; ++i)
37245 if (!used[2 * i + 1])
37250 vperm = gen_rtx_CONST_VECTOR (V32QImode,
37251 gen_rtvec_v (32, rperm[2 * i + 1]));
37252 vperm = force_reg (V32QImode, vperm);
37253 h[i] = gen_reg_rtx (V32QImode);
37254 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37255 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
37258 /* Swap the 128-bit lanes of h[X]. */
37259 for (i = 0; i < 2; ++i)
37261 if (h[i] == NULL_RTX)
37263 op = gen_reg_rtx (V4DImode);
37264 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
37265 const2_rtx, GEN_INT (3), const0_rtx,
37267 h[i] = gen_lowpart (V32QImode, op);
37270 for (i = 0; i < 2; ++i)
37277 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
37278 vperm = force_reg (V32QImode, vperm);
37279 l[i] = gen_reg_rtx (V32QImode);
37280 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37281 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
37284 for (i = 0; i < 2; ++i)
37288 op = gen_reg_rtx (V32QImode);
37289 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
37296 gcc_assert (l[0] && l[1]);
37297 op = gen_lowpart (V32QImode, d->target);
37298 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
37302 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
37303 With all of the interface bits taken care of, perform the expansion
37304 in D and return true on success. */
37307 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
37309 /* Try a single instruction expansion. */
37310 if (expand_vec_perm_1 (d))
37313 /* Try sequences of two instructions. */
37315 if (expand_vec_perm_pshuflw_pshufhw (d))
37318 if (expand_vec_perm_palignr (d))
37321 if (expand_vec_perm_interleave2 (d))
37324 if (expand_vec_perm_broadcast (d))
37327 if (expand_vec_perm_vpermq_perm_1 (d))
37330 /* Try sequences of three instructions. */
37332 if (expand_vec_perm_pshufb2 (d))
37335 if (expand_vec_perm_interleave3 (d))
37338 /* Try sequences of four instructions. */
37340 if (expand_vec_perm_vpshufb2_vpermq (d))
37343 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
37346 /* ??? Look for narrow permutations whose element orderings would
37347 allow the promotion to a wider mode. */
37349 /* ??? Look for sequences of interleave or a wider permute that place
37350 the data into the correct lanes for a half-vector shuffle like
37351 pshuf[lh]w or vpermilps. */
37353 /* ??? Look for sequences of interleave that produce the desired results.
37354 The combinatorics of punpck[lh] get pretty ugly... */
37356 if (expand_vec_perm_even_odd (d))
37359 /* Even longer sequences. */
37360 if (expand_vec_perm_vpshufb4_vpermq2 (d))
37367 ix86_expand_vec_perm_const (rtx operands[4])
37369 struct expand_vec_perm_d d;
37370 unsigned char perm[MAX_VECT_LEN];
37371 int i, nelt, which;
37374 d.target = operands[0];
37375 d.op0 = operands[1];
37376 d.op1 = operands[2];
37379 d.vmode = GET_MODE (d.target);
37380 gcc_assert (VECTOR_MODE_P (d.vmode));
37381 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37382 d.testing_p = false;
37384 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
37385 gcc_assert (XVECLEN (sel, 0) == nelt);
37386 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
37388 for (i = which = 0; i < nelt; ++i)
37390 rtx e = XVECEXP (sel, 0, i);
37391 int ei = INTVAL (e) & (2 * nelt - 1);
37393 which |= (ei < nelt ? 1 : 2);
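/* WHICH accumulates which operands are referenced: bit 0 for elements
of the first operand, bit 1 for elements of the second. */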
37404 if (!rtx_equal_p (d.op0, d.op1))
37407 /* The elements of PERM do not suggest that only the first operand
37408 is used, but both operands are identical. Allow easier matching
37409 of the permutation by folding the permutation into the single
input vector. */
37411 for (i = 0; i < nelt; ++i)
37412 if (d.perm[i] >= nelt)
37421 for (i = 0; i < nelt; ++i)
37427 if (ix86_expand_vec_perm_const_1 (&d))
37430 /* If the mask says both arguments are needed, but they are the same,
37431 the above tried to expand with d.op0 == d.op1. If that didn't work,
37432 retry with d.op0 != d.op1 as that is what testing has been done with. */
37433 if (which == 3 && d.op0 == d.op1)
37438 memcpy (d.perm, perm, sizeof (perm));
37439 d.op1 = gen_reg_rtx (d.vmode);
37441 ok = ix86_expand_vec_perm_const_1 (&d);
37442 seq = get_insns ();
37446 emit_move_insn (d.op1, d.op0);
37455 /* Implement targetm.vectorize.vec_perm_const_ok. */
37458 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
37459 const unsigned char *sel)
37461 struct expand_vec_perm_d d;
37462 unsigned int i, nelt, which;
37466 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37467 d.testing_p = true;
37469 /* Given sufficient ISA support we can just return true here
37470 for selected vector modes. */
37471 if (GET_MODE_SIZE (d.vmode) == 16)
37473 /* All implementable with a single vpperm insn. */
37476 /* All implementable with 2 pshufb + 1 ior. */
37479 /* All implementable with shufpd or unpck[lh]pd. */
37484 /* Extract the values from the vector CST into the permutation
array in D. */
37486 memcpy (d.perm, sel, nelt);
37487 for (i = which = 0; i < nelt; ++i)
37489 unsigned char e = d.perm[i];
37490 gcc_assert (e < 2 * nelt);
37491 which |= (e < nelt ? 1 : 2);
37494 /* For all elements from second vector, fold the elements to first. */
37496 for (i = 0; i < nelt; ++i)
37499 /* Check whether the mask can be applied to the vector type. */
37500 one_vec = (which != 3);
37502 /* Implementable with shufps or pshufd. */
37503 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
37506 /* Otherwise we have to go through the motions and see if we can
37507 figure out how to generate the requested permutation. */
37508 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
37509 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
37511 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
37514 ret = ix86_expand_vec_perm_const_1 (&d);
37521 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
37523 struct expand_vec_perm_d d;
37529 d.vmode = GET_MODE (targ);
37530 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37531 d.testing_p = false;
37533 for (i = 0; i < nelt; ++i)
37534 d.perm[i] = i * 2 + odd;
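/* I.e. extract-even (ODD == 0) selects { 0, 2, 4, ... } and
extract-odd (ODD == 1) selects { 1, 3, 5, ... }. */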
37536 /* We'll either be able to implement the permutation directly... */
37537 if (expand_vec_perm_1 (&d))
37540 /* ... or we use the special-case patterns. */
37541 expand_vec_perm_even_odd_1 (&d, odd);
37544 /* Expand an insert into a vector register through pinsr insn.
37545 Return true if successful. */
37548 ix86_expand_pinsr (rtx *operands)
37550 rtx dst = operands[0];
37551 rtx src = operands[3];
37553 unsigned int size = INTVAL (operands[1]);
37554 unsigned int pos = INTVAL (operands[2]);
37556 if (GET_CODE (dst) == SUBREG)
37558 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
37559 dst = SUBREG_REG (dst);
37562 if (GET_CODE (src) == SUBREG)
37563 src = SUBREG_REG (src);
37565 switch (GET_MODE (dst))
37572 enum machine_mode srcmode, dstmode;
37573 rtx (*pinsr)(rtx, rtx, rtx, rtx);
37575 srcmode = mode_for_size (size, MODE_INT, 0);
37580 if (!TARGET_SSE4_1)
37582 dstmode = V16QImode;
37583 pinsr = gen_sse4_1_pinsrb;
37589 dstmode = V8HImode;
37590 pinsr = gen_sse2_pinsrw;
37594 if (!TARGET_SSE4_1)
37596 dstmode = V4SImode;
37597 pinsr = gen_sse4_1_pinsrd;
37601 gcc_assert (TARGET_64BIT);
37602 if (!TARGET_SSE4_1)
37604 dstmode = V2DImode;
37605 pinsr = gen_sse4_1_pinsrq;
37612 dst = gen_lowpart (dstmode, dst);
37613 src = gen_lowpart (srcmode, src);
37617 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
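/* ??? The pinsr expanders take a one-hot element selector, so POS is
assumed to have been scaled from a bit offset to an element index by
code elided above. */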
37626 /* This function returns the calling-ABI-specific va_list type node.
37627 It returns the va_list type specific to FNDECL. */
37630 ix86_fn_abi_va_list (tree fndecl)
37633 return va_list_type_node;
37634 gcc_assert (fndecl != NULL_TREE);
37636 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
37637 return ms_va_list_type_node;
37639 return sysv_va_list_type_node;
37642 /* Returns the canonical va_list type specified by TYPE. If no valid
37643 TYPE is provided, it returns NULL_TREE. */
37646 ix86_canonical_va_list_type (tree type)
37650 /* Resolve references and pointers to va_list type. */
37651 if (TREE_CODE (type) == MEM_REF)
37652 type = TREE_TYPE (type);
37653 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
37654 type = TREE_TYPE (type);
37655 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
37656 type = TREE_TYPE (type);
37658 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
37660 wtype = va_list_type_node;
37661 gcc_assert (wtype != NULL_TREE);
37663 if (TREE_CODE (wtype) == ARRAY_TYPE)
37665 /* If va_list is an array type, the argument may have decayed
37666 to a pointer type, e.g. by being passed to another function.
37667 In that case, unwrap both types so that we can compare the
37668 underlying records. */
37669 if (TREE_CODE (htype) == ARRAY_TYPE
37670 || POINTER_TYPE_P (htype))
37672 wtype = TREE_TYPE (wtype);
37673 htype = TREE_TYPE (htype);
37676 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37677 return va_list_type_node;
37678 wtype = sysv_va_list_type_node;
37679 gcc_assert (wtype != NULL_TREE);
37681 if (TREE_CODE (wtype) == ARRAY_TYPE)
37683 /* If va_list is an array type, the argument may have decayed
37684 to a pointer type, e.g. by being passed to another function.
37685 In that case, unwrap both types so that we can compare the
37686 underlying records. */
37687 if (TREE_CODE (htype) == ARRAY_TYPE
37688 || POINTER_TYPE_P (htype))
37690 wtype = TREE_TYPE (wtype);
37691 htype = TREE_TYPE (htype);
37694 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37695 return sysv_va_list_type_node;
37696 wtype = ms_va_list_type_node;
37697 gcc_assert (wtype != NULL_TREE);
37699 if (TREE_CODE (wtype) == ARRAY_TYPE)
37701 /* If va_list is an array type, the argument may have decayed
37702 to a pointer type, e.g. by being passed to another function.
37703 In that case, unwrap both types so that we can compare the
37704 underlying records. */
37705 if (TREE_CODE (htype) == ARRAY_TYPE
37706 || POINTER_TYPE_P (htype))
37708 wtype = TREE_TYPE (wtype);
37709 htype = TREE_TYPE (htype);
37712 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37713 return ms_va_list_type_node;
37716 return std_canonical_va_list_type (type);
37719 /* Iterate through the target-specific builtin types for va_list.
37720 IDX denotes the iterator, *PTREE is set to the result type of
37721 the va_list builtin, and *PNAME to its internal type.
37722 Returns zero if there is no element for this index, otherwise
37723 IDX should be increased upon the next call.
37724 Note, do not iterate a base builtin's name like __builtin_va_list.
37725 Used from c_common_nodes_and_builtins. */
37728 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
37738 *ptree = ms_va_list_type_node;
37739 *pname = "__builtin_ms_va_list";
37743 *ptree = sysv_va_list_type_node;
37744 *pname = "__builtin_sysv_va_list";
37752 #undef TARGET_SCHED_DISPATCH
37753 #define TARGET_SCHED_DISPATCH has_dispatch
37754 #undef TARGET_SCHED_DISPATCH_DO
37755 #define TARGET_SCHED_DISPATCH_DO do_dispatch
37756 #undef TARGET_SCHED_REASSOCIATION_WIDTH
37757 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
37759 /* The size of the dispatch window is the total number of bytes of
37760 object code allowed in a window. */
37761 #define DISPATCH_WINDOW_SIZE 16
37763 /* Number of dispatch windows considered for scheduling. */
37764 #define MAX_DISPATCH_WINDOWS 3
37766 /* Maximum number of instructions in a window. */
37769 /* Maximum number of immediate operands in a window. */
37772 /* Maximum number of immediate bits allowed in a window. */
37773 #define MAX_IMM_SIZE 128
37775 /* Maximum number of 32 bit immediates allowed in a window. */
37776 #define MAX_IMM_32 4
37778 /* Maximum number of 64 bit immediates allowed in a window. */
37779 #define MAX_IMM_64 2
37781 /* Maximum total of loads or prefetches allowed in a window. */
37784 /* Maximum total of stores allowed in a window. */
37785 #define MAX_STORE 1
37791 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
37792 enum dispatch_group {
37807 /* Number of allowable groups in a dispatch window. It is an array
37808 indexed by dispatch_group enum. 100 is used as a big number,
37809 because the number of these kinds of operations does not have any
37810 effect in a dispatch window, but we need them for other reasons in
the table. */
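/* I.e. at most 2 loads, 1 store, 1 combined load+store, 2 prefetches,
4 immediates, 4 32-bit immediates, 2 64-bit immediates and 1 branch
per window; disp_cmp and disp_jcc use BIG, i.e. are effectively
unlimited. */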
37812 static unsigned int num_allowable_groups[disp_last] = {
37813 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
37816 char group_name[disp_last + 1][16] = {
37817 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
37818 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
37819 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
37822 /* Instruction path. */
37825 path_single, /* Single micro op. */
37826 path_double, /* Double micro op. */
37827 path_multi, /* Instructions with more than 2 micro ops. */
37831 /* sched_insn_info defines a window into the instructions scheduled in
37832 the basic block. It contains a pointer to the insn_info table and
37833 the instruction scheduled.
37835 Windows are allocated for each basic block and are linked
37837 typedef struct sched_insn_info_s {
37839 enum dispatch_group group;
37840 enum insn_path path;
37845 /* Linked list of dispatch windows. This is a two-way list of
37846 dispatch windows of a basic block. It contains information about
37847 the number of uops in the window and the total number of
37848 instructions and of bytes in the object code for this dispatch
37850 typedef struct dispatch_windows_s {
37851 int num_insn; /* Number of insns in the window. */
37852 int num_uops; /* Number of uops in the window. */
37853 int window_size; /* Number of bytes in the window. */
37854 int window_num; /* Window number, 0 or 1. */
37855 int num_imm; /* Number of immediates in an insn. */
37856 int num_imm_32; /* Number of 32 bit immediates in an insn. */
37857 int num_imm_64; /* Number of 64 bit immediates in an insn. */
37858 int imm_size; /* Total immediates in the window. */
37859 int num_loads; /* Total memory loads in the window. */
37860 int num_stores; /* Total memory stores in the window. */
37861 int violation; /* Violation exists in window. */
37862 sched_insn_info *window; /* Pointer to the window. */
37863 struct dispatch_windows_s *next;
37864 struct dispatch_windows_s *prev;
37865 } dispatch_windows;
37867 /* Immediate values used in an insn. */
37868 typedef struct imm_info_s
37875 static dispatch_windows *dispatch_window_list;
37876 static dispatch_windows *dispatch_window_list1;
37878 /* Get dispatch group of insn. */
37880 static enum dispatch_group
37881 get_mem_group (rtx insn)
37883 enum attr_memory memory;
37885 if (INSN_CODE (insn) < 0)
37886 return disp_no_group;
37887 memory = get_attr_memory (insn);
37888 if (memory == MEMORY_STORE)
37891 if (memory == MEMORY_LOAD)
37894 if (memory == MEMORY_BOTH)
37895 return disp_load_store;
37897 return disp_no_group;
37900 /* Return true if insn is a compare instruction. */
37905 enum attr_type type;
37907 type = get_attr_type (insn);
37908 return (type == TYPE_TEST
37909 || type == TYPE_ICMP
37910 || type == TYPE_FCMP
37911 || GET_CODE (PATTERN (insn)) == COMPARE);
37914 /* Return true if a dispatch violation was encountered. */
37917 dispatch_violation (void)
37919 if (dispatch_window_list->next)
37920 return dispatch_window_list->next->violation;
37921 return dispatch_window_list->violation;
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}
/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
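/* Note that allocate_window only obtains storage; the window fields are
   reset separately by init_window, which init_dispatch_sched below calls
   for both windows.  */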
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not the
   hardware dispatch windows, whose size is DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
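/* Requesting window 0 recycles the existing pair, while requesting
   window 1 links the second window behind the first, so the list never
   grows beyond two windows.  */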
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
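/* Example usage (illustrative):

     int imm, imm32, imm64;
     int size = get_num_immediates (insn, &imm, &imm32, &imm64);

   For an insn whose only constant fits a sign-extended 32 bit immediate
   this yields imm == 1, imm32 == 1, imm64 == 0 and size == 4; a constant
   needing the full 64 bits yields imm64 == 1 and size == 8.  */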
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand,
                                     &num_imm32_operand, &num_imm64_operand);
      /* Reject the insn (by returning BIG) if adding its immediates
         would overflow the window's immediate budget: total immediate
         bytes, total immediate count, or the 32/64 bit immediate slots
         (a 64 bit immediate occupies two 32 bit slots).  The final
         clause handles a full immediate area combined with a 64 bit
         immediate and an already crowded window.  */
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand
                     > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2
                     > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
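/* Since BIG (100) exceeds every entry of num_allowable_groups, returning
   BIG from count_num_restricted forces fits_dispatch_window below to
   reject the insn for the current window.  */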
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
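/* Example (illustrative): a disp_imm insn whose immediates would push
   the window past MAX_IMM_SIZE makes count_num_restricted return BIG,
   which exceeds num_allowable_groups[disp_imm], so the insn is reported
   as not fitting and the caller starts a new window.  */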
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      /* A load+store instruction counts as both a load and a store.  */
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowed limits, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full if MAX_INSN uops are scheduled in it.
     Window number one is full if window zero's bytes plus window
     one's bytes reach 32, if adding the new instruction's bytes would
     push the total past 48, or if it already has MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;  /* Toggle between window 0 and 1.  */
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
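/* Illustrative flow: when window 0 already holds MAX_INSN uops, the
   window number is toggled to 1, allocate_next_window links window 1
   into the list, and the insn lands there; once the pair reaches its
   32/48 byte limits, process_end_window resets both windows.  */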
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, " group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to STDOUT the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
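/* has_dispatch gates all of the dispatch-scheduler machinery above: it
   only fires when tuning for Bulldozer (bdver1/bdver2) and the user has
   enabled -mdispatch-scheduler (flag_dispatch_scheduler).  */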
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
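/* For example, with -mavx and without -mprefer-avx128 this prefers the
   256 bit modes (V8SFmode for SFmode, V8SImode for SImode); otherwise
   the 128 bit SSE modes are chosen, and DFmode vectorization can be
   suppressed entirely via TARGET_VECTORIZE_DOUBLE.  */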
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
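/* The value returned above is a bitmask of vector sizes in bytes:
   32 | 16 (== 48) tells the vectorizer to try 256 bit vectors first and
   fall back to 128 bit ones, while 0 leaves the size choice to the
   vectorizer's default.  */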
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#ifdef TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"