/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
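
/* Illustrative use (a sketch, not exercised here): the per-mode cost
   arrays in struct processor_costs are indexed as, e.g.,

     ix86_cost->divide[MODE_INDEX (mode)]

   so QImode..DImode select slots 0..3 and anything wider falls
   through to slot 4.  */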
/* Processor costs (relative to an add) */
struct processor_costs size_cost = {  /* costs for tuning for size */
  2,                        /* cost of an add instruction */
  3,                        /* cost of a lea instruction */
  2,                        /* variable shift costs */
  3,                        /* constant shift costs */
  {3, 3, 3, 3, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},          /* cost of a divide/mod */
  3,                        /* cost of movsx */
  3,                        /* cost of movzx */
  2,                        /* cost for loading QImode using movzbl */
  {2, 2, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 2},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {2, 2, 2},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  3,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {3, 3},                   /* cost of storing MMX registers
                               in SImode and DImode */
  3,                        /* cost of moving SSE register */
  {3, 3, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {3, 3, 3},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* cost of FADD and FSUB insns.  */
  2,                        /* cost of FMUL instruction.  */
  2,                        /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  2,                        /* cost of FSQRT instruction.  */
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  3,                        /* variable shift costs */
  2,                        /* constant shift costs */
  {6, 6, 6, 6, 6},          /* cost of starting a multiply */
  1,                        /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  15,                       /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  23,                       /* cost of FADD and FSUB insns.  */
  27,                       /* cost of FMUL instruction.  */
  88,                       /* cost of FDIV instruction.  */
  22,                       /* cost of FABS instruction.  */
  24,                       /* cost of FCHS instruction.  */
  122,                      /* cost of FSQRT instruction.  */
};
struct processor_costs i486_cost = {  /* 486 specific costs */
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  3,                        /* variable shift costs */
  2,                        /* constant shift costs */
  {12, 12, 12, 12, 12},     /* cost of starting a multiply */
  1,                        /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  15,                       /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  8,                        /* cost of FADD and FSUB insns.  */
  16,                       /* cost of FMUL instruction.  */
  73,                       /* cost of FDIV instruction.  */
  3,                        /* cost of FABS instruction.  */
  3,                        /* cost of FCHS instruction.  */
  83,                       /* cost of FSQRT instruction.  */
};
struct processor_costs pentium_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  4,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {11, 11, 11, 11, 11},     /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  8,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  3,                        /* cost of FADD and FSUB insns.  */
  3,                        /* cost of FMUL instruction.  */
  39,                       /* cost of FDIV instruction.  */
  1,                        /* cost of FABS instruction.  */
  1,                        /* cost of FCHS instruction.  */
  70,                       /* cost of FSQRT instruction.  */
};
struct processor_costs pentiumpro_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {4, 4, 4, 4, 4},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  32,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  3,                        /* cost of FADD and FSUB insns.  */
  5,                        /* cost of FMUL instruction.  */
  56,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  56,                       /* cost of FSQRT instruction.  */
};
struct processor_costs k6_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {3, 3, 3, 3, 3},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},     /* cost of a divide/mod */
  2,                        /* cost of movsx */
  2,                        /* cost of movzx */
  8,                        /* "large" insn */
  3,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  6,                        /* MMX or SSE register to integer */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  2,                        /* cost of FADD and FSUB insns.  */
  2,                        /* cost of FMUL instruction.  */
  56,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  56,                       /* cost of FSQRT instruction.  */
};
struct processor_costs athlon_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {5, 5, 5, 5, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  4,                        /* cost of FADD and FSUB insns.  */
  4,                        /* cost of FMUL instruction.  */
  24,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  35,                       /* cost of FSQRT instruction.  */
};
struct processor_costs k8_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {3, 4, 3, 4, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 3, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  4,                        /* cost of FADD and FSUB insns.  */
  4,                        /* cost of FMUL instruction.  */
  19,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  35,                       /* cost of FSQRT instruction.  */
};
struct processor_costs pentium4_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  4,                        /* variable shift costs */
  4,                        /* constant shift costs */
  {15, 15, 15, 15, 15},     /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  16,                       /* "large" insn */
  2,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  12,                       /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  10,                       /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  5,                        /* cost of FADD and FSUB insns.  */
  7,                        /* cost of FMUL instruction.  */
  43,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  43,                       /* cost of FSQRT instruction.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper
   part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding
   the need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
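
/* How these masks are consumed (an illustrative sketch; the TARGET_*
   wrappers that do this live in i386.h, where TUNEMASK expands to
   (1 << ix86_tune)):

     if (x86_use_leave & (1 << ix86_tune))
       ... tuning decision, e.g. emit "leave" in the epilogue ...

   Architecture-dependent bits are tested against ix86_arch instead,
   as done for x86_arch_always_fancy_math_387 in override_options
   below.  */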
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,           /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,   /* fp regs */
  -1, 9, -1, -1, -1,                /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,   /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,   /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
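/* For reference: with the x86-64 psABI values REGPARM_MAX == 6 and
   SSE_REGPARM_MAX == 8, this works out to 6*8 + 8*16 = 176 bytes --
   six integer registers followed by eight 16-byte SSE slots.  */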
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER

   > to_allocate			<- FRAME_POINTER
*/
struct ix86_frame
{
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which CPU and instruction set architecture to use.  */
const char *ix86_tune_string;           /* for -mtune=<xxx> */
const char *ix86_arch_string;           /* for -march=<xxx> */
const char *ix86_fpmath_string;         /* for -mfpmath=<xxx> */

/* Number of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
struct ix86_address
{
  rtx base, index, disp;
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};

static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);
struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static void k8_avoid_jump_misspredicts (void);
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class;
   gcc just uses SF or DFmode moves instead of DImode moves to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half there is padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);
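
/* A worked psABI example (a sketch only; nothing here depends on it):
   struct { long l; double d; } spans two eightbytes; classify_argument
   assigns the first X86_64_INTEGER_CLASS and the second
   X86_64_SSE_CLASS, so construct_container passes such an argument in
   one general-purpose register and one SSE register.  */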
/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
struct gcc_target targetm = TARGET_INITIALIZER;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct
    {
      const struct processor_costs *cost;  /* Processor costs */
      const int target_enable;             /* Target flags to enable.  */
      const int target_disable;            /* Target flags to disable.  */
      const int align_loop;                /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct
    {
      const char *const name;          /* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
1092 {"i386", PROCESSOR_I386, 0},
1093 {"i486", PROCESSOR_I486, 0},
1094 {"i586", PROCESSOR_PENTIUM, 0},
1095 {"pentium", PROCESSOR_PENTIUM, 0},
1096 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1097 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1098 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1099 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1100 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1101 {"i686", PROCESSOR_PENTIUMPRO, 0},
1102 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1103 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1104 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1105 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1106 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1107 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1108 | PTA_MMX | PTA_PREFETCH_SSE},
1109 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1110 | PTA_MMX | PTA_PREFETCH_SSE},
1111 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1112 | PTA_MMX | PTA_PREFETCH_SSE},
1113 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1114 | PTA_MMX | PTA_PREFETCH_SSE},
1115 {"k6", PROCESSOR_K6, PTA_MMX},
1116 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1117 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1118 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1120 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1121 | PTA_3DNOW | PTA_3DNOW_A},
1122 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1123 | PTA_3DNOW_A | PTA_SSE},
1124 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1125 | PTA_3DNOW_A | PTA_SSE},
1126 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1127 | PTA_3DNOW_A | PTA_SSE},
1128 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1129 | PTA_SSE | PTA_SSE2 },
1130 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1131 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1132 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1133 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1134 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1135 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1136 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1137 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1140 int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
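      /* E.g. -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128
	 bits, matching the 16-byte alignment that __m128 requires.  */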
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;
  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in 64-bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in 64-bit mode");
      /* Enable the SSE and MMX builtins by default.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    {
      ix86_fpmath = FPMATH_387;
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }
  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends
     to make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT, which
     is not known at this moment.  Mark these values with 2 and let the
     user override them.  If there is no command line option specifying
     them, we will set the defaults in override_options.  */
  flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",    0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",  1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct",  0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { NULL,         0, 0, false, false, false, NULL }
};
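
/* For reference, the attributes handled above appear in user code as,
   e.g. (illustrative declarations only):

     int  __attribute__ ((regparm (2))) f (int a, int b);
     void __attribute__ ((stdcall)) g (int a);
*/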
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1580 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1581 arguments as in struct attribute_spec.handler. */
1583 ix86_handle_cdecl_attribute (tree *node, tree name,
1584 tree args ATTRIBUTE_UNUSED,
1585 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1587 if (TREE_CODE (*node) != FUNCTION_TYPE
1588 && TREE_CODE (*node) != METHOD_TYPE
1589 && TREE_CODE (*node) != FIELD_DECL
1590 && TREE_CODE (*node) != TYPE_DECL)
1592 warning ("`%s' attribute only applies to functions",
1593 IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1598 if (is_attribute_p ("fastcall", name))
1600 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1602 error ("fastcall and stdcall attributes are not compatible");
1604 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1606 error ("fastcall and regparm attributes are not compatible");
1609 else if (is_attribute_p ("stdcall", name))
1611 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1613 error ("fastcall and stdcall attributes are not compatible");
1620 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1621 *no_add_attrs = true;
1627 /* Handle a "regparm" attribute;
1628 arguments as in struct attribute_spec.handler. */
1630 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1631 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1633 if (TREE_CODE (*node) != FUNCTION_TYPE
1634 && TREE_CODE (*node) != METHOD_TYPE
1635 && TREE_CODE (*node) != FIELD_DECL
1636 && TREE_CODE (*node) != TYPE_DECL)
1638 warning ("`%s' attribute only applies to functions",
1639 IDENTIFIER_POINTER (name));
1640 *no_add_attrs = true;
1646 cst = TREE_VALUE (args);
1647 if (TREE_CODE (cst) != INTEGER_CST)
1649 warning ("`%s' attribute requires an integer constant argument",
1650 IDENTIFIER_POINTER (name));
1651 *no_add_attrs = true;
1653 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1655 warning ("argument to `%s' attribute larger than %d",
1656 IDENTIFIER_POINTER (name), REGPARM_MAX);
1657 *no_add_attrs = true;
1660 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1662 error ("fastcall and regparm attributes are not compatible");
1669 /* Return 0 if the attributes for two types are incompatible, 1 if they
1670 are compatible, and 2 if they are nearly compatible (which causes a
1671 warning to be generated). */
1674 ix86_comp_type_attributes (tree type1, tree type2)
1676 /* Check for mismatch of non-default calling convention. */
1677 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1679 if (TREE_CODE (type1) != FUNCTION_TYPE)
1682 /* Check for mismatched fastcall types */
1683 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1684 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1687 /* Check for mismatched return types (cdecl vs stdcall). */
1688 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1689 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1691 if (ix86_function_regparm (type1, NULL)
1692 != ix86_function_regparm (type2, NULL))
1697 /* Return the regparm value for a function with the indicated TYPE and DECL.
1698 DECL may be NULL when calling function indirectly
1699 or considering a libcall. */
1702 ix86_function_regparm (tree type, tree decl)
1705 int regparm = ix86_regparm;
1706 bool user_convention = false;
1710 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1713 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1714 user_convention = true;
1717 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1720 user_convention = true;
1723 /* Use register calling convention for local functions when possible. */
1724 if (!TARGET_64BIT && !user_convention && decl
1725 && flag_unit_at_a_time && !profile_flag)
1727 struct cgraph_local_info *i = cgraph_local_info (decl);
1730 /* We can't use regparm(3) for nested functions, as these use the
1731 static chain pointer in the third argument. */
1732 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
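/* Illustrative sketch, not from the sources: given

     int __attribute__((regparm (2))) f (int a, int b);

   the attribute lookup above yields regparm == 2, while a local static
   function with no user convention may be promoted by the (elided)
   cgraph-local path to pass up to 3 arguments in registers, except for
   nested functions, which need the static chain in the third one.  */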
1742 /* Return true if EAX is live at the start of the function. Used by
1743 ix86_expand_prologue to determine if we need special help before
1744 calling allocate_stack_worker. */
1747 ix86_eax_live_at_start_p (void)
1749 /* Cheat. Don't bother working forward from ix86_function_regparm
1750 to the function type to whether an actual argument is located in
1751 eax. Instead just look at cfg info, which is still close enough
1752 to correct at this point. This gives false positives for broken
1753 functions that might use uninitialized data that happens to be
1754 allocated in eax, but who cares? */
1755 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1758 /* Value is the number of bytes of arguments automatically
1759 popped when returning from a subroutine call.
1760 FUNDECL is the declaration node of the function (as a tree),
1761 FUNTYPE is the data type of the function (as a tree),
1762 or for a library call it is an identifier node for the subroutine name.
1763 SIZE is the number of bytes of arguments passed on the stack.
1765 On the 80386, the RTD insn may be used to pop them if the number
1766 of args is fixed, but if the number is variable then the caller
1767 must pop them all. RTD can't be used for library calls now
1768 because the library is compiled with the Unix compiler.
1769 Use of RTD is a selectable option, since it is incompatible with
1770 standard Unix calling sequences. If the option is not selected,
1771 the caller must always pop the args.
1773 The attribute stdcall is equivalent to RTD on a per module basis. */
1776 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1778 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1780 /* Cdecl functions override -mrtd, and never pop the stack. */
1781 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1783 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1785 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1786 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1790 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1791 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1792 == void_type_node)))
1796 /* Lose any fake structure return argument if it is passed on the stack. */
1797 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1800 int nregs = ix86_function_regparm (funtype, fundecl);
1803 return GET_MODE_SIZE (Pmode);
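/* Worked example (illustrative): for

     void __attribute__((stdcall)) f (int a, int b);

   the callee returns with `ret $8', so this function reports SIZE (8)
   to the caller; a cdecl callee returns with a plain `ret' and reports
   0, leaving the caller to pop the 8 bytes of arguments itself.  */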
1809 /* Argument support functions. */
1811 /* Return true when a register may be used to pass function parameters. */
1813 ix86_function_arg_regno_p (int regno)
1817 return (regno < REGPARM_MAX
1818 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1819 if (SSE_REGNO_P (regno) && TARGET_SSE)
1821 /* RAX is used as a hidden argument to va_arg functions. */
1824 for (i = 0; i < REGPARM_MAX; i++)
1825 if (regno == x86_64_int_parameter_registers[i])
1830 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1831 for a call to a function whose data type is FNTYPE.
1832 For a library call, FNTYPE is 0. */
1835 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1836 tree fntype, /* tree ptr for function decl */
1837 rtx libname, /* SYMBOL_REF of library name or 0 */
1840 static CUMULATIVE_ARGS zero_cum;
1841 tree param, next_param;
1843 if (TARGET_DEBUG_ARG)
1845 fprintf (stderr, "\ninit_cumulative_args (");
1847 fprintf (stderr, "fntype code = %s, ret code = %s",
1848 tree_code_name[(int) TREE_CODE (fntype)],
1849 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1851 fprintf (stderr, "no fntype");
1854 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1859 /* Set up the number of registers to use for passing arguments. */
1861 cum->nregs = ix86_function_regparm (fntype, fndecl);
1863 cum->nregs = ix86_regparm;
1864 cum->sse_nregs = SSE_REGPARM_MAX;
1865 cum->mmx_nregs = MMX_REGPARM_MAX;
1866 cum->warn_sse = true;
1867 cum->warn_mmx = true;
1868 cum->maybe_vaarg = false;
1870 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1871 if (fntype && !TARGET_64BIT)
1873 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1881 /* Determine if this function has variable arguments. This is
1882 indicated by the last argument being 'void_type_node' if there
1883 are no variable arguments. If there are variable arguments, then
1884 we won't pass anything in registers. */
1886 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1888 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1889 param != 0; param = next_param)
1891 next_param = TREE_CHAIN (param);
1892 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1903 cum->maybe_vaarg = true;
1907 if ((!fntype && !libname)
1908 || (fntype && !TYPE_ARG_TYPES (fntype)))
1909 cum->maybe_vaarg = 1;
1911 if (TARGET_DEBUG_ARG)
1912 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1917 /* x86-64 register passing implementation. See the x86-64 ABI for details.
1918 The goal of this code is to classify each 8-byte chunk of the incoming
1919 argument by register class and assign registers accordingly. */
1921 /* Return the union class of CLASS1 and CLASS2.
1922 See the x86-64 PS ABI for details. */
1924 static enum x86_64_reg_class
1925 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1927 /* Rule #1: If both classes are equal, this is the resulting class. */
1928 if (class1 == class2)
1931 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1933 if (class1 == X86_64_NO_CLASS)
1935 if (class2 == X86_64_NO_CLASS)
1938 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1939 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1940 return X86_64_MEMORY_CLASS;
1942 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1943 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1944 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1945 return X86_64_INTEGERSI_CLASS;
1946 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1947 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1948 return X86_64_INTEGER_CLASS;
1950 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1951 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1952 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1953 return X86_64_MEMORY_CLASS;
1955 /* Rule #6: Otherwise class SSE is used. */
1956 return X86_64_SSE_CLASS;
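/* A few sample merges under the rules above (illustrative only):

     NO_CLASS    + SSE_CLASS   -> SSE_CLASS       (rule #2)
     INTEGERSI   + SSESF       -> INTEGERSI       (rule #4)
     SSE_CLASS   + X87_CLASS   -> MEMORY_CLASS    (rule #5)
     SSESF       + SSEDF       -> SSE_CLASS       (rule #6)  */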
1959 /* Classify the argument of type TYPE and mode MODE.
1960 CLASSES will be filled by the register class used to pass each word
1961 of the operand. The number of words is returned. In case the parameter
1962 should be passed in memory, 0 is returned. As a special case for zero
1963 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1965 BIT_OFFSET is used internally for handling records; it specifies the
1966 offset within the record, in bits modulo 256, to avoid overflow cases.
1968 See the x86-64 PS ABI for details.
1972 classify_argument (enum machine_mode mode, tree type,
1973 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1975 HOST_WIDE_INT bytes =
1976 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1977 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1979 /* Variable sized entities are always passed/returned in memory. */
1983 if (mode != VOIDmode
1984 && MUST_PASS_IN_STACK (mode, type))
1987 if (type && AGGREGATE_TYPE_P (type))
1991 enum x86_64_reg_class subclasses[MAX_CLASSES];
1993 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1997 for (i = 0; i < words; i++)
1998 classes[i] = X86_64_NO_CLASS;
2000 /* Zero sized arrays or structures are NO_CLASS. Since we return 0 to
2001 signal the memory class, handle this as a special case. */
2004 classes[0] = X86_64_NO_CLASS;
2008 /* Classify each field of the record and merge classes. */
2009 if (TREE_CODE (type) == RECORD_TYPE)
2011 /* For classes first merge in the fields of the subclasses. */
2012 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2014 tree bases = TYPE_BINFO_BASETYPES (type);
2015 int n_bases = TREE_VEC_LENGTH (bases);
2018 for (i = 0; i < n_bases; ++i)
2020 tree binfo = TREE_VEC_ELT (bases, i);
2022 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2023 tree type = BINFO_TYPE (binfo);
2025 num = classify_argument (TYPE_MODE (type),
2027 (offset + bit_offset) % 256);
2030 for (i = 0; i < num; i++)
2032 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2034 merge_classes (subclasses[i], classes[i + pos]);
2038 /* And now merge the fields of the structure. */
2039 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2041 if (TREE_CODE (field) == FIELD_DECL)
2045 /* Bitfields are always classified as integer. Handle them
2046 early, since later code would consider them to be
2047 misaligned integers. */
2048 if (DECL_BIT_FIELD (field))
2050 for (i = int_bit_position (field) / 8 / 8;
2051 i < (int_bit_position (field)
2052 + tree_low_cst (DECL_SIZE (field), 0)
2055 merge_classes (X86_64_INTEGER_CLASS,
2060 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2061 TREE_TYPE (field), subclasses,
2062 (int_bit_position (field)
2063 + bit_offset) % 256);
2066 for (i = 0; i < num; i++)
2069 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2071 merge_classes (subclasses[i], classes[i + pos]);
2077 /* Arrays are handled as small records. */
2078 else if (TREE_CODE (type) == ARRAY_TYPE)
2081 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2082 TREE_TYPE (type), subclasses, bit_offset);
2086 /* The partial classes are now full classes. */
2087 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2088 subclasses[0] = X86_64_SSE_CLASS;
2089 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2090 subclasses[0] = X86_64_INTEGER_CLASS;
2092 for (i = 0; i < words; i++)
2093 classes[i] = subclasses[i % num];
2095 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2096 else if (TREE_CODE (type) == UNION_TYPE
2097 || TREE_CODE (type) == QUAL_UNION_TYPE)
2099 /* For classes first merge in the fields of the subclasses. */
2100 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2102 tree bases = TYPE_BINFO_BASETYPES (type);
2103 int n_bases = TREE_VEC_LENGTH (bases);
2106 for (i = 0; i < n_bases; ++i)
2108 tree binfo = TREE_VEC_ELT (bases, i);
2110 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2111 tree type = BINFO_TYPE (binfo);
2113 num = classify_argument (TYPE_MODE (type),
2115 (offset + (bit_offset % 64)) % 256);
2118 for (i = 0; i < num; i++)
2120 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2122 merge_classes (subclasses[i], classes[i + pos]);
2126 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2128 if (TREE_CODE (field) == FIELD_DECL)
2131 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2132 TREE_TYPE (field), subclasses,
2136 for (i = 0; i < num; i++)
2137 classes[i] = merge_classes (subclasses[i], classes[i]);
2141 else if (TREE_CODE (type) == SET_TYPE)
2145 classes[0] = X86_64_INTEGERSI_CLASS;
2148 else if (bytes <= 8)
2150 classes[0] = X86_64_INTEGER_CLASS;
2153 else if (bytes <= 12)
2155 classes[0] = X86_64_INTEGER_CLASS;
2156 classes[1] = X86_64_INTEGERSI_CLASS;
2161 classes[0] = X86_64_INTEGER_CLASS;
2162 classes[1] = X86_64_INTEGER_CLASS;
2169 /* Final merger cleanup. */
2170 for (i = 0; i < words; i++)
2172 /* If one class is MEMORY, everything should be passed in memory. */
2174 if (classes[i] == X86_64_MEMORY_CLASS)
2177 /* The X86_64_SSEUP_CLASS should always be preceded by
2178 X86_64_SSE_CLASS. */
2179 if (classes[i] == X86_64_SSEUP_CLASS
2180 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2181 classes[i] = X86_64_SSE_CLASS;
2183 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2184 if (classes[i] == X86_64_X87UP_CLASS
2185 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2186 classes[i] = X86_64_SSE_CLASS;
2191 /* Compute the alignment needed. We align all types to their natural
2192 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2193 if (mode != VOIDmode && mode != BLKmode)
2195 int mode_alignment = GET_MODE_BITSIZE (mode);
2198 mode_alignment = 128;
2199 else if (mode == XCmode)
2200 mode_alignment = 256;
2201 if (COMPLEX_MODE_P (mode))
2202 mode_alignment /= 2;
2203 /* Misaligned fields are always returned in memory. */
2204 if (bit_offset % mode_alignment)
2208 /* Classification of atomic types. */
2218 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2219 classes[0] = X86_64_INTEGERSI_CLASS;
2221 classes[0] = X86_64_INTEGER_CLASS;
2225 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2228 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2229 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2232 if (!(bit_offset % 64))
2233 classes[0] = X86_64_SSESF_CLASS;
2235 classes[0] = X86_64_SSE_CLASS;
2238 classes[0] = X86_64_SSEDF_CLASS;
2241 classes[0] = X86_64_X87_CLASS;
2242 classes[1] = X86_64_X87UP_CLASS;
2248 classes[0] = X86_64_X87_CLASS;
2249 classes[1] = X86_64_X87UP_CLASS;
2250 classes[2] = X86_64_X87_CLASS;
2251 classes[3] = X86_64_X87UP_CLASS;
2254 classes[0] = X86_64_SSEDF_CLASS;
2255 classes[1] = X86_64_SSEDF_CLASS;
2258 classes[0] = X86_64_SSE_CLASS;
2266 classes[0] = X86_64_SSE_CLASS;
2267 classes[1] = X86_64_SSEUP_CLASS;
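/* Worked example (illustrative, not from the sources): on x86-64

     struct s { double d; long l; };     16 bytes, i.e. two words

   classifies as classes[0] = X86_64_SSEDF_CLASS (the double) and
   classes[1] = X86_64_INTEGER_CLASS (the long), so the struct travels
   in one SSE and one integer register; anything larger than 16 bytes
   would have been sent to memory by the size check above.  */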
2282 /* Examine the argument and set the number of registers required in each
2283 class. Return 0 iff the parameter should be passed in memory. */
2285 examine_argument (enum machine_mode mode, tree type, int in_return,
2286 int *int_nregs, int *sse_nregs)
2288 enum x86_64_reg_class class[MAX_CLASSES];
2289 int n = classify_argument (mode, type, class, 0);
2295 for (n--; n >= 0; n--)
2298 case X86_64_INTEGER_CLASS:
2299 case X86_64_INTEGERSI_CLASS:
2302 case X86_64_SSE_CLASS:
2303 case X86_64_SSESF_CLASS:
2304 case X86_64_SSEDF_CLASS:
2307 case X86_64_NO_CLASS:
2308 case X86_64_SSEUP_CLASS:
2310 case X86_64_X87_CLASS:
2311 case X86_64_X87UP_CLASS:
2315 case X86_64_MEMORY_CLASS:
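/* For instance (illustrative): for struct s { double d; long l; } from
   the example above, this sets *int_nregs = 1 and *sse_nregs = 1 and
   returns nonzero, while any argument containing X86_64_MEMORY_CLASS
   makes it return 0 so the value is passed on the stack.  */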
2320 /* Construct a container for the argument as used by the GCC interface.
2321 See FUNCTION_ARG for the detailed description. */
2323 construct_container (enum machine_mode mode, tree type, int in_return,
2324 int nintregs, int nsseregs, const int * intreg,
2327 enum machine_mode tmpmode;
2329 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2330 enum x86_64_reg_class class[MAX_CLASSES];
2334 int needed_sseregs, needed_intregs;
2335 rtx exp[MAX_CLASSES];
2338 n = classify_argument (mode, type, class, 0);
2339 if (TARGET_DEBUG_ARG)
2342 fprintf (stderr, "Memory class\n");
2345 fprintf (stderr, "Classes:");
2346 for (i = 0; i < n; i++)
2348 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2350 fprintf (stderr, "\n");
2355 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2357 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2360 /* First construct simple cases. Avoid SCmode, since we want to use
2361 a single register to pass this type. */
2362 if (n == 1 && mode != SCmode)
2365 case X86_64_INTEGER_CLASS:
2366 case X86_64_INTEGERSI_CLASS:
2367 return gen_rtx_REG (mode, intreg[0]);
2368 case X86_64_SSE_CLASS:
2369 case X86_64_SSESF_CLASS:
2370 case X86_64_SSEDF_CLASS:
2371 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2372 case X86_64_X87_CLASS:
2373 return gen_rtx_REG (mode, FIRST_STACK_REG);
2374 case X86_64_NO_CLASS:
2375 /* Zero sized array, struct or class. */
2380 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2382 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2384 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2385 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2386 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2387 && class[1] == X86_64_INTEGER_CLASS
2388 && (mode == CDImode || mode == TImode || mode == TFmode)
2389 && intreg[0] + 1 == intreg[1])
2390 return gen_rtx_REG (mode, intreg[0]);
2392 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2393 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2395 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2397 /* Otherwise figure out the entries of the PARALLEL. */
2398 for (i = 0; i < n; i++)
2402 case X86_64_NO_CLASS:
2404 case X86_64_INTEGER_CLASS:
2405 case X86_64_INTEGERSI_CLASS:
2406 /* Merge TImodes on aligned occasions here too. */
2407 if (i * 8 + 8 > bytes)
2408 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2409 else if (class[i] == X86_64_INTEGERSI_CLASS)
2413 /* We've requested 24 bytes for which we have no mode. Use DImode. */
2414 if (tmpmode == BLKmode)
2416 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2417 gen_rtx_REG (tmpmode, *intreg),
2421 case X86_64_SSESF_CLASS:
2422 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2423 gen_rtx_REG (SFmode,
2424 SSE_REGNO (sse_regno)),
2428 case X86_64_SSEDF_CLASS:
2429 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2430 gen_rtx_REG (DFmode,
2431 SSE_REGNO (sse_regno)),
2435 case X86_64_SSE_CLASS:
2436 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2440 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2441 gen_rtx_REG (tmpmode,
2442 SSE_REGNO (sse_regno)),
2444 if (tmpmode == TImode)
2452 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2453 for (i = 0; i < nexps; i++)
2454 XVECEXP (ret, 0, i) = exp [i];
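/* Illustrative result, not from the sources: for the two-word struct
   from the classification example the returned rtx is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. each EXPR_LIST pairs a hard register with the byte offset of
   the piece of the argument it carries.  */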
2458 /* Update the data in CUM to advance over an argument
2459 of mode MODE and data type TYPE.
2460 (TYPE is null for libcalls where that information may not be available.) */
2463 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2464 enum machine_mode mode, /* current arg mode */
2465 tree type, /* type of the argument or 0 if lib support */
2466 int named) /* whether or not the argument was named */
2469 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2470 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2472 if (TARGET_DEBUG_ARG)
2474 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2475 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2478 int int_nregs, sse_nregs;
2479 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2480 cum->words += words;
2481 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2483 cum->nregs -= int_nregs;
2484 cum->sse_nregs -= sse_nregs;
2485 cum->regno += int_nregs;
2486 cum->sse_regno += sse_nregs;
2489 cum->words += words;
2493 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2494 && (!type || !AGGREGATE_TYPE_P (type)))
2496 cum->sse_words += words;
2497 cum->sse_nregs -= 1;
2498 cum->sse_regno += 1;
2499 if (cum->sse_nregs <= 0)
2505 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2506 && (!type || !AGGREGATE_TYPE_P (type)))
2508 cum->mmx_words += words;
2509 cum->mmx_nregs -= 1;
2510 cum->mmx_regno += 1;
2511 if (cum->mmx_nregs <= 0)
2519 cum->words += words;
2520 cum->nregs -= words;
2521 cum->regno += words;
2523 if (cum->nregs <= 0)
2533 /* Define where to put the arguments to a function.
2534 Value is zero to push the argument on the stack,
2535 or a hard register in which to store the argument.
2537 MODE is the argument's machine mode.
2538 TYPE is the data type of the argument (as a tree).
2539 This is null for libcalls where that information may not be available.
2541 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2542 the preceding args and about the function being called.
2543 NAMED is nonzero if this argument is a named parameter
2544 (otherwise it is an extra parameter matching an ellipsis). */
2547 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2548 enum machine_mode mode, /* current arg mode */
2549 tree type, /* type of the argument or 0 if lib support */
2550 int named) /* != 0 for normal args, == 0 for ... args */
2554 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2555 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2556 static bool warnedsse, warnedmmx;
2558 /* Handle a hidden AL argument containing the number of registers for varargs
2559 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid any AL settings. */
2561 if (mode == VOIDmode)
2564 return GEN_INT (cum->maybe_vaarg
2565 ? (cum->sse_nregs < 0
2573 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2574 &x86_64_int_parameter_registers [cum->regno],
2579 /* For now, pass fp/complex values on the stack. */
2591 if (words <= cum->nregs)
2593 int regno = cum->regno;
2595 /* Fastcall allocates the first two DWORD (SImode) or
2596 smaller arguments to ECX and EDX. */
2599 if (mode == BLKmode || mode == DImode)
2602 /* ECX, not EAX, is the first allocated register. */
2606 ret = gen_rtx_REG (mode, regno);
2616 if (!type || !AGGREGATE_TYPE_P (type))
2618 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2621 warning ("SSE vector argument without SSE enabled "
2625 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2632 if (!type || !AGGREGATE_TYPE_P (type))
2634 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2637 warning ("MMX vector argument without MMX enabled "
2641 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2646 if (TARGET_DEBUG_ARG)
2649 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2650 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2653 print_simple_rtl (stderr, ret);
2655 fprintf (stderr, ", stack");
2657 fprintf (stderr, " )\n");
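/* Illustrative only: with

     void __attribute__((fastcall)) f (int a, int b, int c);

   the code above hands `a' to ECX and `b' to EDX, while `c' does not
   fit in the two fastcall registers and is passed on the stack.  */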
2663 /* A C expression that indicates when an argument must be passed by
2664 reference. If nonzero for an argument, a copy of that argument is
2665 made in memory and a pointer to the argument is passed instead of
2666 the argument itself. The pointer is passed in whatever way is
2667 appropriate for passing a pointer to that type. */
2670 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2671 enum machine_mode mode ATTRIBUTE_UNUSED,
2672 tree type, int named ATTRIBUTE_UNUSED)
2677 if (type && int_size_in_bytes (type) == -1)
2679 if (TARGET_DEBUG_ARG)
2680 fprintf (stderr, "function_arg_pass_by_reference\n");
2687 /* Return true when TYPE should be 128bit aligned for 32bit argument passing (in the 32bit ABI). */
2690 contains_128bit_aligned_vector_p (tree type)
2692 enum machine_mode mode = TYPE_MODE (type);
2693 if (SSE_REG_MODE_P (mode)
2694 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2696 if (TYPE_ALIGN (type) < 128)
2699 if (AGGREGATE_TYPE_P (type))
2701 /* Walk the aggregates recursively. */
2702 if (TREE_CODE (type) == RECORD_TYPE
2703 || TREE_CODE (type) == UNION_TYPE
2704 || TREE_CODE (type) == QUAL_UNION_TYPE)
2708 if (TYPE_BINFO (type) != NULL
2709 && TYPE_BINFO_BASETYPES (type) != NULL)
2711 tree bases = TYPE_BINFO_BASETYPES (type);
2712 int n_bases = TREE_VEC_LENGTH (bases);
2715 for (i = 0; i < n_bases; ++i)
2717 tree binfo = TREE_VEC_ELT (bases, i);
2718 tree type = BINFO_TYPE (binfo);
2720 if (contains_128bit_aligned_vector_p (type))
2724 /* And now merge the fields of the structure. */
2725 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2727 if (TREE_CODE (field) == FIELD_DECL
2728 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2732 /* Just in case some language passes arrays by value. */
2733 else if (TREE_CODE (type) == ARRAY_TYPE)
2735 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2744 /* Gives the alignment boundary, in bits, of an argument with the
2745 specified mode and type. */
2748 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2752 align = TYPE_ALIGN (type);
2754 align = GET_MODE_ALIGNMENT (mode);
2755 if (align < PARM_BOUNDARY)
2756 align = PARM_BOUNDARY;
2759 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2760 make an exception for SSE modes since these require 128bit alignment.
2763 The handling here differs from field_alignment. ICC aligns MMX
2764 arguments to 4 byte boundaries, while structure fields are aligned
2765 to 8 byte boundaries. */
2768 if (!SSE_REG_MODE_P (mode))
2769 align = PARM_BOUNDARY;
2773 if (!contains_128bit_aligned_vector_p (type))
2774 align = PARM_BOUNDARY;
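/* Example (illustrative): an __m128 argument, or a struct containing
   one, keeps its 128 bit boundary here, while plain integer and MMX
   arguments fall back to PARM_BOUNDARY (32 bits), matching ICC.  */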
2782 /* Return true if N is a possible register number for a function value. */
2784 ix86_function_value_regno_p (int regno)
2788 return ((regno) == 0
2789 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2790 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2792 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2793 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2794 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2797 /* Define how to find the value returned by a function.
2798 VALTYPE is the data type of the value (as a tree).
2799 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2800 otherwise, FUNC is 0. */
2802 ix86_function_value (tree valtype)
2806 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2807 REGPARM_MAX, SSE_REGPARM_MAX,
2808 x86_64_int_return_registers, 0);
2809 /* For zero sized structures, construct_container returns NULL, but we need
2810 to keep the rest of the compiler happy by returning a meaningful value. */
2812 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2816 return gen_rtx_REG (TYPE_MODE (valtype),
2817 ix86_value_regno (TYPE_MODE (valtype)));
2820 /* Return nonzero iff type is returned in memory. */
2822 ix86_return_in_memory (tree type)
2824 int needed_intregs, needed_sseregs, size;
2825 enum machine_mode mode = TYPE_MODE (type);
2828 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2830 if (mode == BLKmode)
2833 size = int_size_in_bytes (type);
2835 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2838 if (VECTOR_MODE_P (mode) || mode == TImode)
2840 /* User-created vectors small enough to fit in EAX. */
2844 /* MMX/3dNow values are returned on the stack, since we've
2845 got to EMMS/FEMMS before returning. */
2849 /* SSE values are returned in XMM0. */
2850 /* ??? Except when it doesn't exist? We have a choice of
2851 either (1) being abi incompatible with a -march switch,
2852 or (2) generating an error here. Given no good solution,
2853 I think the safest thing is one warning. The user won't
2854 be able to use -Werror, but.... */
2865 warning ("SSE vector return without SSE enabled "
2880 /* Define how to find the value returned by a library function
2881 assuming the value has mode MODE. */
2883 ix86_libcall_value (enum machine_mode mode)
2893 return gen_rtx_REG (mode, FIRST_SSE_REG);
2896 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2901 return gen_rtx_REG (mode, 0);
2905 return gen_rtx_REG (mode, ix86_value_regno (mode));
2908 /* Given a mode, return the register to use for a return value. */
2911 ix86_value_regno (enum machine_mode mode)
2913 /* Floating point return values in %st(0). */
2914 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2915 return FIRST_FLOAT_REG;
2916 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2917 we prevent this case when sse is not available. */
2918 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2919 return FIRST_SSE_REG;
2920 /* Everything else in %eax. */
2924 /* Create the va_list data type. */
2927 ix86_build_builtin_va_list (void)
2929 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2931 /* For i386 we use a plain pointer to the argument area. */
2933 return build_pointer_type (char_type_node);
2935 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2936 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2938 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2939 unsigned_type_node);
2940 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2941 unsigned_type_node);
2942 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2944 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2947 DECL_FIELD_CONTEXT (f_gpr) = record;
2948 DECL_FIELD_CONTEXT (f_fpr) = record;
2949 DECL_FIELD_CONTEXT (f_ovf) = record;
2950 DECL_FIELD_CONTEXT (f_sav) = record;
2952 TREE_CHAIN (record) = type_decl;
2953 TYPE_NAME (record) = type_decl;
2954 TYPE_FIELDS (record) = f_gpr;
2955 TREE_CHAIN (f_gpr) = f_fpr;
2956 TREE_CHAIN (f_fpr) = f_ovf;
2957 TREE_CHAIN (f_ovf) = f_sav;
2959 layout_type (record);
2961 /* The correct type is an array type of one element. */
2962 return build_array_type (record, build_index_type (size_zero_node));
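/* The record built above corresponds to this C declaration (a sketch
   following the x86-64 psABI, not a GCC-provided header):

     typedef struct {
       unsigned int gp_offset;      - consumed integer register bytes
       unsigned int fp_offset;      - consumed SSE register bytes
       void *overflow_arg_area;     - arguments passed on the stack
       void *reg_save_area;         - block saved by the prologue
     } __va_list_tag[1];  */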
2965 /* Perform any actions needed for a function that is receiving a
2966 variable number of arguments.
2970 MODE and TYPE are the mode and type of the current parameter.
2972 PRETEND_SIZE is a variable that should be set to the amount of stack
2973 that must be pushed by the prolog to pretend that our caller pushed it.
2976 Normally, this macro will push all remaining incoming registers on the
2977 stack and set PRETEND_SIZE to the length of the registers pushed. */
2980 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2981 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2984 CUMULATIVE_ARGS next_cum;
2985 rtx save_area = NULL_RTX, mem;
2998 /* Indicate to allocate space on the stack for varargs save area. */
2999 ix86_save_varrargs_registers = 1;
3001 cfun->stack_alignment_needed = 128;
3003 fntype = TREE_TYPE (current_function_decl);
3004 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3005 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3006 != void_type_node));
3008 /* For varargs, we do not want to skip the dummy va_dcl argument.
3009 For stdargs, we do want to skip the last named argument. */
3012 function_arg_advance (&next_cum, mode, type, 1);
3015 save_area = frame_pointer_rtx;
3017 set = get_varargs_alias_set ();
3019 for (i = next_cum.regno; i < ix86_regparm; i++)
3021 mem = gen_rtx_MEM (Pmode,
3022 plus_constant (save_area, i * UNITS_PER_WORD));
3023 set_mem_alias_set (mem, set);
3024 emit_move_insn (mem, gen_rtx_REG (Pmode,
3025 x86_64_int_parameter_registers[i]));
3028 if (next_cum.sse_nregs)
3030 /* Now emit code to save SSE registers. The AX parameter contains the
3031 number of SSE parameter registers used to call this function. We use
3032 the sse_prologue_save insn template, which produces a computed jump
3033 across the SSE saves. We need some preparation work to get this working. */
3035 label = gen_label_rtx ();
3036 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3038 /* Compute address to jump to :
3039 label - 5*eax + nnamed_sse_arguments*5 */
3040 tmp_reg = gen_reg_rtx (Pmode);
3041 nsse_reg = gen_reg_rtx (Pmode);
3042 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3043 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3044 gen_rtx_MULT (Pmode, nsse_reg,
3046 if (next_cum.sse_regno)
3049 gen_rtx_CONST (DImode,
3050 gen_rtx_PLUS (DImode,
3052 GEN_INT (next_cum.sse_regno * 4))));
3054 emit_move_insn (nsse_reg, label_ref);
3055 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3057 /* Compute the address of the memory block we save into. We always use a
3058 pointer pointing 127 bytes after the first byte to store - this is
3059 needed to keep the instruction size limited to 4 bytes. */
3060 tmp_reg = gen_reg_rtx (Pmode);
3061 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3062 plus_constant (save_area,
3063 8 * REGPARM_MAX + 127)));
3064 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3065 set_mem_alias_set (mem, set);
3066 set_mem_align (mem, BITS_PER_WORD);
3068 /* And finally do the dirty job! */
3069 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3070 GEN_INT (next_cum.sse_regno), label));
3075 /* Implement va_start. */
3078 ix86_va_start (tree valist, rtx nextarg)
3080 HOST_WIDE_INT words, n_gpr, n_fpr;
3081 tree f_gpr, f_fpr, f_ovf, f_sav;
3082 tree gpr, fpr, ovf, sav, t;
3084 /* Only the 64bit target needs something special. */
3087 std_expand_builtin_va_start (valist, nextarg);
3091 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3092 f_fpr = TREE_CHAIN (f_gpr);
3093 f_ovf = TREE_CHAIN (f_fpr);
3094 f_sav = TREE_CHAIN (f_ovf);
3096 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3097 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3098 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3099 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3100 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3102 /* Count number of gp and fp argument registers used. */
3103 words = current_function_args_info.words;
3104 n_gpr = current_function_args_info.regno;
3105 n_fpr = current_function_args_info.sse_regno;
3107 if (TARGET_DEBUG_ARG)
3108 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3109 (int) words, (int) n_gpr, (int) n_fpr);
3111 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3112 build_int_2 (n_gpr * 8, 0));
3113 TREE_SIDE_EFFECTS (t) = 1;
3114 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3116 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3117 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3118 TREE_SIDE_EFFECTS (t) = 1;
3119 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3121 /* Find the overflow area. */
3122 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3124 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3125 build_int_2 (words * UNITS_PER_WORD, 0));
3126 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3127 TREE_SIDE_EFFECTS (t) = 1;
3128 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3130 /* Find the register save area.
3131 The prologue of the function saves it right above the stack frame. */
3132 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3133 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3134 TREE_SIDE_EFFECTS (t) = 1;
3135 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
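/* Worked example (illustrative): for

     void f (int a, double d, ...);

   the named arguments consume one GPR and one SSE register, so the
   code above stores gp_offset = 1*8 = 8 and fp_offset = 1*16 +
   8*REGPARM_MAX (64 with the usual REGPARM_MAX of 6), points
   overflow_arg_area past any named stack words, and sets
   reg_save_area to the block saved by the prologue.  */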
3138 /* Implement va_arg. */
3140 ix86_va_arg (tree valist, tree type)
3142 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3143 tree f_gpr, f_fpr, f_ovf, f_sav;
3144 tree gpr, fpr, ovf, sav, t;
3146 rtx lab_false, lab_over = NULL_RTX;
3151 /* Only the 64bit target needs something special. */
3154 return std_expand_builtin_va_arg (valist, type);
3157 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3158 f_fpr = TREE_CHAIN (f_gpr);
3159 f_ovf = TREE_CHAIN (f_fpr);
3160 f_sav = TREE_CHAIN (f_ovf);
3162 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3163 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3164 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3165 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3166 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3168 size = int_size_in_bytes (type);
3171 /* Passed by reference. */
3173 type = build_pointer_type (type);
3174 size = int_size_in_bytes (type);
3176 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3178 container = construct_container (TYPE_MODE (type), type, 0,
3179 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3181 * Pull the value out of the saved registers ...
3184 addr_rtx = gen_reg_rtx (Pmode);
3188 rtx int_addr_rtx, sse_addr_rtx;
3189 int needed_intregs, needed_sseregs;
3192 lab_over = gen_label_rtx ();
3193 lab_false = gen_label_rtx ();
3195 examine_argument (TYPE_MODE (type), type, 0,
3196 &needed_intregs, &needed_sseregs);
3199 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3200 || TYPE_ALIGN (type) > 128);
3202 /* If we are passing a structure, verify that it is a consecutive block
3203 in the register save area. If not, we need to do moves. */
3204 if (!need_temp && !REG_P (container))
3206 /* Verify that all registers are strictly consecutive */
3207 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3211 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3213 rtx slot = XVECEXP (container, 0, i);
3214 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3215 || INTVAL (XEXP (slot, 1)) != i * 16)
3223 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3225 rtx slot = XVECEXP (container, 0, i);
3226 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3227 || INTVAL (XEXP (slot, 1)) != i * 8)
3234 int_addr_rtx = addr_rtx;
3235 sse_addr_rtx = addr_rtx;
3239 int_addr_rtx = gen_reg_rtx (Pmode);
3240 sse_addr_rtx = gen_reg_rtx (Pmode);
3242 /* First ensure that we fit completely in registers. */
3245 emit_cmp_and_jump_insns (expand_expr
3246 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3247 GEN_INT ((REGPARM_MAX - needed_intregs +
3248 1) * 8), GE, const1_rtx, SImode,
3253 emit_cmp_and_jump_insns (expand_expr
3254 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3255 GEN_INT ((SSE_REGPARM_MAX -
3256 needed_sseregs + 1) * 16 +
3257 REGPARM_MAX * 8), GE, const1_rtx,
3258 SImode, 1, lab_false);
3261 /* Compute index to start of area used for integer regs. */
3264 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3265 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3266 if (r != int_addr_rtx)
3267 emit_move_insn (int_addr_rtx, r);
3271 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3272 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3273 if (r != sse_addr_rtx)
3274 emit_move_insn (sse_addr_rtx, r);
3282 /* Never use the memory itself, as it has the alias set. */
3283 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3284 mem = gen_rtx_MEM (BLKmode, x);
3285 force_operand (x, addr_rtx);
3286 set_mem_alias_set (mem, get_varargs_alias_set ());
3287 set_mem_align (mem, BITS_PER_UNIT);
3289 for (i = 0; i < XVECLEN (container, 0); i++)
3291 rtx slot = XVECEXP (container, 0, i);
3292 rtx reg = XEXP (slot, 0);
3293 enum machine_mode mode = GET_MODE (reg);
3299 if (SSE_REGNO_P (REGNO (reg)))
3301 src_addr = sse_addr_rtx;
3302 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3306 src_addr = int_addr_rtx;
3307 src_offset = REGNO (reg) * 8;
3309 src_mem = gen_rtx_MEM (mode, src_addr);
3310 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3311 src_mem = adjust_address (src_mem, mode, src_offset);
3312 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3313 emit_move_insn (dest_mem, src_mem);
3320 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3321 build_int_2 (needed_intregs * 8, 0));
3322 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3323 TREE_SIDE_EFFECTS (t) = 1;
3324 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3329 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3330 build_int_2 (needed_sseregs * 16, 0));
3331 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3332 TREE_SIDE_EFFECTS (t) = 1;
3333 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3336 emit_jump_insn (gen_jump (lab_over));
3338 emit_label (lab_false);
3341 /* ... otherwise out of the overflow area. */
3343 /* Care for on-stack alignment if needed. */
3344 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3348 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3349 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3350 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3354 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3356 emit_move_insn (addr_rtx, r);
3359 build (PLUS_EXPR, TREE_TYPE (t), t,
3360 build_int_2 (rsize * UNITS_PER_WORD, 0));
3361 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3362 TREE_SIDE_EFFECTS (t) = 1;
3363 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3366 emit_label (lab_over);
3370 r = gen_rtx_MEM (Pmode, addr_rtx);
3371 set_mem_alias_set (r, get_varargs_alias_set ());
3372 emit_move_insn (addr_rtx, r);
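/* Illustrative summary of the expansion above for a plain `int'
   argument (a sketch only):

     if (gp_offset <= (REGPARM_MAX - 1) * 8) {
         addr = reg_save_area + gp_offset;   - still fits in a GPR slot
         gp_offset += 8;
     } else {
         addr = overflow_arg_area;           - take it from the stack
         overflow_arg_area += 8;
     }

   The real code also handles SSE slots, on-stack alignment, and
   multi-register aggregates via a temporary block.  */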
3378 /* Return nonzero if OP is either an i387 or an SSE fp register. */
3380 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3382 return ANY_FP_REG_P (op);
3385 /* Return nonzero if OP is an i387 fp register. */
3387 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3389 return FP_REG_P (op);
3392 /* Return nonzero if OP is a non-fp register_operand. */
3394 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3396 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3399 /* Return nonzero if OP is a register operand other than an
3400 i387 fp register. */
3402 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3404 return register_operand (op, mode) && !FP_REG_P (op);
3407 /* Return nonzero if OP is a general operand representable on x86_64. */
3410 x86_64_general_operand (rtx op, enum machine_mode mode)
3413 return general_operand (op, mode);
3414 if (nonimmediate_operand (op, mode))
3416 return x86_64_sign_extended_value (op);
3419 /* Return nonzero if OP is a general operand representable on x86_64
3420 as either sign extended or zero extended constant. */
3423 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3426 return general_operand (op, mode);
3427 if (nonimmediate_operand (op, mode))
3429 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3432 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3435 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3438 return nonmemory_operand (op, mode);
3439 if (register_operand (op, mode))
3441 return x86_64_sign_extended_value (op);
3444 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3447 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3449 if (!TARGET_64BIT || !flag_pic)
3450 return nonmemory_operand (op, mode);
3451 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3453 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3458 /* Return nonzero if OPNUM's MEM should be matched
3459 in movabs* patterns. */
3462 ix86_check_movabs (rtx insn, int opnum)
3466 set = PATTERN (insn);
3467 if (GET_CODE (set) == PARALLEL)
3468 set = XVECEXP (set, 0, 0);
3469 if (GET_CODE (set) != SET)
3471 mem = XEXP (set, opnum);
3472 while (GET_CODE (mem) == SUBREG)
3473 mem = SUBREG_REG (mem);
3474 if (GET_CODE (mem) != MEM)
3476 return (volatile_ok || !MEM_VOLATILE_P (mem));
3479 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3482 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3485 return nonmemory_operand (op, mode);
3486 if (register_operand (op, mode))
3488 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3491 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3494 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3497 return immediate_operand (op, mode);
3498 return x86_64_sign_extended_value (op);
3501 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended constant. */
3504 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3506 return x86_64_zero_extended_value (op);
3509 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3510 for shift & compare patterns, as shifting by 0 does not change flags),
3511 else return zero. */
3514 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3516 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3519 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3520 reference and a constant. */
3523 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3525 switch (GET_CODE (op))
3533 if (GET_CODE (op) == SYMBOL_REF
3534 || GET_CODE (op) == LABEL_REF
3535 || (GET_CODE (op) == UNSPEC
3536 && (XINT (op, 1) == UNSPEC_GOT
3537 || XINT (op, 1) == UNSPEC_GOTOFF
3538 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3540 if (GET_CODE (op) != PLUS
3541 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3545 if (GET_CODE (op) == SYMBOL_REF
3546 || GET_CODE (op) == LABEL_REF)
3548 /* Only @GOTOFF gets offsets. */
3549 if (GET_CODE (op) != UNSPEC
3550 || XINT (op, 1) != UNSPEC_GOTOFF)
3553 op = XVECEXP (op, 0, 0);
3554 if (GET_CODE (op) == SYMBOL_REF
3555 || GET_CODE (op) == LABEL_REF)
3564 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3567 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3569 if (GET_CODE (op) != CONST)
3574 if (GET_CODE (op) == UNSPEC
3575 && XINT (op, 1) == UNSPEC_GOTPCREL)
3577 if (GET_CODE (op) == PLUS
3578 && GET_CODE (XEXP (op, 0)) == UNSPEC
3579 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3584 if (GET_CODE (op) == UNSPEC)
3586 if (GET_CODE (op) != PLUS
3587 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3590 if (GET_CODE (op) == UNSPEC)
3596 /* Return true if OP is a symbolic operand that resolves locally. */
3599 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3601 if (GET_CODE (op) == CONST
3602 && GET_CODE (XEXP (op, 0)) == PLUS
3603 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3604 op = XEXP (XEXP (op, 0), 0);
3606 if (GET_CODE (op) == LABEL_REF)
3609 if (GET_CODE (op) != SYMBOL_REF)
3612 if (SYMBOL_REF_LOCAL_P (op))
3615 /* There is, however, a not insubstantial body of code in the rest of
3616 the compiler that assumes it can just stick the results of
3617 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3618 /* ??? This is a hack. Should update the body of the compiler to
3619 always create a DECL and invoke targetm.encode_section_info. */
3620 if (strncmp (XSTR (op, 0), internal_label_prefix,
3621 internal_label_prefix_len) == 0)
3627 /* Test for various thread-local symbols. */
3630 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3632 if (GET_CODE (op) != SYMBOL_REF)
3634 return SYMBOL_REF_TLS_MODEL (op);
3638 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3640 if (GET_CODE (op) != SYMBOL_REF)
3642 return SYMBOL_REF_TLS_MODEL (op) == kind;
3646 global_dynamic_symbolic_operand (rtx op,
3647 enum machine_mode mode ATTRIBUTE_UNUSED)
3649 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3653 local_dynamic_symbolic_operand (rtx op,
3654 enum machine_mode mode ATTRIBUTE_UNUSED)
3656 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3660 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3662 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3666 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3668 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3671 /* Test for a valid operand for a call instruction. Don't allow the
3672 arg pointer register or virtual regs since they may decay into
3673 reg + const, which the patterns can't handle. */
3676 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3678 /* Disallow indirect through a virtual register. This leads to
3679 compiler aborts when trying to eliminate them. */
3680 if (GET_CODE (op) == REG
3681 && (op == arg_pointer_rtx
3682 || op == frame_pointer_rtx
3683 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3684 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3687 /* Disallow `call 1234'. Due to varying assembler lameness this
3688 gets either rejected or translated to `call .+1234'. */
3689 if (GET_CODE (op) == CONST_INT)
3692 /* Explicitly allow SYMBOL_REF even if pic. */
3693 if (GET_CODE (op) == SYMBOL_REF)
3696 /* Otherwise we can allow any general_operand in the address. */
3697 return general_operand (op, Pmode);
3700 /* Test for a valid operand for a call instruction. Don't allow the
3701 arg pointer register or virtual regs since they may decay into
3702 reg + const, which the patterns can't handle. */
3705 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3707 /* Disallow indirect through a virtual register. This leads to
3708 compiler aborts when trying to eliminate them. */
3709 if (GET_CODE (op) == REG
3710 && (op == arg_pointer_rtx
3711 || op == frame_pointer_rtx
3712 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3713 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3716 /* Explicitly allow SYMBOL_REF even if pic. */
3717 if (GET_CODE (op) == SYMBOL_REF)
3720 /* Otherwise we can only allow register operands. */
3721 return register_operand (op, Pmode);
3725 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3727 if (GET_CODE (op) == CONST
3728 && GET_CODE (XEXP (op, 0)) == PLUS
3729 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3730 op = XEXP (XEXP (op, 0), 0);
3731 return GET_CODE (op) == SYMBOL_REF;
3734 /* Match exactly zero and one. */
3737 const0_operand (rtx op, enum machine_mode mode)
3739 return op == CONST0_RTX (mode);
3743 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3745 return op == const1_rtx;
3748 /* Match 2, 4, or 8. Used for leal multiplicands. */
3751 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3753 return (GET_CODE (op) == CONST_INT
3754 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3758 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3760 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3764 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3766 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3770 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3776 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3778 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3782 /* True if this is a constant appropriate for an increment or decrement. */
3785 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3787 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3788 flags register, since the carry flag is not set. */
3789 if (TARGET_PENTIUM4 && !optimize_size)
3791 return op == const1_rtx || op == constm1_rtx;
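/* Consequently (illustrative), on Pentium 4 the expanders emit
   `addl $1, %eax' instead of `incl %eax' unless optimizing for size,
   accepting a longer encoding to avoid the flags dependence.  */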
3794 /* Return nonzero if OP is acceptable as an operand of the DImode shift expander. */
3798 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3801 return nonimmediate_operand (op, mode);
3803 return register_operand (op, mode);
3806 /* Return false if this is the stack pointer, or any other fake
3807 register eliminable to the stack pointer. Otherwise, this is a register operand.
3810 This is used to prevent esp from being used as an index reg,
3811 which would only happen in pathological cases. */
3814 reg_no_sp_operand (rtx op, enum machine_mode mode)
3817 if (GET_CODE (t) == SUBREG)
3819 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3822 return register_operand (op, mode);
3826 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3828 return MMX_REG_P (op);
3831 /* Return false if this is any eliminable register. Otherwise general_operand. */
3835 general_no_elim_operand (rtx op, enum machine_mode mode)
3838 if (GET_CODE (t) == SUBREG)
3840 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3841 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3842 || t == virtual_stack_dynamic_rtx)
3845 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3846 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3849 return general_operand (op, mode);
3852 /* Return false if this is any eliminable register. Otherwise
3853 register_operand or const_int. */
3856 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3859 if (GET_CODE (t) == SUBREG)
3861 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3862 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3863 || t == virtual_stack_dynamic_rtx)
3866 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3869 /* Return false if this is any eliminable register or stack register,
3870 otherwise work like register_operand. */
3873 index_register_operand (rtx op, enum machine_mode mode)
3876 if (GET_CODE (t) == SUBREG)
3880 if (t == arg_pointer_rtx
3881 || t == frame_pointer_rtx
3882 || t == virtual_incoming_args_rtx
3883 || t == virtual_stack_vars_rtx
3884 || t == virtual_stack_dynamic_rtx
3885 || REGNO (t) == STACK_POINTER_REGNUM)
3888 return general_operand (op, mode);
3891 /* Return true if op is a Q_REGS class register. */
3894 q_regs_operand (rtx op, enum machine_mode mode)
3896 if (mode != VOIDmode && GET_MODE (op) != mode)
3898 if (GET_CODE (op) == SUBREG)
3899 op = SUBREG_REG (op);
3900 return ANY_QI_REG_P (op);
3903 /* Return true if op is the flags register. */
3906 flags_reg_operand (rtx op, enum machine_mode mode)
3908 if (mode != VOIDmode && GET_MODE (op) != mode)
3910 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3913 /* Return true if op is a NON_Q_REGS class register. */
3916 non_q_regs_operand (rtx op, enum machine_mode mode)
3918 if (mode != VOIDmode && GET_MODE (op) != mode)
3920 if (GET_CODE (op) == SUBREG)
3921 op = SUBREG_REG (op);
3922 return NON_QI_REG_P (op);
3926 zero_extended_scalar_load_operand (rtx op,
3927 enum machine_mode mode ATTRIBUTE_UNUSED)
3930 if (GET_CODE (op) != MEM)
3932 op = maybe_get_pool_constant (op);
3935 if (GET_CODE (op) != CONST_VECTOR)
3938 (GET_MODE_SIZE (GET_MODE (op)) /
3939 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3940 for (n_elts--; n_elts > 0; n_elts--)
3942 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3943 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3949 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3951 vector_move_operand (rtx op, enum machine_mode mode)
3953 if (nonimmediate_operand (op, mode))
3955 if (GET_MODE (op) != mode && mode != VOIDmode)
3957 return (op == CONST0_RTX (GET_MODE (op)));
3960 /* Return true if op is a valid address, and does not contain
3961 a segment override. */
3964 no_seg_address_operand (rtx op, enum machine_mode mode)
3966 struct ix86_address parts;
3968 if (! address_operand (op, mode))
3971 if (! ix86_decompose_address (op, &parts))
3974 return parts.seg == SEG_DEFAULT;
3977 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns. */
3980 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3982 enum rtx_code code = GET_CODE (op);
3985 /* Operations supported directly. */
3995 /* These are equivalent to the ones above in non-IEEE comparisons. */
4002 return !TARGET_IEEE_FP;
4007 /* Return 1 if OP is a valid comparison operator in valid mode. */
4009 ix86_comparison_operator (rtx op, enum machine_mode mode)
4011 enum machine_mode inmode;
4012 enum rtx_code code = GET_CODE (op);
4013 if (mode != VOIDmode && GET_MODE (op) != mode)
4015 if (GET_RTX_CLASS (code) != '<')
4017 inmode = GET_MODE (XEXP (op, 0));
4019 if (inmode == CCFPmode || inmode == CCFPUmode)
4021 enum rtx_code second_code, bypass_code;
4022 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4023 return (bypass_code == NIL && second_code == NIL);
4030 if (inmode == CCmode || inmode == CCGCmode
4031 || inmode == CCGOCmode || inmode == CCNOmode)
4034 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4035 if (inmode == CCmode)
4039 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4047 /* Return 1 if OP is a valid comparison operator testing carry flag
4050 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4052 enum machine_mode inmode;
4053 enum rtx_code code = GET_CODE (op);
4055 if (mode != VOIDmode && GET_MODE (op) != mode)
4057 if (GET_RTX_CLASS (code) != '<')
4059 inmode = GET_MODE (XEXP (op, 0));
4060 if (GET_CODE (XEXP (op, 0)) != REG
4061 || REGNO (XEXP (op, 0)) != FLAGS_REG
4062 || XEXP (op, 1) != const0_rtx)
4065 if (inmode == CCFPmode || inmode == CCFPUmode)
4067 enum rtx_code second_code, bypass_code;
4069 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4070 if (bypass_code != NIL || second_code != NIL)
4072 code = ix86_fp_compare_code_to_integer (code);
4074 else if (inmode != CCmode)
4079 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4082 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4084 enum machine_mode inmode;
4085 enum rtx_code code = GET_CODE (op);
4087 if (mode != VOIDmode && GET_MODE (op) != mode)
4089 if (GET_RTX_CLASS (code) != '<')
4091 inmode = GET_MODE (XEXP (op, 0));
4092 if (inmode == CCFPmode || inmode == CCFPUmode)
4094 enum rtx_code second_code, bypass_code;
4096 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4097 if (bypass_code != NIL || second_code != NIL)
4099 code = ix86_fp_compare_code_to_integer (code);
4101 /* The i387 supports just a limited set of condition codes. */
4104 case LTU: case GTU: case LEU: case GEU:
4105 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4108 case ORDERED: case UNORDERED:
4116 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4119 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4121 switch (GET_CODE (op))
4124 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4125 but the 386 and 486 do HImode multiplies faster. */
4126 return ix86_tune > PROCESSOR_I486;
4138 /* Nearly general operand, but accept any const_double, since we wish
4139 to be able to drop them into memory rather than have them get pulled into a register. */
4143 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4145 if (mode != VOIDmode && mode != GET_MODE (op))
4147 if (GET_CODE (op) == CONST_DOUBLE)
4149 return general_operand (op, mode);
4152 /* Match an SI or HImode register for a zero_extract. */
4155 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4158 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4159 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4162 if (!register_operand (op, VOIDmode))
4165 /* Be careful to accept only registers having upper parts. */
4166 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4167 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
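/* Illustrative note: only the first four integer registers (%eax, %edx,
   %ecx, %ebx, hard regnos 0-3) have addressable high-byte halves such as
   %ah, hence the "regno < 4" test above.  Pseudo registers (regno >
   LAST_VIRTUAL_REGISTER) are accepted on the assumption that reload will
   place them into a suitable hard register. */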
4170 /* Return 1 if this is a valid binary floating-point operation.
4171 OP is the expression matched, and MODE is its mode. */
4174 binary_fp_operator (rtx op, enum machine_mode mode)
4176 if (mode != VOIDmode && mode != GET_MODE (op))
4179 switch (GET_CODE (op))
4185 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4193 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4195 return GET_CODE (op) == MULT;
4199 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4201 return GET_CODE (op) == DIV;
4205 arith_or_logical_operator (rtx op, enum machine_mode mode)
4207 return ((mode == VOIDmode || GET_MODE (op) == mode)
4208 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4209 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4212 /* Returns 1 if OP is a memory operand with a displacement. */
4215 memory_displacement_operand (rtx op, enum machine_mode mode)
4217 struct ix86_address parts;
4219 if (! memory_operand (op, mode))
4222 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4225 return parts.disp != NULL_RTX;
4228 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4229 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4231 ??? It seems likely that this will only work because cmpsi is an
4232 expander, and no actual insns use this. */
4235 cmpsi_operand (rtx op, enum machine_mode mode)
4237 if (nonimmediate_operand (op, mode))
4240 if (GET_CODE (op) == AND
4241 && GET_MODE (op) == SImode
4242 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4243 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4244 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4245 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4246 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4247 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4253 /* Returns 1 if OP is a memory operand that cannot be represented by the modRM array. */
4257 long_memory_operand (rtx op, enum machine_mode mode)
4259 if (! memory_operand (op, mode))
4262 return memory_address_length (op) != 0;
4265 /* Return nonzero if the rtx is known to be aligned. */
4268 aligned_operand (rtx op, enum machine_mode mode)
4270 struct ix86_address parts;
4272 if (!general_operand (op, mode))
4275 /* Registers and immediate operands are always "aligned". */
4276 if (GET_CODE (op) != MEM)
4279 /* Don't even try to do any aligned optimizations with volatiles. */
4280 if (MEM_VOLATILE_P (op))
4285 /* Pushes and pops are only valid on the stack pointer. */
4286 if (GET_CODE (op) == PRE_DEC
4287 || GET_CODE (op) == POST_INC)
4290 /* Decode the address. */
4291 if (! ix86_decompose_address (op, &parts))
4294 /* Look for some component that isn't known to be aligned. */
4298 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4303 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4308 if (GET_CODE (parts.disp) != CONST_INT
4309 || (INTVAL (parts.disp) & 3) != 0)
4313 /* Didn't find one -- this must be an aligned address. */
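/* A worked example of the displacement check above (illustrative): for
   an SImode access decomposed as base=%ebp, disp=6, the test
   "INTVAL (disp) & 3" is nonzero, so the operand is not considered
   aligned even if %ebp itself is known to be 4-byte aligned. */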
4317 /* Initialize the table of extra 80387 mathematical constants. */
4320 init_ext_80387_constants (void)
4322 static const char * cst[5] =
4324 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4325 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4326 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4327 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4328 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4332 for (i = 0; i < 5; i++)
4334 real_from_string (&ext_80387_constants_table[i], cst[i]);
4335 /* Ensure each constant is rounded to XFmode precision. */
4336 real_convert (&ext_80387_constants_table[i],
4337 XFmode, &ext_80387_constants_table[i]);
4340 ext_80387_constants_init = 1;
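/* A hedged usage sketch: given an XFmode CONST_DOUBLE holding pi,
   standard_80387_constant_p below is expected to match it against the
   table just initialized, so the value can be materialized with a single
        fldpi
   instruction rather than a load from the constant pool.  The mapping
   from the returned index to the mnemonic is done by
   standard_80387_constant_opcode. */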
4343 /* Return true if the constant is something that can be loaded with
4344 a special instruction. */
4347 standard_80387_constant_p (rtx x)
4349 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4352 if (x == CONST0_RTX (GET_MODE (x)))
4354 if (x == CONST1_RTX (GET_MODE (x)))
4357 /* For XFmode constants, try to find a special 80387 instruction on
4358 those CPUs that benefit from them. */
4359 if (GET_MODE (x) == XFmode
4360 && x86_ext_80387_constants & TUNEMASK)
4365 if (! ext_80387_constants_init)
4366 init_ext_80387_constants ();
4368 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4369 for (i = 0; i < 5; i++)
4370 if (real_identical (&r, &ext_80387_constants_table[i]))
4377 /* Return the opcode of the special instruction to be used to load the constant X. */
4381 standard_80387_constant_opcode (rtx x)
4383 switch (standard_80387_constant_p (x))
4403 /* Return the CONST_DOUBLE representing the 80387 constant that is
4404 loaded by the specified special instruction. The argument IDX
4405 matches the return value from standard_80387_constant_p. */
4408 standard_80387_constant_rtx (int idx)
4412 if (! ext_80387_constants_init)
4413 init_ext_80387_constants ();
4429 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4433 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4436 standard_sse_constant_p (rtx x)
4438 if (x == const0_rtx)
4440 return (x == CONST0_RTX (GET_MODE (x)));
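/* Illustrative note: a constant accepted here can be materialized
   without touching memory, typically as
        xorps %xmm0, %xmm0      ; SFmode/V4SFmode zero
        pxor  %xmm0, %xmm0      ; integer vector zero
   which is both shorter and faster than loading 0.0 from the
   constant pool. */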
4443 /* Returns 1 if OP contains a symbol reference. */
4446 symbolic_reference_mentioned_p (rtx op)
4451 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4454 fmt = GET_RTX_FORMAT (GET_CODE (op));
4455 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4461 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4462 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4466 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4473 /* Return 1 if it is appropriate to emit `ret' instructions in the
4474 body of a function. Do this only if the epilogue is simple, needing a
4475 couple of insns. Prior to reloading, we can't tell how many registers
4476 must be saved, so return 0 then. Return 0 if there is no frame
4477 marker to de-allocate.
4479 If NON_SAVING_SETJMP is defined and true, then it is not possible
4480 for the epilogue to be simple, so return 0. This is a special case
4481 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4482 until final, but jump_optimize may need to know sooner if a `return' is OK. */
4486 ix86_can_use_return_insn_p (void)
4488 struct ix86_frame frame;
4490 #ifdef NON_SAVING_SETJMP
4491 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4495 if (! reload_completed || frame_pointer_needed)
4498 /* Don't allow more than 32K bytes of popped args, since that's all
4499 we can do with one instruction. */
4500 if (current_function_pops_args
4501 && current_function_args_size >= 32768)
4504 ix86_compute_frame_layout (&frame);
4505 return frame.to_allocate == 0 && frame.nregs == 0;
4508 /* Return 1 if VALUE can be stored in the sign-extended immediate field. */
4510 x86_64_sign_extended_value (rtx value)
4512 switch (GET_CODE (value))
4514 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4515 to be at least 32, and thus all acceptable constants are
4516 represented as CONST_INT. */
4518 if (HOST_BITS_PER_WIDE_INT == 32)
4522 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4523 return trunc_int_for_mode (val, SImode) == val;
4527 /* For certain code models, the symbolic references are known to fit.
4528 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4529 library.  Don't count TLS SYMBOL_REFs here, since they should fit
4530 only if inside an UNSPEC, handled below. */
4532 /* TLS symbols are not constant. */
4533 if (tls_symbolic_operand (value, Pmode))
4535 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4537 /* For certain code models, the code is near as well. */
4539 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4540 || ix86_cmodel == CM_KERNEL);
4542 /* We may also accept offsetted memory references in certain special cases. */
4545 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4546 switch (XINT (XEXP (value, 0), 1))
4548 case UNSPEC_GOTPCREL:
4550 case UNSPEC_GOTNTPOFF:
4556 if (GET_CODE (XEXP (value, 0)) == PLUS)
4558 rtx op1 = XEXP (XEXP (value, 0), 0);
4559 rtx op2 = XEXP (XEXP (value, 0), 1);
4560 HOST_WIDE_INT offset;
4562 if (ix86_cmodel == CM_LARGE)
4564 if (GET_CODE (op2) != CONST_INT)
4566 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4567 switch (GET_CODE (op1))
4570 /* For CM_SMALL, assume that the last object is 16MB below the
4571 end of the 31-bit boundary.  We may also accept pretty
4572 large negative constants, knowing that all objects are
4573 in the positive half of the address space. */
4574 if (ix86_cmodel == CM_SMALL
4575 && offset < 16*1024*1024
4576 && trunc_int_for_mode (offset, SImode) == offset)
4578 /* For CM_KERNEL we know that all objects reside in the
4579 negative half of the 32-bit address space.  We may not
4580 accept negative offsets, since they may be just off,
4581 but we may accept pretty large positive ones. */
4582 if (ix86_cmodel == CM_KERNEL
4584 && trunc_int_for_mode (offset, SImode) == offset)
4588 /* These conditions are similar to SYMBOL_REF ones, just the
4589 constraints for code models differ. */
4590 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4591 && offset < 16*1024*1024
4592 && trunc_int_for_mode (offset, SImode) == offset)
4594 if (ix86_cmodel == CM_KERNEL
4596 && trunc_int_for_mode (offset, SImode) == offset)
4600 switch (XINT (op1, 1))
4605 && trunc_int_for_mode (offset, SImode) == offset)
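/* Two illustrative data points for the distinction drawn here: the
   value -1 fits the sign-extended imm32 field ("movq $-1, %rax"),
   while 0x80000000 does not sign-extend but is still cheap to build,
   because "movl $0x80000000, %eax" implicitly zero-extends to 64 bits
   (the zero-extended case is handled by x86_64_zero_extended_value
   below). */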
4619 /* Return 1 if VALUE can be stored in the zero-extended immediate field. */
4621 x86_64_zero_extended_value (rtx value)
4623 switch (GET_CODE (value))
4626 if (HOST_BITS_PER_WIDE_INT == 32)
4627 return (GET_MODE (value) == VOIDmode
4628 && !CONST_DOUBLE_HIGH (value));
4632 if (HOST_BITS_PER_WIDE_INT == 32)
4633 return INTVAL (value) >= 0;
4635 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4638 /* For certain code models, the symbolic references are known to fit. */
4640 /* TLS symbols are not constant. */
4641 if (tls_symbolic_operand (value, Pmode))
4643 return ix86_cmodel == CM_SMALL;
4645 /* For certain code models, the code is near as well. */
4647 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4649 /* We may also accept offsetted memory references in certain special cases. */
4652 if (GET_CODE (XEXP (value, 0)) == PLUS)
4654 rtx op1 = XEXP (XEXP (value, 0), 0);
4655 rtx op2 = XEXP (XEXP (value, 0), 1);
4657 if (ix86_cmodel == CM_LARGE)
4659 switch (GET_CODE (op1))
4663 /* For the small code model we may accept pretty large positive
4664 offsets, since one bit is available for free.  Negative
4665 offsets are limited by the size of the NULL-pointer area
4666 specified by the ABI. */
4667 if (ix86_cmodel == CM_SMALL
4668 && GET_CODE (op2) == CONST_INT
4669 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4670 && (trunc_int_for_mode (INTVAL (op2), SImode)
4673 /* ??? For the kernel, we may accept adjustment of
4674 -0x10000000, since we know that it will just convert
4675 negative address space to positive, but perhaps this
4676 is not worthwhile. */
4679 /* These conditions are similar to SYMBOL_REF ones, just the
4680 constraints for code models differ. */
4681 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4682 && GET_CODE (op2) == CONST_INT
4683 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4684 && (trunc_int_for_mode (INTVAL (op2), SImode)
4698 /* Value should be nonzero if functions must have frame pointers.
4699 Zero means the frame pointer need not be set up (and parms may
4700 be accessed via the stack pointer) in functions that seem suitable. */
4703 ix86_frame_pointer_required (void)
4705 /* If we accessed previous frames, then the generated code expects
4706 to be able to access the saved ebp value in our frame. */
4707 if (cfun->machine->accesses_prev_frame)
4710 /* Several x86 OSes need a frame pointer for other reasons,
4711 usually pertaining to setjmp. */
4712 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4715 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4716 the frame pointer by default. Turn it back on now if we've not
4717 got a leaf function. */
4718 if (TARGET_OMIT_LEAF_FRAME_POINTER
4719 && (!current_function_is_leaf))
4722 if (current_function_profile)
4728 /* Record that the current function accesses previous call frames. */
4731 ix86_setup_frame_addresses (void)
4733 cfun->machine->accesses_prev_frame = 1;
4736 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4737 # define USE_HIDDEN_LINKONCE 1
4739 # define USE_HIDDEN_LINKONCE 0
4742 static int pic_labels_used;
4744 /* Fills in the label name that should be used for a pc thunk for
4745 the given register. */
4748 get_pc_thunk_name (char name[32], unsigned int regno)
4750 if (USE_HIDDEN_LINKONCE)
4751 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4753 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4757 /* This function generates code for -fpic that loads %ebx with
4758 the return address of the caller and then returns. */
4761 ix86_file_end (void)
4766 for (regno = 0; regno < 8; ++regno)
4770 if (! ((pic_labels_used >> regno) & 1))
4773 get_pc_thunk_name (name, regno);
4775 if (USE_HIDDEN_LINKONCE)
4779 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4781 TREE_PUBLIC (decl) = 1;
4782 TREE_STATIC (decl) = 1;
4783 DECL_ONE_ONLY (decl) = 1;
4785 (*targetm.asm_out.unique_section) (decl, 0);
4786 named_section (decl, NULL, 0);
4788 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4789 fputs ("\t.hidden\t", asm_out_file);
4790 assemble_name (asm_out_file, name);
4791 fputc ('\n', asm_out_file);
4792 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4797 ASM_OUTPUT_LABEL (asm_out_file, name);
4800 xops[0] = gen_rtx_REG (SImode, regno);
4801 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4802 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4803 output_asm_insn ("ret", xops);
4806 if (NEED_INDICATE_EXEC_STACK)
4807 file_end_indicate_exec_stack ();
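/* For reference, each thunk emitted by the loop above amounts to
   (illustrative AT&T syntax, for the %ebx case):
        __i686.get_pc_thunk.bx:
                movl (%esp), %ebx       ; our return address is the pc
                ret
   Using a paired call/ret keeps the CPU's return-address predictor
   balanced, which is the point of TARGET_DEEP_BRANCH_PREDICTION. */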
4810 /* Emit code for the SET_GOT patterns. */
4813 output_set_got (rtx dest)
4818 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4820 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4822 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4825 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4827 output_asm_insn ("call\t%a2", xops);
4830 /* Output the "canonical" label name ("Lxx$pb") here too. This
4831 is what will be referred to by the Mach-O PIC subsystem. */
4832 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4834 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4835 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4838 output_asm_insn ("pop{l}\t%0", xops);
4843 get_pc_thunk_name (name, REGNO (dest));
4844 pic_labels_used |= 1 << REGNO (dest);
4846 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4847 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4848 output_asm_insn ("call\t%X2", xops);
4851 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4852 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4853 else if (!TARGET_MACHO)
4854 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
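/* A sketch of the non-thunk sequence emitted above when flag_pic is set
   (illustrative AT&T syntax, destination %ebx):
        call 1f
   1:   popl %ebx
        addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
   i.e. fetch the address of the local label off the stack and bias it
   by the assembler-computed distance to the GOT. */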
4859 /* Generate a "push" pattern for input ARG. */
4864 return gen_rtx_SET (VOIDmode,
4866 gen_rtx_PRE_DEC (Pmode,
4867 stack_pointer_rtx)),
4871 /* Return >= 0 if there is an unused call-clobbered register available
4872 for the entire function. */
4875 ix86_select_alt_pic_regnum (void)
4877 if (current_function_is_leaf && !current_function_profile)
4880 for (i = 2; i >= 0; --i)
4881 if (!regs_ever_live[i])
4885 return INVALID_REGNUM;
4888 /* Return 1 if we need to save REGNO. */
4890 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4892 if (pic_offset_table_rtx
4893 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4894 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4895 || current_function_profile
4896 || current_function_calls_eh_return
4897 || current_function_uses_const_pool))
4899 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4904 if (current_function_calls_eh_return && maybe_eh_return)
4909 unsigned test = EH_RETURN_DATA_REGNO (i);
4910 if (test == INVALID_REGNUM)
4917 return (regs_ever_live[regno]
4918 && !call_used_regs[regno]
4919 && !fixed_regs[regno]
4920 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4923 /* Return number of registers to be saved on the stack. */
4926 ix86_nsaved_regs (void)
4931 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4932 if (ix86_save_reg (regno, true))
4937 /* Return the offset between two registers, one to be eliminated, and the other
4938 its replacement, at the start of a routine. */
4941 ix86_initial_elimination_offset (int from, int to)
4943 struct ix86_frame frame;
4944 ix86_compute_frame_layout (&frame);
4946 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4947 return frame.hard_frame_pointer_offset;
4948 else if (from == FRAME_POINTER_REGNUM
4949 && to == HARD_FRAME_POINTER_REGNUM)
4950 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4953 if (to != STACK_POINTER_REGNUM)
4955 else if (from == ARG_POINTER_REGNUM)
4956 return frame.stack_pointer_offset;
4957 else if (from != FRAME_POINTER_REGNUM)
4960 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4964 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
4967 ix86_compute_frame_layout (struct ix86_frame *frame)
4969 HOST_WIDE_INT total_size;
4970 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4971 HOST_WIDE_INT offset;
4972 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4973 HOST_WIDE_INT size = get_frame_size ();
4975 frame->nregs = ix86_nsaved_regs ();
4978 /* During reload iteration the number of registers saved can change.
4979 Recompute the value as needed.  Do not recompute when the number of
4980 registers didn't change, as reload does multiple calls to the function
4981 and does not expect the decision to change within a single iteration. */
4983 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4985 int count = frame->nregs;
4987 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4988 /* The fast prologue uses move instead of push to save registers. This
4989 is significantly longer, but also executes faster as modern hardware
4990 can execute the moves in parallel, but can't do that for push/pop.
4992 Be careful about choosing which prologue to emit: when the function
4993 takes many instructions to execute, we may use the slow version, as
4994 well as when the function is known to be outside a hot spot (this is
4995 known with feedback only).  Weight the size of the function by the
4996 number of registers to save, as it is cheap to use one or two push
4997 instructions but very slow to use many of them. */
4999 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5000 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5001 || (flag_branch_probabilities
5002 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5003 cfun->machine->use_fast_prologue_epilogue = false;
5005 cfun->machine->use_fast_prologue_epilogue
5006 = !expensive_function_p (count);
5008 if (TARGET_PROLOGUE_USING_MOVE
5009 && cfun->machine->use_fast_prologue_epilogue)
5010 frame->save_regs_using_mov = true;
5012 frame->save_regs_using_mov = false;
5015 /* Skip return address and saved base pointer. */
5016 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5018 frame->hard_frame_pointer_offset = offset;
5020 /* Do some sanity checking of stack_alignment_needed and
5021 preferred_alignment, since the i386 port is the only one using
5022 these features, and they may break easily. */
5024 if (size && !stack_alignment_needed)
5026 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5028 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5030 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5033 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5034 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5036 /* Register save area */
5037 offset += frame->nregs * UNITS_PER_WORD;
5040 if (ix86_save_varrargs_registers)
5042 offset += X86_64_VARARGS_SIZE;
5043 frame->va_arg_size = X86_64_VARARGS_SIZE;
5046 frame->va_arg_size = 0;
5048 /* Align start of frame for local function. */
5049 frame->padding1 = ((offset + stack_alignment_needed - 1)
5050 & -stack_alignment_needed) - offset;
5052 offset += frame->padding1;
5054 /* Frame pointer points here. */
5055 frame->frame_pointer_offset = offset;
5059 /* Add the outgoing arguments area.  This can be skipped if we
5060 eliminated all the function calls as dead code.
5061 Skipping is however impossible when the function calls alloca: the
5062 alloca expander assumes that the last current_function_outgoing_args_size
5063 bytes of the stack frame are unused. */
5064 if (ACCUMULATE_OUTGOING_ARGS
5065 && (!current_function_is_leaf || current_function_calls_alloca))
5067 offset += current_function_outgoing_args_size;
5068 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5071 frame->outgoing_arguments_size = 0;
5073 /* Align the stack boundary.  Only needed if we're calling another function or using alloca. */
5075 if (!current_function_is_leaf || current_function_calls_alloca)
5076 frame->padding2 = ((offset + preferred_alignment - 1)
5077 & -preferred_alignment) - offset;
5079 frame->padding2 = 0;
5081 offset += frame->padding2;
5083 /* We've reached end of stack frame. */
5084 frame->stack_pointer_offset = offset;
5086 /* Size prologue needs to allocate. */
5087 frame->to_allocate =
5088 (size + frame->padding1 + frame->padding2
5089 + frame->outgoing_arguments_size + frame->va_arg_size);
5091 if ((!frame->to_allocate && frame->nregs <= 1)
5092 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5093 frame->save_regs_using_mov = false;
5095 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5096 && current_function_is_leaf)
5098 frame->red_zone_size = frame->to_allocate;
5099 if (frame->save_regs_using_mov)
5100 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5101 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5102 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5105 frame->red_zone_size = 0;
5106 frame->to_allocate -= frame->red_zone_size;
5107 frame->stack_pointer_offset -= frame->red_zone_size;
5109 fprintf (stderr, "nregs: %i\n", frame->nregs);
5110 fprintf (stderr, "size: %i\n", size);
5111 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5112 fprintf (stderr, "padding1: %i\n", frame->padding1);
5113 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5114 fprintf (stderr, "padding2: %i\n", frame->padding2);
5115 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5116 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5117 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5118 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5119 frame->hard_frame_pointer_offset);
5120 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
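/* An illustrative sketch of the layout computed above (stack grows
   downward; arrows mark the offsets recorded in *frame):
        return address
        saved %ebp (if frame_pointer_needed)  <- hard_frame_pointer_offset
        register save area (nregs words)
        va_arg register save area
        padding1
        local variables (size bytes)          <- frame_pointer_offset
        outgoing arguments
        padding2                              <- stack_pointer_offset
   The red-zone adjustment at the end may shrink to_allocate and
   stack_pointer_offset; the code above is authoritative. */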
5124 /* Emit code to save registers in the prologue. */
5127 ix86_emit_save_regs (void)
5132 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5133 if (ix86_save_reg (regno, true))
5135 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5136 RTX_FRAME_RELATED_P (insn) = 1;
5140 /* Emit code to save registers using MOV insns.  The first register
5141 is stored at POINTER + OFFSET. */
5143 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5148 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5149 if (ix86_save_reg (regno, true))
5151 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5153 gen_rtx_REG (Pmode, regno));
5154 RTX_FRAME_RELATED_P (insn) = 1;
5155 offset += UNITS_PER_WORD;
5159 /* Expand prologue or epilogue stack adjustment.
5160 The pattern exists to put a dependency on all ebp-based memory accesses.
5161 STYLE should be negative if instructions should be marked as frame related,
5162 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
5166 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5171 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5172 else if (x86_64_immediate_operand (offset, DImode))
5173 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5177 /* r11 is used by indirect sibcall return as well, set before the
5178 epilogue and used after the epilogue. ATM indirect sibcall
5179 shouldn't be used together with huge frame sizes in one
5180 function because of the frame_size check in sibcall.c. */
5183 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5184 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5186 RTX_FRAME_RELATED_P (insn) = 1;
5187 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5191 RTX_FRAME_RELATED_P (insn) = 1;
5194 /* Expand the prologue into a bunch of separate insns. */
5197 ix86_expand_prologue (void)
5201 struct ix86_frame frame;
5202 HOST_WIDE_INT allocate;
5204 ix86_compute_frame_layout (&frame);
5206 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5207 slower on all targets. Also sdb doesn't like it. */
5209 if (frame_pointer_needed)
5211 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5212 RTX_FRAME_RELATED_P (insn) = 1;
5214 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5215 RTX_FRAME_RELATED_P (insn) = 1;
5218 allocate = frame.to_allocate;
5220 if (!frame.save_regs_using_mov)
5221 ix86_emit_save_regs ();
5223 allocate += frame.nregs * UNITS_PER_WORD;
5225 /* When using the red zone we may start register saving before allocating
5226 the stack frame, saving one cycle of the prologue. */
5227 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5228 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5229 : stack_pointer_rtx,
5230 -frame.nregs * UNITS_PER_WORD);
5234 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5235 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5236 GEN_INT (-allocate), -1);
5239 /* Only valid for Win32. */
5240 rtx eax = gen_rtx_REG (SImode, 0);
5241 bool eax_live = ix86_eax_live_at_start_p ();
5248 emit_insn (gen_push (eax));
5252 insn = emit_move_insn (eax, GEN_INT (allocate));
5253 RTX_FRAME_RELATED_P (insn) = 1;
5255 insn = emit_insn (gen_allocate_stack_worker (eax));
5256 RTX_FRAME_RELATED_P (insn) = 1;
5260 rtx t = plus_constant (stack_pointer_rtx, allocate);
5261 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5265 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5267 if (!frame_pointer_needed || !frame.to_allocate)
5268 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5270 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5271 -frame.nregs * UNITS_PER_WORD);
5274 pic_reg_used = false;
5275 if (pic_offset_table_rtx
5276 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5277 || current_function_profile))
5279 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5281 if (alt_pic_reg_used != INVALID_REGNUM)
5282 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5284 pic_reg_used = true;
5289 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5291 /* Even with accurate pre-reload life analysis, we can wind up
5292 deleting all references to the pic register after reload.
5293 Consider if cross-jumping unifies two sides of a branch
5294 controlled by a comparison vs the only read from a global.
5295 In which case, allow the set_got to be deleted, though we're
5296 too late to do anything about the ebx save in the prologue. */
5297 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5300 /* Prevent function calls from being scheduled before the call to mcount.
5301 In the pic_reg_used case, make sure that the got load isn't deleted. */
5302 if (current_function_profile)
5303 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5306 /* Emit code to restore saved registers using MOV insns. First register
5307 is restored from POINTER + OFFSET. */
5309 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5310 int maybe_eh_return)
5313 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5315 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5316 if (ix86_save_reg (regno, maybe_eh_return))
5318 /* Ensure that adjust_address won't be forced to produce a pointer
5319 outside the range allowed by the x86-64 instruction set. */
5320 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5324 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5325 emit_move_insn (r11, GEN_INT (offset));
5326 emit_insn (gen_adddi3 (r11, r11, pointer));
5327 base_address = gen_rtx_MEM (Pmode, r11);
5330 emit_move_insn (gen_rtx_REG (Pmode, regno),
5331 adjust_address (base_address, Pmode, offset));
5332 offset += UNITS_PER_WORD;
5336 /* Restore function stack, frame, and registers. */
5339 ix86_expand_epilogue (int style)
5342 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5343 struct ix86_frame frame;
5344 HOST_WIDE_INT offset;
5346 ix86_compute_frame_layout (&frame);
5348 /* Calculate start of saved registers relative to ebp. Special care
5349 must be taken for the normal return case of a function using
5350 eh_return: the eax and edx registers are marked as saved, but not
5351 restored along this path. */
5352 offset = frame.nregs;
5353 if (current_function_calls_eh_return && style != 2)
5355 offset *= -UNITS_PER_WORD;
5357 /* If we're only restoring one register and sp is not valid, then
5358 use a move instruction to restore the register, since it's
5359 less work than reloading sp and popping the register.
5361 The default code results in a stack adjustment using an add/lea instruction,
5362 while this code results in a LEAVE instruction (or discrete equivalent),
5363 so it is profitable in some other cases as well, especially when there
5364 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
5365 is set and there is exactly one register to pop.  This heuristic may need
5366 some tuning in the future. */
5367 if ((!sp_valid && frame.nregs <= 1)
5368 || (TARGET_EPILOGUE_USING_MOVE
5369 && cfun->machine->use_fast_prologue_epilogue
5370 && (frame.nregs > 1 || frame.to_allocate))
5371 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5372 || (frame_pointer_needed && TARGET_USE_LEAVE
5373 && cfun->machine->use_fast_prologue_epilogue
5374 && frame.nregs == 1)
5375 || current_function_calls_eh_return)
5377 /* Restore registers.  We can use ebp or esp to address the memory
5378 locations.  If both are available, default to ebp, since offsets
5379 are known to be small.  The only exception is esp pointing directly to
5380 the end of the block of saved registers, where we may simplify the addressing mode. */
5383 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5384 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5385 frame.to_allocate, style == 2);
5387 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5388 offset, style == 2);
5390 /* eh_return epilogues need %ecx added to the stack pointer. */
5393 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5395 if (frame_pointer_needed)
5397 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5398 tmp = plus_constant (tmp, UNITS_PER_WORD);
5399 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5401 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5402 emit_move_insn (hard_frame_pointer_rtx, tmp);
5404 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5409 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5410 tmp = plus_constant (tmp, (frame.to_allocate
5411 + frame.nregs * UNITS_PER_WORD));
5412 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5415 else if (!frame_pointer_needed)
5416 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5417 GEN_INT (frame.to_allocate
5418 + frame.nregs * UNITS_PER_WORD),
5420 /* If not an i386, mov & pop is faster than "leave". */
5421 else if (TARGET_USE_LEAVE || optimize_size
5422 || !cfun->machine->use_fast_prologue_epilogue)
5423 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5426 pro_epilogue_adjust_stack (stack_pointer_rtx,
5427 hard_frame_pointer_rtx,
5430 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5432 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5437 /* First step is to deallocate the stack frame so that we can
5438 pop the registers. */
5441 if (!frame_pointer_needed)
5443 pro_epilogue_adjust_stack (stack_pointer_rtx,
5444 hard_frame_pointer_rtx,
5445 GEN_INT (offset), style);
5447 else if (frame.to_allocate)
5448 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5449 GEN_INT (frame.to_allocate), style);
5451 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5452 if (ix86_save_reg (regno, false))
5455 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5457 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5459 if (frame_pointer_needed)
5461 /* Leave results in shorter dependency chains on CPUs that are
5462 able to grok it fast. */
5463 if (TARGET_USE_LEAVE)
5464 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5465 else if (TARGET_64BIT)
5466 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5468 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5472 /* Sibcall epilogues don't want a return instruction. */
5476 if (current_function_pops_args && current_function_args_size)
5478 rtx popc = GEN_INT (current_function_pops_args);
5480 /* The i386 can only pop 64K bytes.  If asked to pop more, pop the
5481 return address, do an explicit add, and jump indirectly to the caller. */
5484 if (current_function_pops_args >= 65536)
5486 rtx ecx = gen_rtx_REG (SImode, 2);
5488 /* There is no "pascal" calling convention in 64bit ABI. */
5492 emit_insn (gen_popsi1 (ecx));
5493 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5494 emit_jump_insn (gen_return_indirect_internal (ecx));
5497 emit_jump_insn (gen_return_pop_internal (popc));
5500 emit_jump_insn (gen_return_internal ());
5503 /* Reset from the function's potential modifications. */
5506 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5507 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5509 if (pic_offset_table_rtx)
5510 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5513 /* Extract the parts of an RTL expression that is a valid memory address
5514 for an instruction. Return 0 if the structure of the address is
5515 grossly off. Return -1 if the address contains ASHIFT, so it is not
5516 strictly valid, but is still used for computing the length of a lea instruction. */
5519 ix86_decompose_address (rtx addr, struct ix86_address *out)
5521 rtx base = NULL_RTX;
5522 rtx index = NULL_RTX;
5523 rtx disp = NULL_RTX;
5524 HOST_WIDE_INT scale = 1;
5525 rtx scale_rtx = NULL_RTX;
5527 enum ix86_address_seg seg = SEG_DEFAULT;
5529 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5531 else if (GET_CODE (addr) == PLUS)
5541 addends[n++] = XEXP (op, 1);
5544 while (GET_CODE (op) == PLUS);
5549 for (i = n; i >= 0; --i)
5552 switch (GET_CODE (op))
5557 index = XEXP (op, 0);
5558 scale_rtx = XEXP (op, 1);
5562 if (XINT (op, 1) == UNSPEC_TP
5563 && TARGET_TLS_DIRECT_SEG_REFS
5564 && seg == SEG_DEFAULT)
5565 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5594 else if (GET_CODE (addr) == MULT)
5596 index = XEXP (addr, 0); /* index*scale */
5597 scale_rtx = XEXP (addr, 1);
5599 else if (GET_CODE (addr) == ASHIFT)
5603 /* We're called for lea too, which implements ashift on occasion. */
5604 index = XEXP (addr, 0);
5605 tmp = XEXP (addr, 1);
5606 if (GET_CODE (tmp) != CONST_INT)
5608 scale = INTVAL (tmp);
5609 if ((unsigned HOST_WIDE_INT) scale > 3)
5615 disp = addr; /* displacement */
5617 /* Extract the integral value of scale. */
5620 if (GET_CODE (scale_rtx) != CONST_INT)
5622 scale = INTVAL (scale_rtx);
5625 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
5626 if (base && index && scale == 1
5627 && (index == arg_pointer_rtx
5628 || index == frame_pointer_rtx
5629 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5636 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5637 if ((base == hard_frame_pointer_rtx
5638 || base == frame_pointer_rtx
5639 || base == arg_pointer_rtx) && !disp)
5642 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5643 Avoid this by transforming to [%esi+0]. */
5644 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5645 && base && !index && !disp
5647 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5650 /* Special case: encode reg+reg instead of reg*2. */
5651 if (!base && index && scale && scale == 2)
5652 base = index, scale = 1;
5654 /* Special case: scaling cannot be encoded without base or displacement. */
5655 if (!base && !disp && index && scale != 1)
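/* Worked examples of the special cases above (illustrative only):
     (mult (reg A) (const_int 2))    -> base A, index A, scale 1,
                                        since reg+reg encodes shorter than reg*2;
     (ashift (reg A) (const_int 3))  -> index A, scale 8, disp forced to 0,
                                        as [index*scale] alone has no encoding;
     (reg %ebp) with no displacement -> disp forced to 0, because the ModR/M
                                        encoding of %ebp as a base requires one. */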
5667 /* Return the cost of the memory address x.
5668 For i386, it is better to use a complex address than let gcc copy
5669 the address into a reg and make a new pseudo.  But not if the address
5670 requires two regs - that would mean more pseudos with longer lifetimes. */
5673 ix86_address_cost (rtx x)
5675 struct ix86_address parts;
5678 if (!ix86_decompose_address (x, &parts))
5681 /* More complex memory references are better. */
5682 if (parts.disp && parts.disp != const0_rtx)
5684 if (parts.seg != SEG_DEFAULT)
5687 /* Attempt to minimize number of registers in the address. */
5689 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5691 && (!REG_P (parts.index)
5692 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5696 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5698 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5699 && parts.base != parts.index)
5702 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5703 since its predecode logic can't detect the length of instructions
5704 and it degenerates to vector decoding.  Increase the cost of such
5705 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
5706 to split such addresses or even refuse them entirely.
5708 The following addressing modes are affected:
5709 [base+scale*index]
5710 [scale*index+disp]
5711 [base+index]
5713 The first and last case may be avoidable by explicitly coding the zero
5714 into the memory address, but I don't have an AMD-K6 machine handy to check this theory. */
5718 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5719 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5720 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5726 /* If X is a machine specific address (i.e. a symbol or label being
5727 referenced as a displacement from the GOT implemented using an
5728 UNSPEC), then return the base term. Otherwise return X. */
5731 ix86_find_base_term (rtx x)
5737 if (GET_CODE (x) != CONST)
5740 if (GET_CODE (term) == PLUS
5741 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5742 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5743 term = XEXP (term, 0);
5744 if (GET_CODE (term) != UNSPEC
5745 || XINT (term, 1) != UNSPEC_GOTPCREL)
5748 term = XVECEXP (term, 0, 0);
5750 if (GET_CODE (term) != SYMBOL_REF
5751 && GET_CODE (term) != LABEL_REF)
5757 term = ix86_delegitimize_address (x);
5759 if (GET_CODE (term) != SYMBOL_REF
5760 && GET_CODE (term) != LABEL_REF)
5766 /* Determine if a given RTX is a valid constant. We already know this
5767 satisfies CONSTANT_P. */
5770 legitimate_constant_p (rtx x)
5774 switch (GET_CODE (x))
5777 /* TLS symbols are not constant. */
5778 if (tls_symbolic_operand (x, Pmode))
5783 inner = XEXP (x, 0);
5785 /* Offsets of TLS symbols are never valid.
5786 Discourage CSE from creating them. */
5787 if (GET_CODE (inner) == PLUS
5788 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5791 if (GET_CODE (inner) == PLUS
5792 || GET_CODE (inner) == MINUS)
5794 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5796 inner = XEXP (inner, 0);
5799 /* Only some unspecs are valid as "constants". */
5800 if (GET_CODE (inner) == UNSPEC)
5801 switch (XINT (inner, 1))
5805 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5807 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5817 /* Otherwise we handle everything else in the move patterns. */
5821 /* Determine if it's legal to put X into the constant pool. This
5822 is not possible for the address of thread-local symbols, which
5823 is checked above. */
5826 ix86_cannot_force_const_mem (rtx x)
5828 return !legitimate_constant_p (x);
5831 /* Determine if a given RTX is a valid constant address. */
5834 constant_address_p (rtx x)
5836 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5839 /* Nonzero if the constant value X is a legitimate general operand
5840 when generating PIC code. It is given that flag_pic is on and
5841 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5844 legitimate_pic_operand_p (rtx x)
5848 switch (GET_CODE (x))
5851 inner = XEXP (x, 0);
5853 /* Only some unspecs are valid as "constants". */
5854 if (GET_CODE (inner) == UNSPEC)
5855 switch (XINT (inner, 1))
5858 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5866 return legitimate_pic_address_disp_p (x);
5873 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
5877 legitimate_pic_address_disp_p (rtx disp)
5881 /* In 64-bit mode we can allow direct addresses of symbols and labels
5882 when they are not dynamic symbols. */
5885 /* TLS references should always be enclosed in UNSPEC. */
5886 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5888 if (GET_CODE (disp) == SYMBOL_REF
5889 && ix86_cmodel == CM_SMALL_PIC
5890 && SYMBOL_REF_LOCAL_P (disp))
5892 if (GET_CODE (disp) == LABEL_REF)
5894 if (GET_CODE (disp) == CONST
5895 && GET_CODE (XEXP (disp, 0)) == PLUS)
5897 rtx op0 = XEXP (XEXP (disp, 0), 0);
5898 rtx op1 = XEXP (XEXP (disp, 0), 1);
5900 /* TLS references should always be enclosed in UNSPEC. */
5901 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5903 if (((GET_CODE (op0) == SYMBOL_REF
5904 && ix86_cmodel == CM_SMALL_PIC
5905 && SYMBOL_REF_LOCAL_P (op0))
5906 || GET_CODE (op0) == LABEL_REF)
5907 && GET_CODE (op1) == CONST_INT
5908 && INTVAL (op1) < 16*1024*1024
5909 && INTVAL (op1) >= -16*1024*1024)
5913 if (GET_CODE (disp) != CONST)
5915 disp = XEXP (disp, 0);
5919 /* It is unsafe to allow PLUS expressions here; this limits the allowed
5920 distance into the GOT table.  We should not need these anyway. */
5921 if (GET_CODE (disp) != UNSPEC
5922 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5925 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5926 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5932 if (GET_CODE (disp) == PLUS)
5934 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5936 disp = XEXP (disp, 0);
5940 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5941 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5943 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5944 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5945 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5947 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5948 if (! strcmp (sym_name, "<pic base>"))
5953 if (GET_CODE (disp) != UNSPEC)
5956 switch (XINT (disp, 1))
5961 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5963 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5964 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5965 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5967 case UNSPEC_GOTTPOFF:
5968 case UNSPEC_GOTNTPOFF:
5969 case UNSPEC_INDNTPOFF:
5972 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5974 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5976 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5982 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5983 memory address for an instruction. The MODE argument is the machine mode
5984 for the MEM expression that wants to use this address.
5986 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5987 convert common non-canonical forms to canonical form so that they will be recognized. */
5991 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5993 struct ix86_address parts;
5994 rtx base, index, disp;
5995 HOST_WIDE_INT scale;
5996 const char *reason = NULL;
5997 rtx reason_rtx = NULL_RTX;
5999 if (TARGET_DEBUG_ADDR)
6002 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6003 GET_MODE_NAME (mode), strict);
6007 if (ix86_decompose_address (addr, &parts) <= 0)
6009 reason = "decomposition failed";
6014 index = parts.index;
6016 scale = parts.scale;
6018 /* Validate base register.
6020 Don't allow SUBREGs here; it can lead to spill failures when the base
6021 is one word out of a two word structure, which is represented internally as a DImode int. */
6028 if (GET_CODE (base) != REG)
6030 reason = "base is not a register";
6034 if (GET_MODE (base) != Pmode)
6036 reason = "base is not in Pmode";
6040 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6041 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6043 reason = "base is not valid";
6048 /* Validate index register.
6050 Don't allow SUBREGs here; it can lead to spill failures when the index
6051 is one word out of a two word structure, which is represented internally as a DImode int. */
6058 if (GET_CODE (index) != REG)
6060 reason = "index is not a register";
6064 if (GET_MODE (index) != Pmode)
6066 reason = "index is not in Pmode";
6070 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6071 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6073 reason = "index is not valid";
6078 /* Validate scale factor. */
6081 reason_rtx = GEN_INT (scale);
6084 reason = "scale without index";
6088 if (scale != 2 && scale != 4 && scale != 8)
6090 reason = "scale is not a valid multiplier";
6095 /* Validate displacement. */
6100 if (GET_CODE (disp) == CONST
6101 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6102 switch (XINT (XEXP (disp, 0), 1))
6106 case UNSPEC_GOTPCREL:
6109 goto is_legitimate_pic;
6111 case UNSPEC_GOTTPOFF:
6112 case UNSPEC_GOTNTPOFF:
6113 case UNSPEC_INDNTPOFF:
6119 reason = "invalid address unspec";
6123 else if (flag_pic && (SYMBOLIC_CONST (disp)
6125 && !machopic_operand_p (disp)
6130 if (TARGET_64BIT && (index || base))
6132 /* foo@dtpoff(%rX) is ok. */
6133 if (GET_CODE (disp) != CONST
6134 || GET_CODE (XEXP (disp, 0)) != PLUS
6135 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6136 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6137 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6138 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6140 reason = "non-constant pic memory reference";
6144 else if (! legitimate_pic_address_disp_p (disp))
6146 reason = "displacement is an invalid pic construct";
6150 /* This code used to verify that a symbolic pic displacement
6151 includes the pic_offset_table_rtx register.
6153 While this is a good idea, unfortunately these constructs may
6154 be created by the "adds using lea" optimization for incorrect code.
6163 Such code is nonsensical, but results in addressing the
6164 GOT table with a pic_offset_table_rtx base.  We can't
6165 just refuse it easily, since it gets matched by the
6166 "addsi3" pattern, which later gets split to lea in case the
6167 output register differs from the input.  While this
6168 could be handled by a separate addsi pattern for this case,
6169 one that never results in lea, disabling this test seems to be
6170 the easier and correct fix for the crash. */
6172 else if (GET_CODE (disp) != LABEL_REF
6173 && GET_CODE (disp) != CONST_INT
6174 && (GET_CODE (disp) != CONST
6175 || !legitimate_constant_p (disp))
6176 && (GET_CODE (disp) != SYMBOL_REF
6177 || !legitimate_constant_p (disp)))
6179 reason = "displacement is not constant";
6182 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6184 reason = "displacement is out of range";
6189 /* Everything looks valid. */
6190 if (TARGET_DEBUG_ADDR)
6191 fprintf (stderr, "Success.\n");
6195 if (TARGET_DEBUG_ADDR)
6197 fprintf (stderr, "Error: %s\n", reason);
6198 debug_rtx (reason_rtx);
6203 /* Return a unique alias set for the GOT. */
6205 static HOST_WIDE_INT
6206 ix86_GOT_alias_set (void)
6208 static HOST_WIDE_INT set = -1;
6210 set = new_alias_set ();
6214 /* Return a legitimate reference for ORIG (an address) using the
6215 register REG. If REG is 0, a new pseudo is generated.
6217 There are two types of references that must be handled:
6219 1. Global data references must load the address from the GOT, via
6220 the PIC reg. An insn is emitted to do this load, and the reg is
6223 2. Static data references, constant pool addresses, and code labels
6224 compute the address as an offset from the GOT, whose base is in
6225 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6226 differentiate them from global data objects. The returned
6227 address is the PIC reg + an unspec constant.
6229 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6230 reg also appears in the address. */
6233 legitimize_pic_address (rtx orig, rtx reg)
6241 reg = gen_reg_rtx (Pmode);
6242 /* Use the generic Mach-O PIC machinery. */
6243 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6246 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6248 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6250 /* This symbol may be referenced via a displacement from the PIC
6251 base address (@GOTOFF). */
6253 if (reload_in_progress)
6254 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6255 if (GET_CODE (addr) == CONST)
6256 addr = XEXP (addr, 0);
6257 if (GET_CODE (addr) == PLUS)
6259 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6260 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6263 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6264 new = gen_rtx_CONST (Pmode, new);
6265 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6269 emit_move_insn (reg, new);
6273 else if (GET_CODE (addr) == SYMBOL_REF)
6277 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6278 new = gen_rtx_CONST (Pmode, new);
6279 new = gen_rtx_MEM (Pmode, new);
6280 RTX_UNCHANGING_P (new) = 1;
6281 set_mem_alias_set (new, ix86_GOT_alias_set ());
6284 reg = gen_reg_rtx (Pmode);
6285 /* Use gen_movsi directly; otherwise the address is loaded
6286 into a register for CSE.  We don't want to CSE these addresses;
6287 instead we CSE addresses from the GOT table, so skip this. */
6288 emit_insn (gen_movsi (reg, new));
6293 /* This symbol must be referenced via a load from the
6294 Global Offset Table (@GOT). */
6296 if (reload_in_progress)
6297 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6298 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6299 new = gen_rtx_CONST (Pmode, new);
6300 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6301 new = gen_rtx_MEM (Pmode, new);
6302 RTX_UNCHANGING_P (new) = 1;
6303 set_mem_alias_set (new, ix86_GOT_alias_set ());
6306 reg = gen_reg_rtx (Pmode);
6307 emit_move_insn (reg, new);
6313 if (GET_CODE (addr) == CONST)
6315 addr = XEXP (addr, 0);
6317 /* We must match stuff we generate before. Assume the only
6318 unspecs that can get here are ours. Not that we could do
6319 anything with them anyway.... */
6320 if (GET_CODE (addr) == UNSPEC
6321 || (GET_CODE (addr) == PLUS
6322 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6324 if (GET_CODE (addr) != PLUS)
6327 if (GET_CODE (addr) == PLUS)
6329 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6331 /* Check first to see if this is a constant offset from a @GOTOFF
6332 symbol reference. */
6333 if (local_symbolic_operand (op0, Pmode)
6334 && GET_CODE (op1) == CONST_INT)
6338 if (reload_in_progress)
6339 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6340 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6342 new = gen_rtx_PLUS (Pmode, new, op1);
6343 new = gen_rtx_CONST (Pmode, new);
6344 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6348 emit_move_insn (reg, new);
6354 if (INTVAL (op1) < -16*1024*1024
6355 || INTVAL (op1) >= 16*1024*1024)
6356 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6361 base = legitimize_pic_address (XEXP (addr, 0), reg);
6362 new = legitimize_pic_address (XEXP (addr, 1),
6363 base == reg ? NULL_RTX : reg);
6365 if (GET_CODE (new) == CONST_INT)
6366 new = plus_constant (base, INTVAL (new));
6369 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6371 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6372 new = XEXP (new, 1);
6374 new = gen_rtx_PLUS (Pmode, base, new);
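/* Illustrative 32-bit PIC code for the two reference styles handled by
   the routine above (assuming %ebx holds the PIC register):
     global data:  movl foo@GOT(%ebx), %eax     ; load address from GOT
     local data:   leal bar@GOTOFF(%ebx), %eax  ; PIC reg + constant
   The @GOT form costs a memory load, while @GOTOFF is a plain address
   computation, which is why local symbols take the cheaper path. */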
6382 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6385 get_thread_pointer (int to_reg)
6389 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6393 reg = gen_reg_rtx (Pmode);
6394 insn = gen_rtx_SET (VOIDmode, reg, tp);
6395 insn = emit_insn (insn);
6400 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6401 false if we expect this to be used for a memory address and true if
6402 we expect to load the address into a register. */
6405 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6407 rtx dest, base, off, pic;
6412 case TLS_MODEL_GLOBAL_DYNAMIC:
6413 dest = gen_reg_rtx (Pmode);
6416 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6419 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6420 insns = get_insns ();
6423 emit_libcall_block (insns, dest, rax, x);
6426 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6429 case TLS_MODEL_LOCAL_DYNAMIC:
6430 base = gen_reg_rtx (Pmode);
6433 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6436 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6437 insns = get_insns ();
6440 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6441 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6442 emit_libcall_block (insns, base, rax, note);
6445 emit_insn (gen_tls_local_dynamic_base_32 (base));
6447 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6448 off = gen_rtx_CONST (Pmode, off);
6450 return gen_rtx_PLUS (Pmode, base, off);
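/* Sketch of the local-dynamic sequence this expands to on i386, per
   the usual TLS ABI (registers and PLT details are assumptions):
     leal x@TLSLDM(%ebx), %eax
     call ___tls_get_addr@PLT
     leal x@DTPOFF(%eax), %edx
   One __tls_get_addr call fetches the module base; each symbol then
   costs only a constant @DTPOFF addend.  */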
6452 case TLS_MODEL_INITIAL_EXEC:
6456 type = UNSPEC_GOTNTPOFF;
6460 if (reload_in_progress)
6461 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6462 pic = pic_offset_table_rtx;
6463 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6465 else if (!TARGET_GNU_TLS)
6467 pic = gen_reg_rtx (Pmode);
6468 emit_insn (gen_set_got (pic));
6469 type = UNSPEC_GOTTPOFF;
6474 type = UNSPEC_INDNTPOFF;
6477 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6478 off = gen_rtx_CONST (Pmode, off);
6480 off = gen_rtx_PLUS (Pmode, pic, off);
6481 off = gen_rtx_MEM (Pmode, off);
6482 RTX_UNCHANGING_P (off) = 1;
6483 set_mem_alias_set (off, ix86_GOT_alias_set ());
6485 if (TARGET_64BIT || TARGET_GNU_TLS)
6487 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6488 off = force_reg (Pmode, off);
6489 return gen_rtx_PLUS (Pmode, base, off);
6493 base = get_thread_pointer (true);
6494 dest = gen_reg_rtx (Pmode);
6495 emit_insn (gen_subsi3 (dest, base, off));
6499 case TLS_MODEL_LOCAL_EXEC:
6500 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6501 (TARGET_64BIT || TARGET_GNU_TLS)
6502 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6503 off = gen_rtx_CONST (Pmode, off);
6505 if (TARGET_64BIT || TARGET_GNU_TLS)
6507 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6508 return gen_rtx_PLUS (Pmode, base, off);
6512 base = get_thread_pointer (true);
6513 dest = gen_reg_rtx (Pmode);
6514 emit_insn (gen_subsi3 (dest, base, off));
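/* The two local-exec flavors, sketched with illustrative registers:
   with GNU TLS the negative offset is simply added to the thread
   pointer,
     movl %gs:0, %eax
     leal x@NTPOFF(%eax), %edx
   while the non-GNU @TPOFF variant above subtracts the offset via
   gen_subsi3 instead.  */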
6525 /* Try machine-dependent ways of modifying an illegitimate address
6526 to be legitimate. If we find one, return the new, valid address.
6527 This macro is used in only one place: `memory_address' in explow.c.
6529 OLDX is the address as it was before break_out_memory_refs was called.
6530 In some cases it is useful to look at this to decide what needs to be done.
6532 MODE and WIN are passed so that this macro can use
6533 GO_IF_LEGITIMATE_ADDRESS.
6535 It is always safe for this macro to do nothing. It exists to recognize
6536 opportunities to optimize the output.
6538 For the 80386, we handle X+REG by loading X into a register R and
6539 using R+REG. R will go in a general reg and indexing will be used.
6540 However, if REG is a broken-out memory address or multiplication,
6541 nothing needs to be done because REG can certainly go in a general reg.
6543 When -fpic is used, special handling is needed for symbolic references.
6544 See comments by legitimize_pic_address in i386.c for details. */
6547 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6552 if (TARGET_DEBUG_ADDR)
6554 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6555 GET_MODE_NAME (mode));
6559 log = tls_symbolic_operand (x, mode);
6561 return legitimize_tls_address (x, log, false);
6563 if (flag_pic && SYMBOLIC_CONST (x))
6564 return legitimize_pic_address (x, 0);
6566 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6567 if (GET_CODE (x) == ASHIFT
6568 && GET_CODE (XEXP (x, 1)) == CONST_INT
6569 && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
6572 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6573 GEN_INT (1 << log));
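/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
   matching the scaled-index addressing mode, as in
     leal (,%eax,4), %edx
   (registers chosen purely for illustration).  */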
6576 if (GET_CODE (x) == PLUS)
6578 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6580 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6581 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6582 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
6585 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6586 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6587 GEN_INT (1 << log));
6590 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6591 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6592 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
6595 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6596 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6597 GEN_INT (1 << log));
6600 /* Put multiply first if it isn't already. */
6601 if (GET_CODE (XEXP (x, 1)) == MULT)
6603 rtx tmp = XEXP (x, 0);
6604 XEXP (x, 0) = XEXP (x, 1);
6609 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6610 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6611 created by virtual register instantiation, register elimination, and
6612 similar optimizations. */
6613 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6616 x = gen_rtx_PLUS (Pmode,
6617 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6618 XEXP (XEXP (x, 1), 0)),
6619 XEXP (XEXP (x, 1), 1));
6623 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6624 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6625 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6626 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6627 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6628 && CONSTANT_P (XEXP (x, 1)))
6631 rtx other = NULL_RTX;
6633 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6635 constant = XEXP (x, 1);
6636 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6638 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6640 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6641 other = XEXP (x, 1);
6649 x = gen_rtx_PLUS (Pmode,
6650 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6651 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6652 plus_constant (other, INTVAL (constant)));
6656 if (changed && legitimate_address_p (mode, x, FALSE))
6659 if (GET_CODE (XEXP (x, 0)) == MULT)
6662 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6665 if (GET_CODE (XEXP (x, 1)) == MULT)
6668 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6672 && GET_CODE (XEXP (x, 1)) == REG
6673 && GET_CODE (XEXP (x, 0)) == REG)
6676 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6679 x = legitimize_pic_address (x, 0);
6682 if (changed && legitimate_address_p (mode, x, FALSE))
6685 if (GET_CODE (XEXP (x, 0)) == REG)
6687 rtx temp = gen_reg_rtx (Pmode);
6688 rtx val = force_operand (XEXP (x, 1), temp);
6690 emit_move_insn (temp, val);
6696 else if (GET_CODE (XEXP (x, 1)) == REG)
6698 rtx temp = gen_reg_rtx (Pmode);
6699 rtx val = force_operand (XEXP (x, 0), temp);
6701 emit_move_insn (temp, val);
6711 /* Print an integer constant expression in assembler syntax. Addition
6712 and subtraction are the only arithmetic that may appear in these
6713 expressions. FILE is the stdio stream to write to, X is the rtx, and
6714 CODE is the operand print code from the output string. */
6717 output_pic_addr_const (FILE *file, rtx x, int code)
6721 switch (GET_CODE (x))
6731 assemble_name (file, XSTR (x, 0));
6732 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6733 fputs ("@PLT", file);
6740 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6741 assemble_name (asm_out_file, buf);
6745 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6749 /* This used to output parentheses around the expression,
6750 but that does not work on the 386 (either ATT or BSD assembler). */
6751 output_pic_addr_const (file, XEXP (x, 0), code);
6755 if (GET_MODE (x) == VOIDmode)
6757 /* We can use %d if the number is <32 bits and positive. */
6758 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6759 fprintf (file, "0x%lx%08lx",
6760 (unsigned long) CONST_DOUBLE_HIGH (x),
6761 (unsigned long) CONST_DOUBLE_LOW (x));
6763 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6766 /* We can't handle floating point constants;
6767 PRINT_OPERAND must handle them. */
6768 output_operand_lossage ("floating constant misused");
6772 /* Some assemblers need integer constants to appear first. */
6773 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6775 output_pic_addr_const (file, XEXP (x, 0), code);
6777 output_pic_addr_const (file, XEXP (x, 1), code);
6779 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6781 output_pic_addr_const (file, XEXP (x, 1), code);
6783 output_pic_addr_const (file, XEXP (x, 0), code);
6791 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6792 output_pic_addr_const (file, XEXP (x, 0), code);
6794 output_pic_addr_const (file, XEXP (x, 1), code);
6796 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6800 if (XVECLEN (x, 0) != 1)
6802 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6803 switch (XINT (x, 1))
6806 fputs ("@GOT", file);
6809 fputs ("@GOTOFF", file);
6811 case UNSPEC_GOTPCREL:
6812 fputs ("@GOTPCREL(%rip)", file);
6814 case UNSPEC_GOTTPOFF:
6815 /* FIXME: This might be @TPOFF in Sun ld too. */
6816 fputs ("@GOTTPOFF", file);
6819 fputs ("@TPOFF", file);
6823 fputs ("@TPOFF", file);
6825 fputs ("@NTPOFF", file);
6828 fputs ("@DTPOFF", file);
6830 case UNSPEC_GOTNTPOFF:
6832 fputs ("@GOTTPOFF(%rip)", file);
6834 fputs ("@GOTNTPOFF", file);
6836 case UNSPEC_INDNTPOFF:
6837 fputs ("@INDNTPOFF", file);
6840 output_operand_lossage ("invalid UNSPEC as operand");
6846 output_operand_lossage ("invalid expression as operand");
6850 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6851 We need to handle our special PIC relocations. */
6854 i386_dwarf_output_addr_const (FILE *file, rtx x)
6857 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6861 fprintf (file, "%s", ASM_LONG);
6864 output_pic_addr_const (file, x, '\0');
6866 output_addr_const (file, x);
6870 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6871 We need to emit DTP-relative relocations. */
6874 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6876 fputs (ASM_LONG, file);
6877 output_addr_const (file, x);
6878 fputs ("@DTPOFF", file);
6884 fputs (", 0", file);
6891 /* In the name of slightly smaller debug output, and to cater to
6892 general assembler lossage, recognize PIC+GOTOFF and turn it back
6893 into a direct symbol reference. */
6896 ix86_delegitimize_address (rtx orig_x)
6900 if (GET_CODE (x) == MEM)
6905 if (GET_CODE (x) != CONST
6906 || GET_CODE (XEXP (x, 0)) != UNSPEC
6907 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6908 || GET_CODE (orig_x) != MEM)
6910 return XVECEXP (XEXP (x, 0), 0, 0);
6913 if (GET_CODE (x) != PLUS
6914 || GET_CODE (XEXP (x, 1)) != CONST)
6917 if (GET_CODE (XEXP (x, 0)) == REG
6918 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6919 /* %ebx + GOT/GOTOFF */
6921 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6923 /* %ebx + %reg * scale + GOT/GOTOFF */
6925 if (GET_CODE (XEXP (y, 0)) == REG
6926 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6928 else if (GET_CODE (XEXP (y, 1)) == REG
6929 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6933 if (GET_CODE (y) != REG
6934 && GET_CODE (y) != MULT
6935 && GET_CODE (y) != ASHIFT)
6941 x = XEXP (XEXP (x, 1), 0);
6942 if (GET_CODE (x) == UNSPEC
6943 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6944 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6947 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6948 return XVECEXP (x, 0, 0);
6951 if (GET_CODE (x) == PLUS
6952 && GET_CODE (XEXP (x, 0)) == UNSPEC
6953 && GET_CODE (XEXP (x, 1)) == CONST_INT
6954 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6955 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6956 && GET_CODE (orig_x) != MEM)))
6958 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6960 return gen_rtx_PLUS (Pmode, y, x);
6968 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6973 if (mode == CCFPmode || mode == CCFPUmode)
6975 enum rtx_code second_code, bypass_code;
6976 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6977 if (bypass_code != NIL || second_code != NIL)
6979 code = ix86_fp_compare_code_to_integer (code);
6983 code = reverse_condition (code);
6994 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6999 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7000 Those same assemblers have the same but opposite lossage on cmov. */
7003 suffix = fp ? "nbe" : "a";
7006 if (mode == CCNOmode || mode == CCGOCmode)
7008 else if (mode == CCmode || mode == CCGCmode)
7019 if (mode == CCNOmode || mode == CCGOCmode)
7021 else if (mode == CCmode || mode == CCGCmode)
7030 suffix = fp ? "nb" : "ae";
7033 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7043 suffix = fp ? "u" : "p";
7046 suffix = fp ? "nu" : "np";
7051 fputs (suffix, file);
7054 /* Print the name of register X to FILE based on its machine mode and number.
7055 If CODE is 'w', pretend the mode is HImode.
7056 If CODE is 'b', pretend the mode is QImode.
7057 If CODE is 'k', pretend the mode is SImode.
7058 If CODE is 'q', pretend the mode is DImode.
7059 If CODE is 'h', pretend the reg is the `high' byte register.
7060 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7063 print_reg (rtx x, int code, FILE *file)
7065 if (REGNO (x) == ARG_POINTER_REGNUM
7066 || REGNO (x) == FRAME_POINTER_REGNUM
7067 || REGNO (x) == FLAGS_REG
7068 || REGNO (x) == FPSR_REG)
7071 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7074 if (code == 'w' || MMX_REG_P (x))
7076 else if (code == 'b')
7078 else if (code == 'k')
7080 else if (code == 'q')
7082 else if (code == 'y')
7084 else if (code == 'h')
7087 code = GET_MODE_SIZE (GET_MODE (x));
7089 /* Irritatingly, AMD extended registers use a different naming convention
7090 from the normal registers. */
7091 if (REX_INT_REG_P (x))
7098 error ("extended registers have no high halves");
7101 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7104 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7107 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7110 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7113 error ("unsupported operand size for extended register");
7121 if (STACK_TOP_P (x))
7123 fputs ("st(0)", file);
7130 if (! ANY_FP_REG_P (x))
7131 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7136 fputs (hi_reg_name[REGNO (x)], file);
7139 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7141 fputs (qi_reg_name[REGNO (x)], file);
7144 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7146 fputs (qi_high_reg_name[REGNO (x)], file);
7153 /* Locate some local-dynamic symbol still in use by this function
7154 so that we can print its name in some tls_local_dynamic_base
7158 get_some_local_dynamic_name (void)
7162 if (cfun->machine->some_ld_name)
7163 return cfun->machine->some_ld_name;
7165 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7167 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7168 return cfun->machine->some_ld_name;
7174 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7178 if (GET_CODE (x) == SYMBOL_REF
7179 && local_dynamic_symbolic_operand (x, Pmode))
7181 cfun->machine->some_ld_name = XSTR (x, 0);
7189 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7190 C -- print opcode suffix for set/cmov insn.
7191 c -- like C, but print reversed condition
7192 F,f -- likewise, but for floating-point.
7193 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7195 R -- print the prefix for register names.
7196 z -- print the opcode suffix for the size of the current operand.
7197 * -- print a star (in certain assembler syntax)
7198 A -- print an absolute memory reference.
7199 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7200 s -- print a shift double count, followed by the assembler's argument
7202 b -- print the QImode name of the register for the indicated operand.
7203 %b0 would print %al if operands[0] is reg 0.
7204 w -- likewise, print the HImode name of the register.
7205 k -- likewise, print the SImode name of the register.
7206 q -- likewise, print the DImode name of the register.
7207 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7208 y -- print "st(0)" instead of "st" as a register.
7209 D -- print condition for SSE cmp instruction.
7210 P -- if PIC, print an @PLT suffix.
7211 X -- don't print any sort of PIC '@' suffix for a symbol.
7212 & -- print some in-use local-dynamic symbol name.
7216 print_operand (FILE *file, rtx x, int code)
7223 if (ASSEMBLER_DIALECT == ASM_ATT)
7228 assemble_name (file, get_some_local_dynamic_name ());
7232 if (ASSEMBLER_DIALECT == ASM_ATT)
7234 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7236 /* Intel syntax. For absolute addresses, registers should not
7237 be surrounded by braces. */
7238 if (GET_CODE (x) != REG)
7241 PRINT_OPERAND (file, x, 0);
7249 PRINT_OPERAND (file, x, 0);
7254 if (ASSEMBLER_DIALECT == ASM_ATT)
7259 if (ASSEMBLER_DIALECT == ASM_ATT)
7264 if (ASSEMBLER_DIALECT == ASM_ATT)
7269 if (ASSEMBLER_DIALECT == ASM_ATT)
7274 if (ASSEMBLER_DIALECT == ASM_ATT)
7279 if (ASSEMBLER_DIALECT == ASM_ATT)
7284 /* 387 opcodes don't get size suffixes if the operands are
7286 if (STACK_REG_P (x))
7289 /* Likewise if using Intel opcodes. */
7290 if (ASSEMBLER_DIALECT == ASM_INTEL)
7293 /* Derive the size of the op from the size of the operand. */
7294 switch (GET_MODE_SIZE (GET_MODE (x)))
7297 #ifdef HAVE_GAS_FILDS_FISTS
7303 if (GET_MODE (x) == SFmode)
7318 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7320 #ifdef GAS_MNEMONICS
7346 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7348 PRINT_OPERAND (file, x, 0);
7354 /* A little bit of braindamage here. The SSE compare instructions
7355 use completely different names for the comparisons than the
7356 fp conditional moves do. */
7357 switch (GET_CODE (x))
7372 fputs ("unord", file);
7376 fputs ("neq", file);
7380 fputs ("nlt", file);
7384 fputs ("nle", file);
7387 fputs ("ord", file);
7395 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7396 if (ASSEMBLER_DIALECT == ASM_ATT)
7398 switch (GET_MODE (x))
7400 case HImode: putc ('w', file); break;
7402 case SFmode: putc ('l', file); break;
7404 case DFmode: putc ('q', file); break;
7412 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7415 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7416 if (ASSEMBLER_DIALECT == ASM_ATT)
7419 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7422 /* Like above, but reverse condition */
7424 /* Check to see if the argument to %c is really a constant
7425 and not a condition code that needs to be reversed. */
7426 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7428 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7431 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7434 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7435 if (ASSEMBLER_DIALECT == ASM_ATT)
7438 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7444 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7447 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7450 int pred_val = INTVAL (XEXP (x, 0));
7452 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7453 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7455 int taken = pred_val > REG_BR_PROB_BASE / 2;
7456 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7458 /* Emit hints only when the default branch prediction
7459 heuristics would fail. */
7460 if (taken != cputaken)
7462 /* We use 3e (DS) prefix for taken branches and
7463 2e (CS) prefix for not taken branches. */
7465 fputs ("ds ; ", file);
7467 fputs ("cs ; ", file);
7474 output_operand_lossage ("invalid operand code `%c'", code);
7478 if (GET_CODE (x) == REG)
7479 print_reg (x, code, file);
7481 else if (GET_CODE (x) == MEM)
7483 /* No `byte ptr' prefix for call instructions. */
7484 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7487 switch (GET_MODE_SIZE (GET_MODE (x)))
7489 case 1: size = "BYTE"; break;
7490 case 2: size = "WORD"; break;
7491 case 4: size = "DWORD"; break;
7492 case 8: size = "QWORD"; break;
7493 case 12: size = "XWORD"; break;
7494 case 16: size = "XMMWORD"; break;
7499 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7502 else if (code == 'w')
7504 else if (code == 'k')
7508 fputs (" PTR ", file);
7512 /* Avoid (%rip) for call operands. */
7513 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7514 && GET_CODE (x) != CONST_INT)
7515 output_addr_const (file, x);
7516 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7517 output_operand_lossage ("invalid constraints for operand");
7522 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7527 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7528 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7530 if (ASSEMBLER_DIALECT == ASM_ATT)
7532 fprintf (file, "0x%08lx", l);
7535 /* These float cases don't actually occur as immediate operands. */
7536 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7540 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7541 fprintf (file, "%s", dstr);
7544 else if (GET_CODE (x) == CONST_DOUBLE
7545 && GET_MODE (x) == XFmode)
7549 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7550 fprintf (file, "%s", dstr);
7557 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7559 if (ASSEMBLER_DIALECT == ASM_ATT)
7562 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7563 || GET_CODE (x) == LABEL_REF)
7565 if (ASSEMBLER_DIALECT == ASM_ATT)
7568 fputs ("OFFSET FLAT:", file);
7571 if (GET_CODE (x) == CONST_INT)
7572 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7574 output_pic_addr_const (file, x, code);
7576 output_addr_const (file, x);
7580 /* Print a memory operand whose address is ADDR. */
7583 print_operand_address (FILE *file, rtx addr)
7585 struct ix86_address parts;
7586 rtx base, index, disp;
7589 if (! ix86_decompose_address (addr, &parts))
7593 index = parts.index;
7595 scale = parts.scale;
7603 if (USER_LABEL_PREFIX[0] == 0)
7605 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7611 if (!base && !index)
7613 /* A displacement-only address requires special attention. */
7615 if (GET_CODE (disp) == CONST_INT)
7617 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7619 if (USER_LABEL_PREFIX[0] == 0)
7621 fputs ("ds:", file);
7623 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7626 output_pic_addr_const (file, disp, 0);
7628 output_addr_const (file, disp);
7630 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
7632 && ((GET_CODE (disp) == SYMBOL_REF
7633 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7634 || GET_CODE (disp) == LABEL_REF
7635 || (GET_CODE (disp) == CONST
7636 && GET_CODE (XEXP (disp, 0)) == PLUS
7637 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7638 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7639 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7640 fputs ("(%rip)", file);
7644 if (ASSEMBLER_DIALECT == ASM_ATT)
7649 output_pic_addr_const (file, disp, 0);
7650 else if (GET_CODE (disp) == LABEL_REF)
7651 output_asm_label (disp);
7653 output_addr_const (file, disp);
7658 print_reg (base, 0, file);
7662 print_reg (index, 0, file);
7664 fprintf (file, ",%d", scale);
7670 rtx offset = NULL_RTX;
7674 /* Pull out the offset of a symbol; print any symbol itself. */
7675 if (GET_CODE (disp) == CONST
7676 && GET_CODE (XEXP (disp, 0)) == PLUS
7677 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7679 offset = XEXP (XEXP (disp, 0), 1);
7680 disp = gen_rtx_CONST (VOIDmode,
7681 XEXP (XEXP (disp, 0), 0));
7685 output_pic_addr_const (file, disp, 0);
7686 else if (GET_CODE (disp) == LABEL_REF)
7687 output_asm_label (disp);
7688 else if (GET_CODE (disp) == CONST_INT)
7691 output_addr_const (file, disp);
7697 print_reg (base, 0, file);
7700 if (INTVAL (offset) >= 0)
7702 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7706 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7713 print_reg (index, 0, file);
7715 fprintf (file, "*%d", scale);
7723 output_addr_const_extra (FILE *file, rtx x)
7727 if (GET_CODE (x) != UNSPEC)
7730 op = XVECEXP (x, 0, 0);
7731 switch (XINT (x, 1))
7733 case UNSPEC_GOTTPOFF:
7734 output_addr_const (file, op);
7735 /* FIXME: This might be @TPOFF in Sun ld. */
7736 fputs ("@GOTTPOFF", file);
7739 output_addr_const (file, op);
7740 fputs ("@TPOFF", file);
7743 output_addr_const (file, op);
7745 fputs ("@TPOFF", file);
7747 fputs ("@NTPOFF", file);
7750 output_addr_const (file, op);
7751 fputs ("@DTPOFF", file);
7753 case UNSPEC_GOTNTPOFF:
7754 output_addr_const (file, op);
7756 fputs ("@GOTTPOFF(%rip)", file);
7758 fputs ("@GOTNTPOFF", file);
7760 case UNSPEC_INDNTPOFF:
7761 output_addr_const (file, op);
7762 fputs ("@INDNTPOFF", file);
7772 /* Split one or more DImode RTL references into pairs of SImode
7773 references. The RTL can be REG, offsettable MEM, integer constant, or
7774 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7775 split and "num" is its length. lo_half and hi_half are output arrays
7776 that parallel "operands". */
7779 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7783 rtx op = operands[num];
7785 /* simplify_subreg refuses to split volatile memory addresses,
7786 but we still have to handle them. */
7787 if (GET_CODE (op) == MEM)
7789 lo_half[num] = adjust_address (op, SImode, 0);
7790 hi_half[num] = adjust_address (op, SImode, 4);
7794 lo_half[num] = simplify_gen_subreg (SImode, op,
7795 GET_MODE (op) == VOIDmode
7796 ? DImode : GET_MODE (op), 0);
7797 hi_half[num] = simplify_gen_subreg (SImode, op,
7798 GET_MODE (op) == VOIDmode
7799 ? DImode : GET_MODE (op), 4);
7803 /* Split one or more TImode RTL references into pairs of SImode
7804 references. The RTL can be REG, offsettable MEM, integer constant, or
7805 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7806 split and "num" is its length. lo_half and hi_half are output arrays
7807 that parallel "operands". */
7810 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7814 rtx op = operands[num];
7816 /* simplify_subreg refuses to split volatile memory addresses, but we
7817 still have to handle them. */
7818 if (GET_CODE (op) == MEM)
7820 lo_half[num] = adjust_address (op, DImode, 0);
7821 hi_half[num] = adjust_address (op, DImode, 8);
7825 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7826 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7831 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7832 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7833 is the expression of the binary operation. The output may either be
7834 emitted here, or returned to the caller, like all output_* functions.
7836 There is no guarantee that the operands are the same mode, as they
7837 might be within FLOAT or FLOAT_EXTEND expressions. */
7839 #ifndef SYSV386_COMPAT
7840 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7841 wants to fix the assemblers because that causes incompatibility
7842 with gcc. No-one wants to fix gcc because that causes
7843 incompatibility with assemblers... You can use the option of
7844 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7845 #define SYSV386_COMPAT 1
7849 output_387_binary_op (rtx insn, rtx *operands)
7851 static char buf[30];
7854 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7856 #ifdef ENABLE_CHECKING
7857 /* Even if we do not want to check the inputs, this documents the input
7858 constraints, which helps in understanding the following code. */
7859 if (STACK_REG_P (operands[0])
7860 && ((REG_P (operands[1])
7861 && REGNO (operands[0]) == REGNO (operands[1])
7862 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7863 || (REG_P (operands[2])
7864 && REGNO (operands[0]) == REGNO (operands[2])
7865 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7866 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7872 switch (GET_CODE (operands[3]))
7875 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7876 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7884 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7885 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7893 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7894 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7902 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7903 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7917 if (GET_MODE (operands[0]) == SFmode)
7918 strcat (buf, "ss\t{%2, %0|%0, %2}");
7920 strcat (buf, "sd\t{%2, %0|%0, %2}");
7925 switch (GET_CODE (operands[3]))
7929 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7931 rtx temp = operands[2];
7932 operands[2] = operands[1];
7936 /* We know operands[0] == operands[1]. */
7938 if (GET_CODE (operands[2]) == MEM)
7944 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7946 if (STACK_TOP_P (operands[0]))
7947 /* How is it that we are storing to a dead operand[2]?
7948 Well, presumably operands[1] is dead too. We can't
7949 store the result to st(0) as st(0) gets popped on this
7950 instruction. Instead store to operands[2] (which I
7951 think has to be st(1)). st(1) will be popped later.
7952 gcc <= 2.8.1 didn't have this check and generated
7953 assembly code that the Unixware assembler rejected. */
7954 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7956 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7960 if (STACK_TOP_P (operands[0]))
7961 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7963 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7968 if (GET_CODE (operands[1]) == MEM)
7974 if (GET_CODE (operands[2]) == MEM)
7980 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7983 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7984 derived assemblers, confusingly reverse the direction of
7985 the operation for fsub{r} and fdiv{r} when the
7986 destination register is not st(0). The Intel assembler
7987 doesn't have this brain damage. Read !SYSV386_COMPAT to
7988 figure out what the hardware really does. */
7989 if (STACK_TOP_P (operands[0]))
7990 p = "{p\t%0, %2|rp\t%2, %0}";
7992 p = "{rp\t%2, %0|p\t%0, %2}";
7994 if (STACK_TOP_P (operands[0]))
7995 /* As above for fmul/fadd, we can't store to st(0). */
7996 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7998 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8003 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8006 if (STACK_TOP_P (operands[0]))
8007 p = "{rp\t%0, %1|p\t%1, %0}";
8009 p = "{p\t%1, %0|rp\t%0, %1}";
8011 if (STACK_TOP_P (operands[0]))
8012 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8014 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8019 if (STACK_TOP_P (operands[0]))
8021 if (STACK_TOP_P (operands[1]))
8022 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8024 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8027 else if (STACK_TOP_P (operands[1]))
8030 p = "{\t%1, %0|r\t%0, %1}";
8032 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8038 p = "{r\t%2, %0|\t%0, %2}";
8040 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8053 /* Output code to initialize control word copies used by
8054 trunc?f?i patterns. NORMAL is set to the current control word, while
8055 ROUND_DOWN is set to a control word that rounds toward zero (truncation). */
8057 emit_i387_cw_initialization (rtx normal, rtx round_down)
8059 rtx reg = gen_reg_rtx (HImode);
8061 emit_insn (gen_x86_fnstcw_1 (normal));
8062 emit_move_insn (reg, normal);
8063 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8065 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8067 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8068 emit_move_insn (round_down, reg);
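/* The 0xc00 OR'ed in above sets both rounding-control bits (bits 10
   and 11) of the i387 control word, i.e. RC = 11b, round toward zero,
   which is what C's truncating float-to-integer conversions need.  */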
8071 /* Output code for INSN to convert a float to a signed int. OPERANDS
8072 are the insn operands. The output may be [HSD]Imode and the input
8073 operand may be [SDX]Fmode. */
8076 output_fix_trunc (rtx insn, rtx *operands)
8078 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8079 int dimode_p = GET_MODE (operands[0]) == DImode;
8081 /* Jump through a hoop or two for DImode, since the hardware has no
8082 non-popping instruction. We used to do this a different way, but
8083 that was somewhat fragile and broke with post-reload splitters. */
8084 if (dimode_p && !stack_top_dies)
8085 output_asm_insn ("fld\t%y1", operands);
8087 if (!STACK_TOP_P (operands[1]))
8090 if (GET_CODE (operands[0]) != MEM)
8093 output_asm_insn ("fldcw\t%3", operands);
8094 if (stack_top_dies || dimode_p)
8095 output_asm_insn ("fistp%z0\t%0", operands);
8097 output_asm_insn ("fist%z0\t%0", operands);
8098 output_asm_insn ("fldcw\t%2", operands);
8103 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8104 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8105 when fucom should be used. */
8108 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8111 rtx cmp_op0 = operands[0];
8112 rtx cmp_op1 = operands[1];
8113 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8118 cmp_op1 = operands[2];
8122 if (GET_MODE (operands[0]) == SFmode)
8124 return "ucomiss\t{%1, %0|%0, %1}";
8126 return "comiss\t{%1, %0|%0, %1}";
8129 return "ucomisd\t{%1, %0|%0, %1}";
8131 return "comisd\t{%1, %0|%0, %1}";
8134 if (! STACK_TOP_P (cmp_op0))
8137 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8139 if (STACK_REG_P (cmp_op1)
8141 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8142 && REGNO (cmp_op1) != FIRST_STACK_REG)
8144 /* If both the top of the 387 stack dies and the other operand
8145 is also a stack register that dies, then this must be an
8146 `fcompp' float compare. */
8150 /* There is no double popping fcomi variant. Fortunately,
8151 eflags is immune from the fstp's cc clobbering. */
8153 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8155 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8163 return "fucompp\n\tfnstsw\t%0";
8165 return "fcompp\n\tfnstsw\t%0";
8178 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8180 static const char * const alt[24] =
8192 "fcomi\t{%y1, %0|%0, %y1}",
8193 "fcomip\t{%y1, %0|%0, %y1}",
8194 "fucomi\t{%y1, %0|%0, %y1}",
8195 "fucomip\t{%y1, %0|%0, %y1}",
8202 "fcom%z2\t%y2\n\tfnstsw\t%0",
8203 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8204 "fucom%z2\t%y2\n\tfnstsw\t%0",
8205 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8207 "ficom%z2\t%y2\n\tfnstsw\t%0",
8208 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8216 mask = eflags_p << 3;
8217 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8218 mask |= unordered_p << 1;
8219 mask |= stack_top_dies;
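/* Worked example of the encoding: fcomi requested (eflags_p == 1), an
   FP operand, ordered compare, and a dying top of stack gives
   mask = (1 << 3) | (0 << 2) | (0 << 1) | 1 = 9, which selects
   "fcomip" in the table above.  */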
8232 ix86_output_addr_vec_elt (FILE *file, int value)
8234 const char *directive = ASM_LONG;
8239 directive = ASM_QUAD;
8245 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8249 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8252 fprintf (file, "%s%s%d-%s%d\n",
8253 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8254 else if (HAVE_AS_GOTOFF_IN_DATA)
8255 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8257 else if (TARGET_MACHO)
8259 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8260 machopic_output_function_base_name (file);
8261 fprintf(file, "\n");
8265 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8266 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8269 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8273 ix86_expand_clear (rtx dest)
8277 /* We play register width games, which are only valid after reload. */
8278 if (!reload_completed)
8281 /* Avoid HImode and its attendant prefix byte. */
8282 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8283 dest = gen_rtx_REG (SImode, REGNO (dest));
8285 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8287 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8288 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8290 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8291 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
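/* I.e. prefer the 2-byte "xorl %eax, %eax" over the 5-byte
   "movl $0, %eax" whenever clobbering the flags is acceptable
   (register named only for illustration).  */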
8297 /* X is an unchanging MEM. If it is a constant pool reference, return
8298 the constant pool rtx, else NULL. */
8301 maybe_get_pool_constant (rtx x)
8303 x = ix86_delegitimize_address (XEXP (x, 0));
8305 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8306 return get_pool_constant (x);
8312 ix86_expand_move (enum machine_mode mode, rtx operands[])
8314 int strict = (reload_in_progress || reload_completed);
8316 enum tls_model model;
8321 model = tls_symbolic_operand (op1, Pmode);
8324 op1 = legitimize_tls_address (op1, model, true);
8325 op1 = force_operand (op1, op0);
8330 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8335 rtx temp = ((reload_in_progress
8336 || ((op0 && GET_CODE (op0) == REG)
8338 ? op0 : gen_reg_rtx (Pmode));
8339 op1 = machopic_indirect_data_reference (op1, temp);
8340 op1 = machopic_legitimize_pic_address (op1, mode,
8341 temp == op1 ? 0 : temp);
8343 else if (MACHOPIC_INDIRECT)
8344 op1 = machopic_indirect_data_reference (op1, 0);
8348 if (GET_CODE (op0) == MEM)
8349 op1 = force_reg (Pmode, op1);
8353 if (GET_CODE (temp) != REG)
8354 temp = gen_reg_rtx (Pmode);
8355 temp = legitimize_pic_address (op1, temp);
8360 #endif /* TARGET_MACHO */
8364 if (GET_CODE (op0) == MEM
8365 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8366 || !push_operand (op0, mode))
8367 && GET_CODE (op1) == MEM)
8368 op1 = force_reg (mode, op1);
8370 if (push_operand (op0, mode)
8371 && ! general_no_elim_operand (op1, mode))
8372 op1 = copy_to_mode_reg (mode, op1);
8374 /* Force large constants in 64-bit compilation into a register
8375 to get them CSEd. */
8376 if (TARGET_64BIT && mode == DImode
8377 && immediate_operand (op1, mode)
8378 && !x86_64_zero_extended_value (op1)
8379 && !register_operand (op0, mode)
8380 && optimize && !reload_completed && !reload_in_progress)
8381 op1 = copy_to_mode_reg (mode, op1);
8383 if (FLOAT_MODE_P (mode))
8385 /* If we are loading a floating point constant to a register,
8386 force the value to memory now, since we'll get better code
8387 out of the back end. */
8391 else if (GET_CODE (op1) == CONST_DOUBLE)
8393 op1 = validize_mem (force_const_mem (mode, op1));
8394 if (!register_operand (op0, mode))
8396 rtx temp = gen_reg_rtx (mode);
8397 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8398 emit_move_insn (op0, temp);
8405 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8409 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8411 /* Force constants other than zero into memory. We do not know how
8412 the instructions used to build constants modify the upper 64 bits
8413 of the register; once we have that information we may be able
8414 to handle some of them more efficiently. */
8415 if ((reload_in_progress | reload_completed) == 0
8416 && register_operand (operands[0], mode)
8417 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8418 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8420 /* Make operand1 a register if it isn't already. */
8422 && !register_operand (operands[0], mode)
8423 && !register_operand (operands[1], mode))
8425 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8426 emit_move_insn (operands[0], temp);
8430 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8433 /* Attempt to expand a binary operator. Make the expansion closer to the
8434 actual machine than just general_operand, which would allow 3 separate
8435 memory references (one output, two input) in a single insn. */
8438 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8441 int matching_memory;
8442 rtx src1, src2, dst, op, clob;
8448 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8449 if (GET_RTX_CLASS (code) == 'c'
8450 && (rtx_equal_p (dst, src2)
8451 || immediate_operand (src1, mode)))
8458 /* If the destination is memory, and we do not have matching source
8459 operands, do things in registers. */
8460 matching_memory = 0;
8461 if (GET_CODE (dst) == MEM)
8463 if (rtx_equal_p (dst, src1))
8464 matching_memory = 1;
8465 else if (GET_RTX_CLASS (code) == 'c'
8466 && rtx_equal_p (dst, src2))
8467 matching_memory = 2;
8469 dst = gen_reg_rtx (mode);
8473 /* The two source operands cannot both be in memory. */
8473 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8475 if (matching_memory != 2)
8476 src2 = force_reg (mode, src2);
8478 src1 = force_reg (mode, src1);
8483 /* If the operation is not commutative, source 1 cannot be a constant
8482 or non-matching memory. */
8483 if ((CONSTANT_P (src1)
8484 || (!matching_memory && GET_CODE (src1) == MEM))
8485 && GET_RTX_CLASS (code) != 'c')
8486 src1 = force_reg (mode, src1);
8488 /* If optimizing, copy to regs to improve CSE */
8489 if (optimize && ! no_new_pseudos)
8491 if (GET_CODE (dst) == MEM)
8492 dst = gen_reg_rtx (mode);
8493 if (GET_CODE (src1) == MEM)
8494 src1 = force_reg (mode, src1);
8495 if (GET_CODE (src2) == MEM)
8496 src2 = force_reg (mode, src2);
8499 /* Emit the instruction. */
8501 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8502 if (reload_in_progress)
8504 /* Reload doesn't know about the flags register, and doesn't know that
8505 it doesn't want to clobber it. We can only do this with PLUS. */
8512 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8513 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8516 /* Fix up the destination if needed. */
8517 if (dst != operands[0])
8518 emit_move_insn (operands[0], dst);
8521 /* Return TRUE or FALSE depending on whether the binary operator meets the
8522 appropriate constraints. */
8525 ix86_binary_operator_ok (enum rtx_code code,
8526 enum machine_mode mode ATTRIBUTE_UNUSED,
8529 /* The two source operands cannot both be in memory. */
8530 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8532 /* If the operation is not commutative, source 1 cannot be a constant. */
8533 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8535 /* If the destination is memory, we must have a matching source operand. */
8536 if (GET_CODE (operands[0]) == MEM
8537 && ! (rtx_equal_p (operands[0], operands[1])
8538 || (GET_RTX_CLASS (code) == 'c'
8539 && rtx_equal_p (operands[0], operands[2]))))
8541 /* If the operation is not commutative and source 1 is memory, we must
8542 have a matching destination. */
8543 if (GET_CODE (operands[1]) == MEM
8544 && GET_RTX_CLASS (code) != 'c'
8545 && ! rtx_equal_p (operands[0], operands[1]))
8550 /* Attempt to expand a unary operator. Make the expansion closer to the
8551 actual machine than just general_operand, which would allow 2 separate
8552 memory references (one output, one input) in a single insn. */
8555 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8558 int matching_memory;
8559 rtx src, dst, op, clob;
8564 /* If the destination is memory, and we do not have matching source
8565 operands, do things in registers. */
8566 matching_memory = 0;
8567 if (GET_CODE (dst) == MEM)
8569 if (rtx_equal_p (dst, src))
8570 matching_memory = 1;
8572 dst = gen_reg_rtx (mode);
8575 /* When the source operand is memory, the destination must match. */
8576 if (!matching_memory && GET_CODE (src) == MEM)
8577 src = force_reg (mode, src);
8579 /* If optimizing, copy to regs to improve CSE */
8580 if (optimize && ! no_new_pseudos)
8582 if (GET_CODE (dst) == MEM)
8583 dst = gen_reg_rtx (mode);
8584 if (GET_CODE (src) == MEM)
8585 src = force_reg (mode, src);
8588 /* Emit the instruction. */
8590 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8591 if (reload_in_progress || code == NOT)
8593 /* Reload doesn't know about the flags register, and doesn't know that
8594 it doesn't want to clobber it. */
8601 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8602 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8605 /* Fix up the destination if needed. */
8606 if (dst != operands[0])
8607 emit_move_insn (operands[0], dst);
8610 /* Return TRUE or FALSE depending on whether the unary operator meets the
8611 appropriate constraints. */
8614 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8615 enum machine_mode mode ATTRIBUTE_UNUSED,
8616 rtx operands[2] ATTRIBUTE_UNUSED)
8618 /* If one of the operands is memory, source and destination must match. */
8619 if ((GET_CODE (operands[0]) == MEM
8620 || GET_CODE (operands[1]) == MEM)
8621 && ! rtx_equal_p (operands[0], operands[1]))
8626 /* Return TRUE or FALSE depending on whether the first SET in INSN
8627 has source and destination with matching CC modes, and whether the
8628 CC mode is at least as constrained as REQ_MODE. */
8631 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8634 enum machine_mode set_mode;
8636 set = PATTERN (insn);
8637 if (GET_CODE (set) == PARALLEL)
8638 set = XVECEXP (set, 0, 0);
8639 if (GET_CODE (set) != SET)
8641 if (GET_CODE (SET_SRC (set)) != COMPARE)
8644 set_mode = GET_MODE (SET_DEST (set));
8648 if (req_mode != CCNOmode
8649 && (req_mode != CCmode
8650 || XEXP (SET_SRC (set), 1) != const0_rtx))
8654 if (req_mode == CCGCmode)
8658 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8662 if (req_mode == CCZmode)
8672 return (GET_MODE (SET_SRC (set)) == set_mode);
8675 /* Generate insn patterns to do an integer compare of OPERANDS. */
8678 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8680 enum machine_mode cmpmode;
8683 cmpmode = SELECT_CC_MODE (code, op0, op1);
8684 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8686 /* This is very simple, but making the interface the same as in the
8687 FP case makes the rest of the code easier. */
8688 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8689 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8691 /* Return the test that should be put into the flags user, i.e.
8692 the bcc, scc, or cmov instruction. */
8693 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
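/* E.g. for a signed "a < b" with b not const0_rtx this emits, roughly,
     (set (reg:CCGC FLAGS_REG) (compare:CCGC a b))
   and hands back (lt (reg:CCGC FLAGS_REG) (const_int 0)) for the
   branch/setcc/cmov expanders to consume -- the CC mode shown is an
   assumption following SELECT_CC_MODE.  */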
8696 /* Figure out whether to use ordered or unordered fp comparisons.
8697 Return the appropriate mode to use. */
8700 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8702 /* ??? In order to make all comparisons reversible, we do all comparisons
8703 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8704 all forms of trapping and nontrapping comparisons, we can make inequality
8705 comparisons trapping again, since it results in better code when using
8706 FCOM based compares. */
8707 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8711 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8713 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8714 return ix86_fp_compare_mode (code);
8717 /* Only zero flag is needed. */
8719 case NE: /* ZF!=0 */
8721 /* Codes needing carry flag. */
8722 case GEU: /* CF=0 */
8723 case GTU: /* CF=0 & ZF=0 */
8724 case LTU: /* CF=1 */
8725 case LEU: /* CF=1 | ZF=1 */
8727 /* Codes possibly doable only with sign flag when
8728 comparing against zero. */
8729 case GE: /* SF=OF or SF=0 */
8730 case LT: /* SF<>OF or SF=1 */
8731 if (op1 == const0_rtx)
8734 /* For other cases Carry flag is not required. */
8736 /* Codes doable only with the sign flag when comparing
8737 against zero, but for which we lack a jump instruction,
8738 so we need to use relational tests against overflow,
8739 which thus needs to be zero. */
8740 case GT: /* ZF=0 & SF=OF */
8741 case LE: /* ZF=1 | SF<>OF */
8742 if (op1 == const0_rtx)
8746 /* The strcmp pattern does (use flags), and combine may ask us for a proper
8755 /* Return the fixed registers used for condition codes. */
8758 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8765 /* If two condition code modes are compatible, return a condition code
8766 mode which is compatible with both. Otherwise, return
8769 static enum machine_mode
8770 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8775 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8778 if ((m1 == CCGCmode && m2 == CCGOCmode)
8779 || (m1 == CCGOCmode && m2 == CCGCmode))
8807 /* These are only compatible with themselves, which we already
8813 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8816 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8818 enum rtx_code swapped_code = swap_condition (code);
8819 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8820 || (ix86_fp_comparison_cost (swapped_code)
8821 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8824 /* Swap, force into registers, or otherwise massage the two operands
8825 to a fp comparison. The operands are updated in place; the new
8826 comparison code is returned. */
8828 static enum rtx_code
8829 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8831 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8832 rtx op0 = *pop0, op1 = *pop1;
8833 enum machine_mode op_mode = GET_MODE (op0);
8834 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8836 /* All of the unordered compare instructions only work on registers.
8837 The same is true of the XFmode compare instructions. The same is
8838 true of the fcomi compare instructions. */
8841 && (fpcmp_mode == CCFPUmode
8842 || op_mode == XFmode
8843 || ix86_use_fcomi_compare (code)))
8845 op0 = force_reg (op_mode, op0);
8846 op1 = force_reg (op_mode, op1);
8850 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8851 things around if they appear profitable, otherwise force op0
8854 if (standard_80387_constant_p (op0) == 0
8855 || (GET_CODE (op0) == MEM
8856 && ! (standard_80387_constant_p (op1) == 0
8857 || GET_CODE (op1) == MEM)))
8860 tmp = op0, op0 = op1, op1 = tmp;
8861 code = swap_condition (code);
8864 if (GET_CODE (op0) != REG)
8865 op0 = force_reg (op_mode, op0);
8867 if (CONSTANT_P (op1))
8869 if (standard_80387_constant_p (op1))
8870 op1 = force_reg (op_mode, op1);
8872 op1 = validize_mem (force_const_mem (op_mode, op1));
8876 /* Try to rearrange the comparison to make it cheaper. */
8877 if (ix86_fp_comparison_cost (code)
8878 > ix86_fp_comparison_cost (swap_condition (code))
8879 && (GET_CODE (op1) == REG || !no_new_pseudos))
8882 tmp = op0, op0 = op1, op1 = tmp;
8883 code = swap_condition (code);
8884 if (GET_CODE (op0) != REG)
8885 op0 = force_reg (op_mode, op0);
8893 /* Convert comparison codes we use to represent FP comparison to integer
8894 code that will result in a proper branch. Return UNKNOWN if no such code
8896 static enum rtx_code
8897 ix86_fp_compare_code_to_integer (enum rtx_code code)
8926 /* Split comparison code CODE into comparisons we can do using branch
8927 instructions. BYPASS_CODE is the comparison code for the branch that will
8928 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8929 is not required, set its value to NIL.
8930 We never require more than two branches. */
8932 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8933 enum rtx_code *first_code,
8934 enum rtx_code *second_code)
8940 /* The fcomi comparison sets flags as follows:
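   (architecturally, fcomi maps the i387 condition bits C3/C2/C0
   into ZF/PF/CF):

          cmp    ZF PF CF
          ---------------
          >       0  0  0
          <       0  0  1
          =       1  0  0
          un      1  1  1
   */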
8950 case GT: /* GTU - CF=0 & ZF=0 */
8951 case GE: /* GEU - CF=0 */
8952 case ORDERED: /* PF=0 */
8953 case UNORDERED: /* PF=1 */
8954 case UNEQ: /* EQ - ZF=1 */
8955 case UNLT: /* LTU - CF=1 */
8956 case UNLE: /* LEU - CF=1 | ZF=1 */
8957 case LTGT: /* EQ - ZF=0 */
8959 case LT: /* LTU - CF=1 - fails on unordered */
8961 *bypass_code = UNORDERED;
8963 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8965 *bypass_code = UNORDERED;
8967 case EQ: /* EQ - ZF=1 - fails on unordered */
8969 *bypass_code = UNORDERED;
8971 case NE: /* NE - ZF=0 - fails on unordered */
8973 *second_code = UNORDERED;
8975 case UNGE: /* GEU - CF=0 - fails on unordered */
8977 *second_code = UNORDERED;
8979 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8981 *second_code = UNORDERED;
8986 if (!TARGET_IEEE_FP)
8993 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8994 All following functions use the number of instructions as the cost metric.
8995 In the future this should be tweaked to compute bytes for optimize_size and
8996 take into account the performance of various instructions on various CPUs. */
8998 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9000 if (!TARGET_IEEE_FP)
9002 /* The cost of code output by ix86_expand_fp_compare. */
9030 /* Return cost of comparison done using fcomi operation.
9031 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9033 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9035 enum rtx_code bypass_code, first_code, second_code;
9036 /* Return an arbitrarily high cost when the instruction is not supported;
9037 this prevents gcc from using it. */
9040 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9041 return (bypass_code != NIL || second_code != NIL) + 2;
9044 /* Return cost of comparison done using sahf operation.
9045 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9047 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9049 enum rtx_code bypass_code, first_code, second_code;
9050 /* Return an arbitrarily high cost when the instruction is not preferred;
9051 this prevents gcc from using it. */
9052 if (!TARGET_USE_SAHF && !optimize_size)
9054 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9055 return (bypass_code != NIL || second_code != NIL) + 3;
9058 /* Compute cost of the comparison done using any method.
9059 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9061 ix86_fp_comparison_cost (enum rtx_code code)
9063 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9066 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9067 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9069 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9070 if (min > sahf_cost)
9072 if (min > fcomi_cost)
9077 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9080 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9081 rtx *second_test, rtx *bypass_test)
9083 enum machine_mode fpcmp_mode, intcmp_mode;
9085 int cost = ix86_fp_comparison_cost (code);
9086 enum rtx_code bypass_code, first_code, second_code;
9088 fpcmp_mode = ix86_fp_compare_mode (code);
9089 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9092 *second_test = NULL_RTX;
9094 *bypass_test = NULL_RTX;
9096 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9098 /* Do fcomi/sahf based test when profitable. */
9099 if ((bypass_code == NIL || bypass_test)
9100 && (second_code == NIL || second_test)
9101 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9105 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9106 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9112 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9113 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9115 scratch = gen_reg_rtx (HImode);
9116 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9117 emit_insn (gen_x86_sahf_1 (scratch));
9120 /* The FP codes work out to act like unsigned. */
9121 intcmp_mode = fpcmp_mode;
9123 if (bypass_code != NIL)
9124 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9125 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9127 if (second_code != NIL)
9128 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9129 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9134 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9135 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9136 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9138 scratch = gen_reg_rtx (HImode);
9139 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9141 /* In the unordered case, we have to check C2 for NaN's, which
9142 doesn't happen to work out to anything nice combination-wise.
9143 So do some bit twiddling on the value we've got in AH to come
9144 up with an appropriate set of condition codes. */
9146 intcmp_mode = CCNOmode;
9151 if (code == GT || !TARGET_IEEE_FP)
9153 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9158 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9159 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9160 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9161 intcmp_mode = CCmode;
9167 if (code == LT && TARGET_IEEE_FP)
9169 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9170 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9171 intcmp_mode = CCmode;
9176 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9182 if (code == GE || !TARGET_IEEE_FP)
9184 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9189 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9190 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9197 if (code == LE && TARGET_IEEE_FP)
9199 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9200 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9201 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9202 intcmp_mode = CCmode;
9207 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9213 if (code == EQ && TARGET_IEEE_FP)
9215 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9216 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9217 intcmp_mode = CCmode;
9222 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9229 if (code == NE && TARGET_IEEE_FP)
9231 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9232 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9238 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9244 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9257 /* Return the test that should be put into the flags user, i.e.
9258 the bcc, scc, or cmov instruction. */
9259 return gen_rtx_fmt_ee (code, VOIDmode,
9260 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9265 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9268 op0 = ix86_compare_op0;
9269 op1 = ix86_compare_op1;
9272 *second_test = NULL_RTX;
9274 *bypass_test = NULL_RTX;
9276 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9277 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9278 second_test, bypass_test);
9280 ret = ix86_expand_int_compare (code, op0, op1);
9285 /* Return true if the CODE will result in a nontrivial jump sequence. */
9287 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9289 enum rtx_code bypass_code, first_code, second_code;
9292 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9293 return bypass_code != NIL || second_code != NIL;
9297 ix86_expand_branch (enum rtx_code code, rtx label)
9301 switch (GET_MODE (ix86_compare_op0))
9307 tmp = ix86_expand_compare (code, NULL, NULL);
9308 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9309 gen_rtx_LABEL_REF (VOIDmode, label),
9311 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9320 enum rtx_code bypass_code, first_code, second_code;
9322 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9325 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9327 /* Check whether we will use the natural sequence with one jump. If
9328 so, we can expand the jump early. Otherwise delay expansion by
9329 creating a compound insn so as not to confuse optimizers. */
9330 if (bypass_code == NIL && second_code == NIL
9333 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9334 gen_rtx_LABEL_REF (VOIDmode, label),
9339 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9340 ix86_compare_op0, ix86_compare_op1);
9341 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9342 gen_rtx_LABEL_REF (VOIDmode, label),
9344 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9346 use_fcomi = ix86_use_fcomi_compare (code);
9347 vec = rtvec_alloc (3 + !use_fcomi);
9348 RTVEC_ELT (vec, 0) = tmp;
9350 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9352 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9355 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9357 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9365 /* Expand DImode branch into multiple compare+branch. */
9367 rtx lo[2], hi[2], label2;
9368 enum rtx_code code1, code2, code3;
9370 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9372 tmp = ix86_compare_op0;
9373 ix86_compare_op0 = ix86_compare_op1;
9374 ix86_compare_op1 = tmp;
9375 code = swap_condition (code);
9377 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9378 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9380 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9381 avoid two branches. This costs one extra insn, so disable when
9382 optimizing for size. */
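/* In C terms, the DImode test "a == b" becomes
   "((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0": two xors and an ior
   feeding a single branch instead of two compare-and-branch pairs.  */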
9384 if ((code == EQ || code == NE)
9386 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9391 if (hi[1] != const0_rtx)
9392 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9393 NULL_RTX, 0, OPTAB_WIDEN);
9396 if (lo[1] != const0_rtx)
9397 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9398 NULL_RTX, 0, OPTAB_WIDEN);
9400 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9401 NULL_RTX, 0, OPTAB_WIDEN);
9403 ix86_compare_op0 = tmp;
9404 ix86_compare_op1 = const0_rtx;
9405 ix86_expand_branch (code, label);
9409 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9410 op1 is a constant and the low word is zero, then we can just
9411 examine the high word. */
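/* E.g. the signed test "x < 0x300000000LL" reduces to "hi(x) < 3":
   with the low word of the constant zero, the low-word comparison can
   never change the outcome.  */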
9413 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9416 case LT: case LTU: case GE: case GEU:
9417 ix86_compare_op0 = hi[0];
9418 ix86_compare_op1 = hi[1];
9419 ix86_expand_branch (code, label);
9425 /* Otherwise, we need two or three jumps. */
9427 label2 = gen_label_rtx ();
9430 code2 = swap_condition (code);
9431 code3 = unsigned_condition (code);
9435 case LT: case GT: case LTU: case GTU:
9438 case LE: code1 = LT; code2 = GT; break;
9439 case GE: code1 = GT; code2 = LT; break;
9440 case LEU: code1 = LTU; code2 = GTU; break;
9441 case GEU: code1 = GTU; code2 = LTU; break;
9443 case EQ: code1 = NIL; code2 = NE; break;
9444 case NE: code2 = NIL; break;
9452 * if (hi(a) < hi(b)) goto true;
9453 * if (hi(a) > hi(b)) goto false;
9454 * if (lo(a) < lo(b)) goto true;
9458 ix86_compare_op0 = hi[0];
9459 ix86_compare_op1 = hi[1];
9462 ix86_expand_branch (code1, label);
9464 ix86_expand_branch (code2, label2);
9466 ix86_compare_op0 = lo[0];
9467 ix86_compare_op1 = lo[1];
9468 ix86_expand_branch (code3, label);
9471 emit_label (label2);
9480 /* Split branch based on floating point condition. */
9482 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9483 rtx target1, rtx target2, rtx tmp)
9486 rtx label = NULL_RTX;
9488 int bypass_probability = -1, second_probability = -1, probability = -1;
9491 if (target2 != pc_rtx)
9494 code = reverse_condition_maybe_unordered (code);
9499 condition = ix86_expand_fp_compare (code, op1, op2,
9500 tmp, &second, &bypass);
9502 if (split_branch_probability >= 0)
9504 /* Distribute the probabilities across the jumps.
9505 Assume that BYPASS and SECOND always test for UNORDERED. */
9507 probability = split_branch_probability;
9509 /* A value of 1 is low enough that the probability does not need
9510 to be updated. Later we may run some experiments and see
9511 whether unordered values are more frequent in practice. */
9513 bypass_probability = 1;
9515 second_probability = 1;
9517 if (bypass != NULL_RTX)
9519 label = gen_label_rtx ();
9520 i = emit_jump_insn (gen_rtx_SET
9522 gen_rtx_IF_THEN_ELSE (VOIDmode,
9524 gen_rtx_LABEL_REF (VOIDmode,
9527 if (bypass_probability >= 0)
9529 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9530 GEN_INT (bypass_probability),
9533 i = emit_jump_insn (gen_rtx_SET
9535 gen_rtx_IF_THEN_ELSE (VOIDmode,
9536 condition, target1, target2)));
9537 if (probability >= 0)
9539 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9540 GEN_INT (probability),
9542 if (second != NULL_RTX)
9544 i = emit_jump_insn (gen_rtx_SET
9546 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9548 if (second_probability >= 0)
9550 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9551 GEN_INT (second_probability),
9554 if (label != NULL_RTX)
9559 ix86_expand_setcc (enum rtx_code code, rtx dest)
9561 rtx ret, tmp, tmpreg, equiv;
9562 rtx second_test, bypass_test;
9564 if (GET_MODE (ix86_compare_op0) == DImode
9566 return 0; /* FAIL */
9568 if (GET_MODE (dest) != QImode)
9571 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9572 PUT_MODE (ret, QImode);
9577 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9578 if (bypass_test || second_test)
9580 rtx test = second_test;
9582 rtx tmp2 = gen_reg_rtx (QImode);
9589 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9591 PUT_MODE (test, QImode);
9592 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9595 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9597 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9600 /* Attach a REG_EQUAL note describing the comparison result. */
9601 equiv = simplify_gen_relational (code, QImode,
9602 GET_MODE (ix86_compare_op0),
9603 ix86_compare_op0, ix86_compare_op1);
9604 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9606 return 1; /* DONE */
9609 /* Expand a comparison setting or clearing the carry flag. Return true when
9610 successful and set *pop to the comparison operation. */
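/* The underlying trick: "cmp op0, op1" sets the carry flag exactly when
   op0 <u op1, so LTU and GEU are available directly from CF and can feed
   adc/sbb; every other comparison code has to be rewritten into one of
   those two forms first.  */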
9612 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9614 enum machine_mode mode =
9615 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9617 /* Do not handle DImode compares that go through the special path. Also we
9618 can't deal with FP compares yet. This is possible to add. */
9619 if ((mode == DImode && !TARGET_64BIT))
9621 if (FLOAT_MODE_P (mode))
9623 rtx second_test = NULL, bypass_test = NULL;
9624 rtx compare_op, compare_seq;
9626 /* Shortcut: the following common codes never translate into carry flag compares. */
9627 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9628 || code == ORDERED || code == UNORDERED)
9631 /* These comparisons require the zero flag; swap operands so they won't. */
9632 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9638 code = swap_condition (code);
9641 /* Try to expand the comparison and verify that we end up with a carry flag
9642 based comparison. This fails to be true only when we decide to expand the
9643 comparison using arithmetic, which is not too common a scenario. */
9645 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9646 &second_test, &bypass_test);
9647 compare_seq = get_insns ();
9650 if (second_test || bypass_test)
9652 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9653 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9654 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9656 code = GET_CODE (compare_op);
9657 if (code != LTU && code != GEU)
9659 emit_insn (compare_seq);
9663 if (!INTEGRAL_MODE_P (mode))
9671 /* Convert a==0 into (unsigned)a<1. */
9674 if (op1 != const0_rtx)
9677 code = (code == EQ ? LTU : GEU);
9680 /* Convert a>b into b<a or a>=b+1. */
9683 if (GET_CODE (op1) == CONST_INT)
9685 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9686 /* Bail out on overflow. We still can swap operands but that
9687 would force loading of the constant into register. */
9688 if (op1 == const0_rtx
9689 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9691 code = (code == GTU ? GEU : LTU);
9698 code = (code == GTU ? LTU : GEU);
9702 /* Convert a>=0 into (unsigned)a<0x80000000. */
9705 if (mode == DImode || op1 != const0_rtx)
9707 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9708 code = (code == LT ? GEU : LTU);
9712 if (mode == DImode || op1 != constm1_rtx)
9714 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9715 code = (code == LE ? GEU : LTU);
9721 /* Swapping operands may cause constant to appear as first operand. */
9722 if (!nonimmediate_operand (op0, VOIDmode))
9726 op0 = force_reg (mode, op0);
9728 ix86_compare_op0 = op0;
9729 ix86_compare_op1 = op1;
9730 *pop = ix86_expand_compare (code, NULL, NULL);
9731 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9737 ix86_expand_int_movcc (rtx operands[])
9739 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9740 rtx compare_seq, compare_op;
9741 rtx second_test, bypass_test;
9742 enum machine_mode mode = GET_MODE (operands[0]);
9743 bool sign_bit_compare_p = false;
9746 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9747 compare_seq = get_insns ();
9750 compare_code = GET_CODE (compare_op);
9752 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9753 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9754 sign_bit_compare_p = true;
9756 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9757 HImode insns, we'd be swallowed in word prefix ops. */
9759 if ((mode != HImode || TARGET_FAST_PREFIX)
9760 && (mode != DImode || TARGET_64BIT)
9761 && GET_CODE (operands[2]) == CONST_INT
9762 && GET_CODE (operands[3]) == CONST_INT)
9764 rtx out = operands[0];
9765 HOST_WIDE_INT ct = INTVAL (operands[2]);
9766 HOST_WIDE_INT cf = INTVAL (operands[3]);
9770 /* Sign bit compares are better done using shifts than by using sahf. */
9772 if (sign_bit_compare_p
9773 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9774 ix86_compare_op1, &compare_op))
9776 /* Detect overlap between destination and compare sources. */
9779 if (!sign_bit_compare_p)
9783 compare_code = GET_CODE (compare_op);
9785 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9786 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9789 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9792 /* To simplify the rest of the code, restrict ourselves to the GEU case. */
9793 if (compare_code == LTU)
9795 HOST_WIDE_INT tmp = ct;
9798 compare_code = reverse_condition (compare_code);
9799 code = reverse_condition (code);
9804 PUT_CODE (compare_op,
9805 reverse_condition_maybe_unordered
9806 (GET_CODE (compare_op)));
9808 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9812 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9813 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9814 tmp = gen_reg_rtx (mode);
9817 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9819 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
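/* Both patterns above emit "sbb %reg, %reg", i.e.
   reg = reg - reg - CF = (CF ? -1 : 0), turning the carry produced by
   the comparison into an all-ones or all-zeros mask for the arithmetic
   that follows.  */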
9823 if (code == GT || code == GE)
9824 code = reverse_condition (code);
9827 HOST_WIDE_INT tmp = ct;
9832 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9833 ix86_compare_op1, VOIDmode, 0, -1);
9846 tmp = expand_simple_binop (mode, PLUS,
9848 copy_rtx (tmp), 1, OPTAB_DIRECT);
9859 tmp = expand_simple_binop (mode, IOR,
9861 copy_rtx (tmp), 1, OPTAB_DIRECT);
9863 else if (diff == -1 && ct)
9873 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9875 tmp = expand_simple_binop (mode, PLUS,
9876 copy_rtx (tmp), GEN_INT (cf),
9877 copy_rtx (tmp), 1, OPTAB_DIRECT);
9885 * andl cf - ct, dest
9895 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9898 tmp = expand_simple_binop (mode, AND,
9900 gen_int_mode (cf - ct, mode),
9901 copy_rtx (tmp), 1, OPTAB_DIRECT);
9903 tmp = expand_simple_binop (mode, PLUS,
9904 copy_rtx (tmp), GEN_INT (ct),
9905 copy_rtx (tmp), 1, OPTAB_DIRECT);
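/* In the plain case (no complement) tmp now holds
   (mask & (cf - ct)) + ct, which is cf when the mask is all ones and ct
   when it is zero; e.g. for ct == 7, cf == 12: (-1 & 5) + 7 == 12 and
   (0 & 5) + 7 == 7.  */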
9908 if (!rtx_equal_p (tmp, out))
9909 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9911 return 1; /* DONE */
9917 tmp = ct, ct = cf, cf = tmp;
9919 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9921 /* We may be reversing an unordered compare to a normal compare, which
9922 is not valid in general (we may convert a non-trapping condition
9923 to a trapping one), but on i386 we currently emit all
9924 comparisons unordered. */
9925 compare_code = reverse_condition_maybe_unordered (compare_code);
9926 code = reverse_condition_maybe_unordered (code);
9930 compare_code = reverse_condition (compare_code);
9931 code = reverse_condition (code);
9936 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9937 && GET_CODE (ix86_compare_op1) == CONST_INT)
9939 if (ix86_compare_op1 == const0_rtx
9940 && (code == LT || code == GE))
9941 compare_code = code;
9942 else if (ix86_compare_op1 == constm1_rtx)
9946 else if (code == GT)
9951 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9952 if (compare_code != NIL
9953 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9954 && (cf == -1 || ct == -1))
9956 /* If lea code below could be used, only optimize
9957 if it results in a 2 insn sequence. */
9959 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9960 || diff == 3 || diff == 5 || diff == 9)
9961 || (compare_code == LT && ct == -1)
9962 || (compare_code == GE && cf == -1))
9965 * notl op1 (if necessary)
9973 code = reverse_condition (code);
9976 out = emit_store_flag (out, code, ix86_compare_op0,
9977 ix86_compare_op1, VOIDmode, 0, -1);
9979 out = expand_simple_binop (mode, IOR,
9981 out, 1, OPTAB_DIRECT);
9982 if (out != operands[0])
9983 emit_move_insn (operands[0], out);
9985 return 1; /* DONE */
9990 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9991 || diff == 3 || diff == 5 || diff == 9)
9992 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9993 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9999 * lea cf(dest*(ct-cf)),dest
10003 * This also catches the degenerate setcc-only case.
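/* Illustration: for (cond ? 5 : 2) we have diff == 3, so the setcc
   leaves 0 or 1 in dest and "leal 2(%eax,%eax,2), %eax" computes
   2 + dest*3, i.e. 2 or 5, with no branch.  */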
10009 out = emit_store_flag (out, code, ix86_compare_op0,
10010 ix86_compare_op1, VOIDmode, 0, 1);
10013 /* On x86_64 the lea instruction operates on Pmode, so we need
10014 to get the arithmetic done in the proper mode to match. */
10016 tmp = copy_rtx (out);
10020 out1 = copy_rtx (out);
10021 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10025 tmp = gen_rtx_PLUS (mode, tmp, out1);
10031 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10034 if (!rtx_equal_p (tmp, out))
10037 out = force_operand (tmp, copy_rtx (out));
10039 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10041 if (!rtx_equal_p (out, operands[0]))
10042 emit_move_insn (operands[0], copy_rtx (out));
10044 return 1; /* DONE */
10048 * General case: Jumpful:
10049 * xorl dest,dest cmpl op1, op2
10050 * cmpl op1, op2 movl ct, dest
10051 * setcc dest jcc 1f
10052 * decl dest movl cf, dest
10053 * andl (cf-ct),dest 1:
10056 * Size 20. Size 14.
10058 * This is reasonably steep, but branch mispredict costs are
10059 * high on modern cpus, so consider failing only if optimizing for space.
10063 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10064 && BRANCH_COST >= 2)
10070 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10071 /* We may be reversing an unordered compare to a normal compare,
10072 which is not valid in general (we may convert a non-trapping
10073 condition to a trapping one), but on i386 we currently
10074 emit all comparisons unordered. */
10075 code = reverse_condition_maybe_unordered (code);
10078 code = reverse_condition (code);
10079 if (compare_code != NIL)
10080 compare_code = reverse_condition (compare_code);
10084 if (compare_code != NIL)
10086 /* notl op1 (if needed)
10091 For x < 0 (resp. x <= -1) there will be no notl,
10092 so if possible swap the constants to get rid of the complement.
10094 True/false will be -1/0 while the code below (store flag
10095 followed by decrement) is 0/-1, so the constants need
10096 to be exchanged once more. */
10098 if (compare_code == GE || !cf)
10100 code = reverse_condition (code);
10105 HOST_WIDE_INT tmp = cf;
10110 out = emit_store_flag (out, code, ix86_compare_op0,
10111 ix86_compare_op1, VOIDmode, 0, -1);
10115 out = emit_store_flag (out, code, ix86_compare_op0,
10116 ix86_compare_op1, VOIDmode, 0, 1);
10118 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10119 copy_rtx (out), 1, OPTAB_DIRECT);
10122 out = expand_simple_binop (mode, AND, copy_rtx (out),
10123 gen_int_mode (cf - ct, mode),
10124 copy_rtx (out), 1, OPTAB_DIRECT);
10126 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10127 copy_rtx (out), 1, OPTAB_DIRECT);
10128 if (!rtx_equal_p (out, operands[0]))
10129 emit_move_insn (operands[0], copy_rtx (out));
10131 return 1; /* DONE */
10135 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10137 /* Try a few more things with specific constants and a variable. */
10140 rtx var, orig_out, out, tmp;
10142 if (BRANCH_COST <= 2)
10143 return 0; /* FAIL */
10145 /* If one of the two operands is an interesting constant, load the 0/-1
10146 constant conditionally (by recursing) and mask the variable in with a logical operation. */
10148 if (GET_CODE (operands[2]) == CONST_INT)
10151 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10152 operands[3] = constm1_rtx, op = and_optab;
10153 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10154 operands[3] = const0_rtx, op = ior_optab;
10156 return 0; /* FAIL */
10158 else if (GET_CODE (operands[3]) == CONST_INT)
10161 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10162 operands[2] = constm1_rtx, op = and_optab;
10163 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10164 operands[2] = const0_rtx, op = ior_optab;
10166 return 0; /* FAIL */
10169 return 0; /* FAIL */
10171 orig_out = operands[0];
10172 tmp = gen_reg_rtx (mode);
10175 /* Recurse to get the constant loaded. */
10176 if (ix86_expand_int_movcc (operands) == 0)
10177 return 0; /* FAIL */
10179 /* Mask in the interesting variable. */
10180 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10182 if (!rtx_equal_p (out, orig_out))
10183 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10185 return 1; /* DONE */
10189 * For comparison with above,
10199 if (! nonimmediate_operand (operands[2], mode))
10200 operands[2] = force_reg (mode, operands[2]);
10201 if (! nonimmediate_operand (operands[3], mode))
10202 operands[3] = force_reg (mode, operands[3]);
10204 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10206 rtx tmp = gen_reg_rtx (mode);
10207 emit_move_insn (tmp, operands[3]);
10210 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10212 rtx tmp = gen_reg_rtx (mode);
10213 emit_move_insn (tmp, operands[2]);
10217 if (! register_operand (operands[2], VOIDmode)
10219 || ! register_operand (operands[3], VOIDmode)))
10220 operands[2] = force_reg (mode, operands[2]);
10223 && ! register_operand (operands[3], VOIDmode))
10224 operands[3] = force_reg (mode, operands[3]);
10226 emit_insn (compare_seq);
10227 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10228 gen_rtx_IF_THEN_ELSE (mode,
10229 compare_op, operands[2],
10232 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10233 gen_rtx_IF_THEN_ELSE (mode,
10235 copy_rtx (operands[3]),
10236 copy_rtx (operands[0]))));
10238 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10239 gen_rtx_IF_THEN_ELSE (mode,
10241 copy_rtx (operands[2]),
10242 copy_rtx (operands[0]))));
10244 return 1; /* DONE */
10248 ix86_expand_fp_movcc (rtx operands[])
10250 enum rtx_code code;
10252 rtx compare_op, second_test, bypass_test;
10254 /* For SF/DFmode conditional moves based on comparisons
10255 in same mode, we may want to use SSE min/max instructions. */
10256 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10257 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10258 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10259 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10260 && (!TARGET_IEEE_FP
10261 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10262 /* We may be called from the post-reload splitter. */
10263 && (!REG_P (operands[0])
10264 || SSE_REG_P (operands[0])
10265 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10267 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10268 code = GET_CODE (operands[1]);
10270 /* See if we have (cross) match between comparison operands and
10271 conditional move operands. */
10272 if (rtx_equal_p (operands[2], op1))
10277 code = reverse_condition_maybe_unordered (code);
10279 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10281 /* Check for min operation. */
10282 if (code == LT || code == UNLE)
10290 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10291 if (memory_operand (op0, VOIDmode))
10292 op0 = force_reg (GET_MODE (operands[0]), op0);
10293 if (GET_MODE (operands[0]) == SFmode)
10294 emit_insn (gen_minsf3 (operands[0], op0, op1));
10296 emit_insn (gen_mindf3 (operands[0], op0, op1));
10299 /* Check for max operation. */
10300 if (code == GT || code == UNGE)
10308 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10309 if (memory_operand (op0, VOIDmode))
10310 op0 = force_reg (GET_MODE (operands[0]), op0);
10311 if (GET_MODE (operands[0]) == SFmode)
10312 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10314 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10318 /* Manage the condition to be sse_comparison_operator. In case we are
10319 in non-ieee mode, try to canonicalize the destination operand
10320 to be first in the comparison - this helps reload to avoid extra moves. */
10322 if (!sse_comparison_operator (operands[1], VOIDmode)
10323 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10325 rtx tmp = ix86_compare_op0;
10326 ix86_compare_op0 = ix86_compare_op1;
10327 ix86_compare_op1 = tmp;
10328 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10329 VOIDmode, ix86_compare_op0,
10332 /* Similarly try to manage the result to be the first operand of the
10333 conditional move. We also don't support the NE comparison on SSE, so try to avoid it. */
10335 if ((rtx_equal_p (operands[0], operands[3])
10336 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10337 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10339 rtx tmp = operands[2];
10340 operands[2] = operands[3];
10342 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10343 (GET_CODE (operands[1])),
10344 VOIDmode, ix86_compare_op0,
10347 if (GET_MODE (operands[0]) == SFmode)
10348 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10349 operands[2], operands[3],
10350 ix86_compare_op0, ix86_compare_op1));
10352 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10353 operands[2], operands[3],
10354 ix86_compare_op0, ix86_compare_op1));
10361 code = GET_CODE (operands[1]);
10362 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10364 /* The floating point conditional move instructions don't directly
10365 support signed integer comparisons. */
10367 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10369 if (second_test != NULL || bypass_test != NULL)
10371 tmp = gen_reg_rtx (QImode);
10372 ix86_expand_setcc (code, tmp);
10374 ix86_compare_op0 = tmp;
10375 ix86_compare_op1 = const0_rtx;
10376 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10378 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10380 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10381 emit_move_insn (tmp, operands[3]);
10384 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10386 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10387 emit_move_insn (tmp, operands[2]);
10391 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10392 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10397 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10398 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10403 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10404 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10412 /* Expand conditional increment or decrement using adc/sbb instructions.
10413 The default case using setcc followed by the conditional move can be
10414 done by generic code. */
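/* E.g. the unsigned "x = a < b ? x + 1 : x" becomes
   "cmpl %ebx, %eax ; adcl $0, %ecx" (with a, b, x in eax, ebx, ecx):
   the compare leaves a <u b in CF and adc adds it in, so no setcc or
   cmov is needed; decrements use sbb the same way.  */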
10416 ix86_expand_int_addcc (rtx operands[])
10418 enum rtx_code code = GET_CODE (operands[1]);
10420 rtx val = const0_rtx;
10421 bool fpcmp = false;
10422 enum machine_mode mode = GET_MODE (operands[0]);
10424 if (operands[3] != const1_rtx
10425 && operands[3] != constm1_rtx)
10427 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10428 ix86_compare_op1, &compare_op))
10430 code = GET_CODE (compare_op);
10432 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10433 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10436 code = ix86_fp_compare_code_to_integer (code);
10443 PUT_CODE (compare_op,
10444 reverse_condition_maybe_unordered
10445 (GET_CODE (compare_op)));
10447 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10449 PUT_MODE (compare_op, mode);
10451 /* Construct either adc or sbb insn. */
10452 if ((code == LTU) == (operands[3] == constm1_rtx))
10454 switch (GET_MODE (operands[0]))
10457 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10460 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10463 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10466 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10474 switch (GET_MODE (operands[0]))
10477 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10480 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10483 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10486 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10492 return 1; /* DONE */
10496 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10497 works for floating point parameters and non-offsettable memories.
10498 For pushes, it returns just stack offsets; the values will be saved
10499 in the right order. At most three parts are generated. */
10502 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10507 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10509 size = (GET_MODE_SIZE (mode) + 4) / 8;
10511 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10513 if (size < 2 || size > 3)
10516 /* Optimize constant pool references to immediates. This is used by fp
10517 moves that force all constants to memory to allow combining. */
10518 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10520 rtx tmp = maybe_get_pool_constant (operand);
10525 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10527 /* The only non-offsettable memories we handle are pushes. */
10528 if (! push_operand (operand, VOIDmode))
10531 operand = copy_rtx (operand);
10532 PUT_MODE (operand, Pmode);
10533 parts[0] = parts[1] = parts[2] = operand;
10535 else if (!TARGET_64BIT)
10537 if (mode == DImode)
10538 split_di (&operand, 1, &parts[0], &parts[1]);
10541 if (REG_P (operand))
10543 if (!reload_completed)
10545 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10546 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10548 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10550 else if (offsettable_memref_p (operand))
10552 operand = adjust_address (operand, SImode, 0);
10553 parts[0] = operand;
10554 parts[1] = adjust_address (operand, SImode, 4);
10556 parts[2] = adjust_address (operand, SImode, 8);
10558 else if (GET_CODE (operand) == CONST_DOUBLE)
10563 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10567 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10568 parts[2] = gen_int_mode (l[2], SImode);
10571 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10576 parts[1] = gen_int_mode (l[1], SImode);
10577 parts[0] = gen_int_mode (l[0], SImode);
10585 if (mode == TImode)
10586 split_ti (&operand, 1, &parts[0], &parts[1]);
10587 if (mode == XFmode || mode == TFmode)
10589 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
10590 if (REG_P (operand))
10592 if (!reload_completed)
10594 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10595 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10597 else if (offsettable_memref_p (operand))
10599 operand = adjust_address (operand, DImode, 0);
10600 parts[0] = operand;
10601 parts[1] = adjust_address (operand, upper_mode, 8);
10603 else if (GET_CODE (operand) == CONST_DOUBLE)
10608 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10609 real_to_target (l, &r, mode);
10610 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10611 if (HOST_BITS_PER_WIDE_INT >= 64)
10614 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10615 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10618 parts[0] = immed_double_const (l[0], l[1], DImode);
10619 if (upper_mode == SImode)
10620 parts[1] = gen_int_mode (l[2], SImode);
10621 else if (HOST_BITS_PER_WIDE_INT >= 64)
10624 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10625 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10628 parts[1] = immed_double_const (l[2], l[3], DImode);
10638 /* Emit insns to perform a move or push of DI, DF, and XF values.
10639 Return false when normal moves are needed; true when all required
10640 insns have been emitted. Operands 2-4 contain the input values
10641 in the correct order; operands 5-7 contain the output values. */
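/* For instance, a DImode load whose address register is also the low
   half of the destination, e.g. "movl 4(%eax), %edx ; movl (%eax), %eax",
   must move the half that clobbers the address register last; the
   collision logic below arranges exactly that ordering.  */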
10644 ix86_split_long_move (rtx operands[])
10649 int collisions = 0;
10650 enum machine_mode mode = GET_MODE (operands[0]);
10652 /* The DFmode expanders may ask us to move a double.
10653 For a 64-bit target this is a single move. By hiding the fact
10654 here we simplify the i386.md splitters. */
10655 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10657 /* Optimize constant pool references to immediates. This is used by
10658 fp moves that force all constants to memory to allow combining. */
10660 if (GET_CODE (operands[1]) == MEM
10661 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10662 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10663 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10664 if (push_operand (operands[0], VOIDmode))
10666 operands[0] = copy_rtx (operands[0]);
10667 PUT_MODE (operands[0], Pmode);
10670 operands[0] = gen_lowpart (DImode, operands[0]);
10671 operands[1] = gen_lowpart (DImode, operands[1]);
10672 emit_move_insn (operands[0], operands[1]);
10676 /* The only non-offsettable memory we handle is push. */
10677 if (push_operand (operands[0], VOIDmode))
10679 else if (GET_CODE (operands[0]) == MEM
10680 && ! offsettable_memref_p (operands[0]))
10683 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10684 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10686 /* When emitting a push, take care with source operands on the stack. */
10687 if (push && GET_CODE (operands[1]) == MEM
10688 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10691 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10692 XEXP (part[1][2], 0));
10693 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10694 XEXP (part[1][1], 0));
10697 /* We need to do the copy in the right order in case an address register
10698 of the source overlaps the destination. */
10699 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10701 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10703 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10706 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10709 /* Collision in the middle part can be handled by reordering. */
10710 if (collisions == 1 && nparts == 3
10711 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10714 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10715 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10718 /* If there are more collisions, we can't handle them by reordering.
10719 Do an lea to the last part and use only one colliding move. */
10720 else if (collisions > 1)
10726 base = part[0][nparts - 1];
10728 /* Handle the case when the last part isn't valid for lea.
10729 Happens in 64-bit mode storing the 12-byte XFmode. */
10730 if (GET_MODE (base) != Pmode)
10731 base = gen_rtx_REG (Pmode, REGNO (base));
10733 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10734 part[1][0] = replace_equiv_address (part[1][0], base);
10735 part[1][1] = replace_equiv_address (part[1][1],
10736 plus_constant (base, UNITS_PER_WORD));
10738 part[1][2] = replace_equiv_address (part[1][2],
10739 plus_constant (base, 8));
10749 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10750 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10751 emit_move_insn (part[0][2], part[1][2]);
10756 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10757 register, it is OK - we will just use the larger counterpart. We also
10758 retype the memory - this comes from an attempt to avoid the REX prefix
10759 when moving the second half of a TFmode value. */
10760 if (GET_MODE (part[1][1]) == SImode)
10762 if (GET_CODE (part[1][1]) == MEM)
10763 part[1][1] = adjust_address (part[1][1], DImode, 0);
10764 else if (REG_P (part[1][1]))
10765 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10768 if (GET_MODE (part[1][0]) == SImode)
10769 part[1][0] = part[1][1];
10772 emit_move_insn (part[0][1], part[1][1]);
10773 emit_move_insn (part[0][0], part[1][0]);
10777 /* Choose the correct order so as not to overwrite the source before it is copied. */
10778 if ((REG_P (part[0][0])
10779 && REG_P (part[1][1])
10780 && (REGNO (part[0][0]) == REGNO (part[1][1])
10782 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10784 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10788 operands[2] = part[0][2];
10789 operands[3] = part[0][1];
10790 operands[4] = part[0][0];
10791 operands[5] = part[1][2];
10792 operands[6] = part[1][1];
10793 operands[7] = part[1][0];
10797 operands[2] = part[0][1];
10798 operands[3] = part[0][0];
10799 operands[5] = part[1][1];
10800 operands[6] = part[1][0];
10807 operands[2] = part[0][0];
10808 operands[3] = part[0][1];
10809 operands[4] = part[0][2];
10810 operands[5] = part[1][0];
10811 operands[6] = part[1][1];
10812 operands[7] = part[1][2];
10816 operands[2] = part[0][0];
10817 operands[3] = part[0][1];
10818 operands[5] = part[1][0];
10819 operands[6] = part[1][1];
10822 emit_move_insn (operands[2], operands[5]);
10823 emit_move_insn (operands[3], operands[6]);
10825 emit_move_insn (operands[4], operands[7]);
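/* Split a DImode shift left into SImode operations.  For a constant
   count >= 32 the low word of the result is zero and the high word is
   the old low word shifted by (count - 32); for a smaller constant an
   shld/shl pair suffices.  For a variable count the shld/shl pair is
   emitted unconditionally and then fixed up (by cmov when available,
   otherwise by a branch) in case the runtime count is >= 32.  The
   right-shift splitters below follow the same scheme with shrd and
   sar/shr.  */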
10831 ix86_split_ashldi (rtx *operands, rtx scratch)
10833 rtx low[2], high[2];
10836 if (GET_CODE (operands[2]) == CONST_INT)
10838 split_di (operands, 2, low, high);
10839 count = INTVAL (operands[2]) & 63;
10843 emit_move_insn (high[0], low[1]);
10844 emit_move_insn (low[0], const0_rtx);
10847 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10851 if (!rtx_equal_p (operands[0], operands[1]))
10852 emit_move_insn (operands[0], operands[1]);
10853 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10854 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10859 if (!rtx_equal_p (operands[0], operands[1]))
10860 emit_move_insn (operands[0], operands[1]);
10862 split_di (operands, 1, low, high);
10864 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10865 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10867 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10869 if (! no_new_pseudos)
10870 scratch = force_reg (SImode, const0_rtx);
10872 emit_move_insn (scratch, const0_rtx);
10874 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10878 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10883 ix86_split_ashrdi (rtx *operands, rtx scratch)
10885 rtx low[2], high[2];
10888 if (GET_CODE (operands[2]) == CONST_INT)
10890 split_di (operands, 2, low, high);
10891 count = INTVAL (operands[2]) & 63;
10895 emit_move_insn (low[0], high[1]);
10897 if (! reload_completed)
10898 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10901 emit_move_insn (high[0], low[0]);
10902 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10906 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10910 if (!rtx_equal_p (operands[0], operands[1]))
10911 emit_move_insn (operands[0], operands[1]);
10912 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10913 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10918 if (!rtx_equal_p (operands[0], operands[1]))
10919 emit_move_insn (operands[0], operands[1]);
10921 split_di (operands, 1, low, high);
10923 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10924 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10926 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10928 if (! no_new_pseudos)
10929 scratch = gen_reg_rtx (SImode);
10930 emit_move_insn (scratch, high[0]);
10931 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10932 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10936 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10941 ix86_split_lshrdi (rtx *operands, rtx scratch)
10943 rtx low[2], high[2];
10946 if (GET_CODE (operands[2]) == CONST_INT)
10948 split_di (operands, 2, low, high);
10949 count = INTVAL (operands[2]) & 63;
10953 emit_move_insn (low[0], high[1]);
10954 emit_move_insn (high[0], const0_rtx);
10957 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10961 if (!rtx_equal_p (operands[0], operands[1]))
10962 emit_move_insn (operands[0], operands[1]);
10963 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10964 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10969 if (!rtx_equal_p (operands[0], operands[1]))
10970 emit_move_insn (operands[0], operands[1]);
10972 split_di (operands, 1, low, high);
10974 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10975 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10977 /* Heh. By reversing the arguments, we can reuse this pattern. */
10978 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10980 if (! no_new_pseudos)
10981 scratch = force_reg (SImode, const0_rtx);
10983 emit_move_insn (scratch, const0_rtx);
10985 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10989 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10993 /* Helper function for the string operations below. Test whether
10994 (VARIABLE & VALUE) is zero; if it is, jump to the returned label. */
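/* E.g. ix86_expand_aligntest (destreg, 2) emits, in effect,
   "tmp = destreg & 2; if (tmp == 0) goto label;" and returns the label,
   so the caller can fall through into the code handling the unaligned
   case and place the label after it.  */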
10996 ix86_expand_aligntest (rtx variable, int value)
10998 rtx label = gen_label_rtx ();
10999 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11000 if (GET_MODE (variable) == DImode)
11001 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11003 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11004 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11009 /* Decrease COUNTREG by VALUE. */
11011 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11013 if (GET_MODE (countreg) == DImode)
11014 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11016 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11019 /* Zero-extend the possibly SImode EXP to a Pmode register. */
11021 ix86_zero_extend_to_Pmode (rtx exp)
11024 if (GET_MODE (exp) == VOIDmode)
11025 return force_reg (Pmode, exp);
11026 if (GET_MODE (exp) == Pmode)
11027 return copy_to_mode_reg (Pmode, exp);
11028 r = gen_reg_rtx (Pmode);
11029 emit_insn (gen_zero_extendsidi2 (r, exp));
11033 /* Expand string move (memcpy) operation. Use i386 string operations when
11034 profitable. expand_clrstr contains similar code. */
11036 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11038 rtx srcreg, destreg, countreg, srcexp, destexp;
11039 enum machine_mode counter_mode;
11040 HOST_WIDE_INT align = 0;
11041 unsigned HOST_WIDE_INT count = 0;
11043 if (GET_CODE (align_exp) == CONST_INT)
11044 align = INTVAL (align_exp);
11046 /* Can't use any of this if the user has appropriated esi or edi. */
11047 if (global_regs[4] || global_regs[5])
11050 /* This simple hack avoids all inlining code and simplifies code below. */
11051 if (!TARGET_ALIGN_STRINGOPS)
11054 if (GET_CODE (count_exp) == CONST_INT)
11056 count = INTVAL (count_exp);
11057 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11061 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
11062 for 64 bits use SImode when possible, otherwise DImode.
11063 Set count to the number of bytes copied when known at compile time. */
11064 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11065 || x86_64_zero_extended_value (count_exp))
11066 counter_mode = SImode;
11068 counter_mode = DImode;
11070 if (counter_mode != SImode && counter_mode != DImode)
11073 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11074 if (destreg != XEXP (dst, 0))
11075 dst = replace_equiv_address_nv (dst, destreg);
11076 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11077 if (srcreg != XEXP (src, 0))
11078 src = replace_equiv_address_nv (src, srcreg);
11080 /* When optimizing for size, emit a simple rep ; movsb instruction for
11081 counts not divisible by 4. */
11083 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11085 emit_insn (gen_cld ());
11086 countreg = ix86_zero_extend_to_Pmode (count_exp);
11087 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11088 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11089 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11093 /* For constant aligned (or small unaligned) copies use rep movsl
11094 followed by code copying the rest. For PentiumPro ensure 8 byte
11095 alignment to allow rep movsl acceleration. */
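/* E.g. for count == 13 with 4-byte chunks this emits
   "movl $3, %ecx ; rep movsl" for the first 12 bytes followed by a
   single movsb for the remaining byte.  */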
11097 else if (count != 0
11099 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11100 || optimize_size || count < (unsigned int) 64))
11102 unsigned HOST_WIDE_INT offset = 0;
11103 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11104 rtx srcmem, dstmem;
11106 emit_insn (gen_cld ());
11107 if (count & ~(size - 1))
11109 countreg = copy_to_mode_reg (counter_mode,
11110 GEN_INT ((count >> (size == 4 ? 2 : 3))
11111 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11112 countreg = ix86_zero_extend_to_Pmode (countreg);
11114 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11115 GEN_INT (size == 4 ? 2 : 3));
11116 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11117 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11119 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11120 countreg, destexp, srcexp));
11121 offset = count & ~(size - 1);
11123 if (size == 8 && (count & 0x04))
11125 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11127 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11129 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11134 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11136 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11138 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11143 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11145 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11147 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11150 /* The generic code based on the glibc implementation:
11151 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11152 allowing accelerated copying there)
11153 - copy the data using rep movsl
11154 - copy the rest. */
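/* Concretely, for an unknown count the emitted sequence tests and fixes
   up the destination alignment a byte and a word at a time, does the
   bulk of the work with "shrl $2, %ecx ; rep movsl" (shrq $3 and
   rep movsq on 64-bit targets), and finally copies the remaining
   0-3 (or 0-7) bytes piecewise.  */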
11159 rtx srcmem, dstmem;
11160 int desired_alignment = (TARGET_PENTIUMPRO
11161 && (count == 0 || count >= (unsigned int) 260)
11162 ? 8 : UNITS_PER_WORD);
11163 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11164 dst = change_address (dst, BLKmode, destreg);
11165 src = change_address (src, BLKmode, srcreg);
11167 /* In case we don't know anything about the alignment, default to
11168 the library version, since it is usually equally fast and results in shorter code.
11171 Also emit a call when we know that the count is large and call overhead
11172 will not be important. */
11173 if (!TARGET_INLINE_ALL_STRINGOPS
11174 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11177 if (TARGET_SINGLE_STRINGOP)
11178 emit_insn (gen_cld ());
11180 countreg2 = gen_reg_rtx (Pmode);
11181 countreg = copy_to_mode_reg (counter_mode, count_exp);
11183 /* We don't use loops to align the destination or to copy parts smaller
11184 than 4 bytes, because gcc is able to optimize such code better (in
11185 case the destination or the count really is aligned, gcc is often
11186 able to predict the branches) and also it is friendlier to
11187 hardware branch prediction.
11189 Using loops is beneficial for the generic case, because we can
11190 handle small counts using the loops. Many CPUs (such as the Athlon)
11191 have large REP prefix setup costs.
11193 This is quite costly. Maybe we can revisit this decision later or
11194 add some customizability to this code. */
11196 if (count == 0 && align < desired_alignment)
11198 label = gen_label_rtx ();
11199 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11200 LEU, 0, counter_mode, 1, label);
11204 rtx label = ix86_expand_aligntest (destreg, 1);
11205 srcmem = change_address (src, QImode, srcreg);
11206 dstmem = change_address (dst, QImode, destreg);
11207 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11208 ix86_adjust_counter (countreg, 1);
11209 emit_label (label);
11210 LABEL_NUSES (label) = 1;
11214 rtx label = ix86_expand_aligntest (destreg, 2);
11215 srcmem = change_address (src, HImode, srcreg);
11216 dstmem = change_address (dst, HImode, destreg);
11217 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11218 ix86_adjust_counter (countreg, 2);
11219 emit_label (label);
11220 LABEL_NUSES (label) = 1;
11222 if (align <= 4 && desired_alignment > 4)
11224 rtx label = ix86_expand_aligntest (destreg, 4);
11225 srcmem = change_address (src, SImode, srcreg);
11226 dstmem = change_address (dst, SImode, destreg);
11227 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11228 ix86_adjust_counter (countreg, 4);
11229 emit_label (label);
11230 LABEL_NUSES (label) = 1;
11233 if (label && desired_alignment > 4 && !TARGET_64BIT)
11235 emit_label (label);
11236 LABEL_NUSES (label) = 1;
11239 if (!TARGET_SINGLE_STRINGOP)
11240 emit_insn (gen_cld ());
11243 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11245 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11249 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11250 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11252 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11253 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11254 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11255 countreg2, destexp, srcexp));
11259 emit_label (label);
11260 LABEL_NUSES (label) = 1;
11262 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11264 srcmem = change_address (src, SImode, srcreg);
11265 dstmem = change_address (dst, SImode, destreg);
11266 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11268 if ((align <= 4 || count == 0) && TARGET_64BIT)
11270 rtx label = ix86_expand_aligntest (countreg, 4);
11271 srcmem = change_address (src, SImode, srcreg);
11272 dstmem = change_address (dst, SImode, destreg);
11273 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11274 emit_label (label);
11275 LABEL_NUSES (label) = 1;
11277 if (align > 2 && count != 0 && (count & 2))
11279 srcmem = change_address (src, HImode, srcreg);
11280 dstmem = change_address (dst, HImode, destreg);
11281 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11283 if (align <= 2 || count == 0)
11285 rtx label = ix86_expand_aligntest (countreg, 2);
11286 srcmem = change_address (src, HImode, srcreg);
11287 dstmem = change_address (dst, HImode, destreg);
11288 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11289 emit_label (label);
11290 LABEL_NUSES (label) = 1;
11292 if (align > 1 && count != 0 && (count & 1))
11294 srcmem = change_address (src, QImode, srcreg);
11295 dstmem = change_address (dst, QImode, destreg);
11296 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11298 if (align <= 1 || count == 0)
11300 rtx label = ix86_expand_aligntest (countreg, 1);
11301 srcmem = change_address (src, QImode, srcreg);
11302 dstmem = change_address (dst, QImode, destreg);
11303 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11304 emit_label (label);
11305 LABEL_NUSES (label) = 1;
11312 /* Expand string clear operation (bzero). Use i386 string operations when
11313 profitable. expand_movstr contains similar code. */
11315 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11317 rtx destreg, zeroreg, countreg, destexp;
11318 enum machine_mode counter_mode;
11319 HOST_WIDE_INT align = 0;
11320 unsigned HOST_WIDE_INT count = 0;
11322 if (GET_CODE (align_exp) == CONST_INT)
11323 align = INTVAL (align_exp);
11325 /* Can't use any of this if the user has appropriated edi (rep stos writes through edi, not esi). */
11326 if (global_regs[5])
11329 /* This simple hack avoids all inlining code and simplifies code below. */
11330 if (!TARGET_ALIGN_STRINGOPS)
11333 if (GET_CODE (count_exp) == CONST_INT)
11335 count = INTVAL (count_exp);
11336 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11339 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
11340 for 64 bits use SImode when possible, otherwise DImode.
11341 Set count to the number of bytes cleared when known at compile time. */
11342 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11343 || x86_64_zero_extended_value (count_exp))
11344 counter_mode = SImode;
11346 counter_mode = DImode;
11348 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11349 if (destreg != XEXP (dst, 0))
11350 dst = replace_equiv_address_nv (dst, destreg);
11352 emit_insn (gen_cld ());
11354 /* When optimizing for size, emit a simple rep ; stosb instruction for
11355 counts not divisible by 4. */
11357 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11359 countreg = ix86_zero_extend_to_Pmode (count_exp);
11360 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11361 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11362 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11364 else if (count != 0
11366 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11367 || optimize_size || count < (unsigned int) 64))
11369 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11370 unsigned HOST_WIDE_INT offset = 0;
11372 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11373 if (count & ~(size - 1))
11375 countreg = copy_to_mode_reg (counter_mode,
11376 GEN_INT ((count >> (size == 4 ? 2 : 3))
11377 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11378 countreg = ix86_zero_extend_to_Pmode (countreg);
11379 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11380 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11381 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11382 offset = count & ~(size - 1);
11384 if (size == 8 && (count & 0x04))
11386 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11388 emit_insn (gen_strset (destreg, mem,
11389 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11394 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11396 emit_insn (gen_strset (destreg, mem,
11397 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11402 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11404 emit_insn (gen_strset (destreg, mem,
11405 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11412 /* Compute desired alignment of the string operation. */
11413 int desired_alignment = (TARGET_PENTIUMPRO
11414 && (count == 0 || count >= (unsigned int) 260)
11415 ? 8 : UNITS_PER_WORD);
11417 /* In case we don't know anything about the alignment, default to
11418 the library version, since it is usually equally fast and results in shorter code.
11421 Also emit a call when we know that the count is large and call overhead
11422 will not be important. */
11423 if (!TARGET_INLINE_ALL_STRINGOPS
11424 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11427 if (TARGET_SINGLE_STRINGOP)
11428 emit_insn (gen_cld ());
11430 countreg2 = gen_reg_rtx (Pmode);
11431 countreg = copy_to_mode_reg (counter_mode, count_exp);
11432 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11433 /* Get rid of MEM_OFFSET, it won't be accurate. */
11434 dst = change_address (dst, BLKmode, destreg);
11436 if (count == 0 && align < desired_alignment)
11438 label = gen_label_rtx ();
11439 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11440 LEU, 0, counter_mode, 1, label);
11444 rtx label = ix86_expand_aligntest (destreg, 1);
11445 emit_insn (gen_strset (destreg, dst,
11446 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11447 ix86_adjust_counter (countreg, 1);
11448 emit_label (label);
11449 LABEL_NUSES (label) = 1;
11453 rtx label = ix86_expand_aligntest (destreg, 2);
11454 emit_insn (gen_strset (destreg, dst,
11455 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11456 ix86_adjust_counter (countreg, 2);
11457 emit_label (label);
11458 LABEL_NUSES (label) = 1;
11460 if (align <= 4 && desired_alignment > 4)
11462 rtx label = ix86_expand_aligntest (destreg, 4);
11463 emit_insn (gen_strset (destreg, dst,
11465 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11467 ix86_adjust_counter (countreg, 4);
11468 emit_label (label);
11469 LABEL_NUSES (label) = 1;
11472 if (label && desired_alignment > 4 && !TARGET_64BIT)
11474 emit_label (label);
11475 LABEL_NUSES (label) = 1;
11479 if (!TARGET_SINGLE_STRINGOP)
11480 emit_insn (gen_cld ());
11483 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11485 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11489 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11490 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11492 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11493 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11497 emit_label (label);
11498 LABEL_NUSES (label) = 1;
11501 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11502 emit_insn (gen_strset (destreg, dst,
11503 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11504 if (TARGET_64BIT && (align <= 4 || count == 0))
11506 rtx label = ix86_expand_aligntest (countreg, 4);
11507 emit_insn (gen_strset (destreg, dst,
11508 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11509 emit_label (label);
11510 LABEL_NUSES (label) = 1;
11512 if (align > 2 && count != 0 && (count & 2))
11513 emit_insn (gen_strset (destreg, dst,
11514 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11515 if (align <= 2 || count == 0)
11517 rtx label = ix86_expand_aligntest (countreg, 2);
11518 emit_insn (gen_strset (destreg, dst,
11519 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11520 emit_label (label);
11521 LABEL_NUSES (label) = 1;
11523 if (align > 1 && count != 0 && (count & 1))
11524 emit_insn (gen_strset (destreg, dst,
11525 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11526 if (align <= 1 || count == 0)
11528 rtx label = ix86_expand_aligntest (countreg, 1);
11529 emit_insn (gen_strset (destreg, dst,
11530 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11531 emit_label (label);
11532 LABEL_NUSES (label) = 1;
11538 /* Expand strlen. */
11540 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11542 rtx addr, scratch1, scratch2, scratch3, scratch4;
11544 /* The generic case of the strlen expander is long. Avoid expanding it
11545 unless TARGET_INLINE_ALL_STRINGOPS. */
11547 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11548 && !TARGET_INLINE_ALL_STRINGOPS
11550 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11553 addr = force_reg (Pmode, XEXP (src, 0));
11554 scratch1 = gen_reg_rtx (Pmode);
11556 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11559 /* It seems that some optimizer does not combine a call like
11560 foo(strlen(bar), strlen(bar));
11561 when the move and the subtraction are done here, although it does
11562 compute the length just once when these instructions are emitted
11563 inside output_strlen_unroll(). But since &bar[strlen(bar)] is
11564 often used and this form uses one fewer register for the lifetime of
11565 output_strlen_unroll(), this is better. */
11567 emit_move_insn (out, addr);
11569 ix86_expand_strlensi_unroll_1 (out, src, align);
11571 /* strlensi_unroll_1 returns the address of the zero at the end of
11572 the string, like memchr(), so compute the length by subtracting
11573 the start address. */
11575 emit_insn (gen_subdi3 (out, out, addr));
11577 emit_insn (gen_subsi3 (out, out, addr));
11582 scratch2 = gen_reg_rtx (Pmode);
11583 scratch3 = gen_reg_rtx (Pmode);
11584 scratch4 = force_reg (Pmode, constm1_rtx);
11586 emit_move_insn (scratch3, addr);
11587 eoschar = force_reg (QImode, eoschar);
11589 emit_insn (gen_cld ());
11590 src = replace_equiv_address_nv (src, scratch3);
11592 /* If .md starts supporting :P, this can be done in .md. */
11593 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11594 scratch4), UNSPEC_SCAS);
11595 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11598 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11599 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11603 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11604 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
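/* A worked check of the recovery step above: scratch4 starts at -1 and
   repnz scasb decrements the counter once per byte scanned, including
   the terminating zero, so the counter ends at -(length + 2).  The
   expansion therefore computes

     length = ~counter - 1;

   e.g. for "ab" three bytes are scanned, the counter ends at -4,
   ~(-4) == 3, and 3 - 1 == 2, the correct length.  */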
11610 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
11613 out = result, initialized with the start address
11614 align_rtx = alignment of the address.
11615 scratch = scratch register, initialized with the start address when
11616 not aligned, otherwise undefined
11618 This is just the body. It needs the initializations mentioned above and
11619 some address computing at the end. These things are done in i386.md. */
11622 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11626 rtx align_2_label = NULL_RTX;
11627 rtx align_3_label = NULL_RTX;
11628 rtx align_4_label = gen_label_rtx ();
11629 rtx end_0_label = gen_label_rtx ();
11631 rtx tmpreg = gen_reg_rtx (SImode);
11632 rtx scratch = gen_reg_rtx (SImode);
11636 if (GET_CODE (align_rtx) == CONST_INT)
11637 align = INTVAL (align_rtx);
11639 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11641 /* Is there a known alignment and is it less than 4? */
11644 rtx scratch1 = gen_reg_rtx (Pmode);
11645 emit_move_insn (scratch1, out);
11646 /* Is there a known alignment and is it not 2? */
11649 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11650 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11652 /* Leave just the 3 lower bits. */
11653 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11654 NULL_RTX, 0, OPTAB_WIDEN);
11656 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11657 Pmode, 1, align_4_label);
11658 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11659 Pmode, 1, align_2_label);
11660 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11661 Pmode, 1, align_3_label);
11665 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11666 check if it is aligned to a 4-byte boundary. */
11668 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11669 NULL_RTX, 0, OPTAB_WIDEN);
11671 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11672 Pmode, 1, align_4_label);
11675 mem = change_address (src, QImode, out);
11677 /* Now compare the bytes. */
11679 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11680 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11681 QImode, 1, end_0_label);
11683 /* Increment the address. */
11685 emit_insn (gen_adddi3 (out, out, const1_rtx));
11687 emit_insn (gen_addsi3 (out, out, const1_rtx));
11689 /* Not needed with an alignment of 2. */
11692 emit_label (align_2_label);
11694 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11698 emit_insn (gen_adddi3 (out, out, const1_rtx));
11700 emit_insn (gen_addsi3 (out, out, const1_rtx));
11702 emit_label (align_3_label);
11705 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11709 emit_insn (gen_adddi3 (out, out, const1_rtx));
11711 emit_insn (gen_addsi3 (out, out, const1_rtx));
11714 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11715 align this loop; it only bloats the program without speeding it up. */
11717 emit_label (align_4_label);
11719 mem = change_address (src, SImode, out);
11720 emit_move_insn (scratch, mem);
11722 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11724 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11726 /* This formula yields a nonzero result iff one of the bytes is zero.
11727 This saves three branches inside the loop and many cycles. */
11729 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11730 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11731 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11732 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11733 gen_int_mode (0x80808080, SImode)));
11734 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
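/* The test above is the classic "word contains a zero byte" bit trick;
   a stand-alone C model of the 32-bit case is

     static int
     has_zero_byte (unsigned int x)
     {
       return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
     }

   The subtraction borrows into bit 7 of any byte that was zero, the
   ~x factor rejects bytes that were already >= 0x80, and the final
   mask keeps just those flag bits.  */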
11739 rtx reg = gen_reg_rtx (SImode);
11740 rtx reg2 = gen_reg_rtx (Pmode);
11741 emit_move_insn (reg, tmpreg);
11742 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11744 /* If zero is not in the first two bytes, move two bytes forward. */
11745 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11746 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11747 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11748 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11749 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11752 /* Emit lea manually to avoid clobbering the flags. */
11753 emit_insn (gen_rtx_SET (SImode, reg2,
11754 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11756 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11757 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11758 emit_insn (gen_rtx_SET (VOIDmode, out,
11759 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11766 rtx end_2_label = gen_label_rtx ();
11767 /* Is zero in the first two bytes? */
11769 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11770 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11771 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11772 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11773 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11775 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11776 JUMP_LABEL (tmp) = end_2_label;
11778 /* Not in the first two. Move two bytes forward. */
11779 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11781 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11783 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11785 emit_label (end_2_label);
11789 /* Avoid a branch in fixing up the final byte position. */
11790 tmpreg = gen_lowpart (QImode, tmpreg);
11791 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11792 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11794 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11796 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
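/* How the branchless fixup lands on the zero byte: at this point OUT is
   4 past the word that was loaded (or 6 past it when the zero was not in
   the first two bytes), and the low byte of TMPREG is 0x80 exactly when
   the zero sits in the lower byte of the remaining pair.  Adding that
   byte to itself shifts the 0x80 flag into the carry, so the
   subtract-with-borrow of 3 removes either 4 or 3: a zero at byte 0
   gives out = base + 4 - 4 = base, and a zero at byte 3 gives
   out = base + 6 - 3 = base + 3.  */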
11798 emit_label (end_0_label);
11802 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11803 rtx callarg2 ATTRIBUTE_UNUSED,
11804 rtx pop, int sibcall)
11806 rtx use = NULL, call;
11808 if (pop == const0_rtx)
11810 if (TARGET_64BIT && pop)
11814 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11815 fnaddr = machopic_indirect_call_target (fnaddr);
11817 /* Static functions and indirect calls don't need the pic register. */
11818 if (! TARGET_64BIT && flag_pic
11819 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11820 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11821 use_reg (&use, pic_offset_table_rtx);
11823 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11825 rtx al = gen_rtx_REG (QImode, 0);
11826 emit_move_insn (al, callarg2);
11827 use_reg (&use, al);
11829 #endif /* TARGET_MACHO */
11831 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11833 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11834 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11836 if (sibcall && TARGET_64BIT
11837 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11840 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11841 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11842 emit_move_insn (fnaddr, addr);
11843 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11846 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11848 call = gen_rtx_SET (VOIDmode, retval, call);
11851 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11852 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11853 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11856 call = emit_call_insn (call);
11858 CALL_INSN_FUNCTION_USAGE (call) = use;
11862 /* Clear stack slot assignments remembered from previous functions.
11863 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
11866 static struct machine_function *
11867 ix86_init_machine_status (void)
11869 struct machine_function *f;
11871 f = ggc_alloc_cleared (sizeof (struct machine_function));
11872 f->use_fast_prologue_epilogue_nregs = -1;
11877 /* Return a MEM corresponding to a stack slot with mode MODE.
11878 Allocate a new slot if necessary.
11880 The RTL for a function can have several slots available: N is
11881 which slot to use. */
11884 assign_386_stack_local (enum machine_mode mode, int n)
11886 struct stack_local_entry *s;
11888 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11891 for (s = ix86_stack_locals; s; s = s->next)
11892 if (s->mode == mode && s->n == n)
11895 s = (struct stack_local_entry *)
11896 ggc_alloc (sizeof (struct stack_local_entry));
11899 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11901 s->next = ix86_stack_locals;
11902 ix86_stack_locals = s;
11906 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11908 static GTY(()) rtx ix86_tls_symbol;
11910 ix86_tls_get_addr (void)
11913 if (!ix86_tls_symbol)
11915 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11916 (TARGET_GNU_TLS && !TARGET_64BIT)
11917 ? "___tls_get_addr"
11918 : "__tls_get_addr");
11921 return ix86_tls_symbol;
11924 /* Calculate the length of the memory address in the instruction
11925 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11928 memory_address_length (rtx addr)
11930 struct ix86_address parts;
11931 rtx base, index, disp;
11934 if (GET_CODE (addr) == PRE_DEC
11935 || GET_CODE (addr) == POST_INC
11936 || GET_CODE (addr) == PRE_MODIFY
11937 || GET_CODE (addr) == POST_MODIFY)
11940 if (! ix86_decompose_address (addr, &parts))
11944 index = parts.index;
11949 /* Rule of thumb: esp as the base always wants an index,
11950 while ebp as the base always wants a displacement. */
11952 /* Register Indirect. */
11953 if (base && !index && !disp)
11955 /* esp (for its index) and ebp (for its displacement) need
11956 the two-byte modrm form. */
11957 if (addr == stack_pointer_rtx
11958 || addr == arg_pointer_rtx
11959 || addr == frame_pointer_rtx
11960 || addr == hard_frame_pointer_rtx)
11964 /* Direct Addressing. */
11965 else if (disp && !base && !index)
11970 /* Find the length of the displacement constant. */
11973 if (GET_CODE (disp) == CONST_INT
11974 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11980 /* ebp always wants a displacement. */
11981 else if (base == hard_frame_pointer_rtx)
11984 /* An index requires the two-byte modrm form.... */
11986 /* ...like esp, which always wants an index. */
11987 || base == stack_pointer_rtx
11988 || base == arg_pointer_rtx
11989 || base == frame_pointer_rtx)
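/* Some concrete cases of the above, assuming 32-bit operands:
   (%eax) adds no bytes beyond the modrm; (%esp) needs a SIB byte;
   4(%ebp) needs a disp8 byte; foo(,%eax,4) needs a SIB byte plus a
   disp32, five extra bytes in all.  A constant displacement is encoded
   as disp8 when it fits in a signed 8-bit value, which is what the
   CONST_OK_FOR_LETTER_P (..., 'K') test above checks.  */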
11996 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11997 is set, expect that the insn has an 8-bit immediate alternative. */
11999 ix86_attr_length_immediate_default (rtx insn, int shortform)
12003 extract_insn_cached (insn);
12004 for (i = recog_data.n_operands - 1; i >= 0; --i)
12005 if (CONSTANT_P (recog_data.operand[i]))
12010 && GET_CODE (recog_data.operand[i]) == CONST_INT
12011 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12015 switch (get_attr_mode (insn))
12026 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12031 fatal_insn ("unknown insn mode", insn);
12037 /* Compute default value for "length_address" attribute. */
12039 ix86_attr_length_address_default (rtx insn)
12043 if (get_attr_type (insn) == TYPE_LEA)
12045 rtx set = PATTERN (insn);
12046 if (GET_CODE (set) == SET)
12048 else if (GET_CODE (set) == PARALLEL
12049 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12050 set = XVECEXP (set, 0, 0);
12053 #ifdef ENABLE_CHECKING
12059 return memory_address_length (SET_SRC (set));
12062 extract_insn_cached (insn);
12063 for (i = recog_data.n_operands - 1; i >= 0; --i)
12064 if (GET_CODE (recog_data.operand[i]) == MEM)
12066 return memory_address_length (XEXP (recog_data.operand[i], 0));
12072 /* Return the maximum number of instructions a CPU can issue. */
12075 ix86_issue_rate (void)
12079 case PROCESSOR_PENTIUM:
12083 case PROCESSOR_PENTIUMPRO:
12084 case PROCESSOR_PENTIUM4:
12085 case PROCESSOR_ATHLON:
12094 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12095 by DEP_INSN and nothing else set by DEP_INSN. */
12098 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12102 /* Simplify the test for uninteresting insns. */
12103 if (insn_type != TYPE_SETCC
12104 && insn_type != TYPE_ICMOV
12105 && insn_type != TYPE_FCMOV
12106 && insn_type != TYPE_IBR)
12109 if ((set = single_set (dep_insn)) != 0)
12111 set = SET_DEST (set);
12114 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12115 && XVECLEN (PATTERN (dep_insn), 0) == 2
12116 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12117 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12119 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12120 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12125 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12128 /* This test is true if the dependent insn reads the flags but
12129 not any other potentially set register. */
12130 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12133 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12139 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12140 address with operands set by DEP_INSN. */
12143 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12147 if (insn_type == TYPE_LEA
12150 addr = PATTERN (insn);
12151 if (GET_CODE (addr) == SET)
12153 else if (GET_CODE (addr) == PARALLEL
12154 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12155 addr = XVECEXP (addr, 0, 0);
12158 addr = SET_SRC (addr);
12163 extract_insn_cached (insn);
12164 for (i = recog_data.n_operands - 1; i >= 0; --i)
12165 if (GET_CODE (recog_data.operand[i]) == MEM)
12167 addr = XEXP (recog_data.operand[i], 0);
12174 return modified_in_p (addr, dep_insn);
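/* A typical pair that trips the Pentium's Address Generation Interlock,
   for which the PROCESSOR_PENTIUM case below adds a cycle:

     addl %ebx, %eax
     movl (%eax), %ecx

   The load cannot generate its address until the add has produced
   %eax.  */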
12178 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12180 enum attr_type insn_type, dep_insn_type;
12181 enum attr_memory memory, dep_memory;
12183 int dep_insn_code_number;
12185 /* Anti and output dependencies have zero cost on all CPUs. */
12186 if (REG_NOTE_KIND (link) != 0)
12189 dep_insn_code_number = recog_memoized (dep_insn);
12191 /* If we can't recognize the insns, we can't really do anything. */
12192 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12195 insn_type = get_attr_type (insn);
12196 dep_insn_type = get_attr_type (dep_insn);
12200 case PROCESSOR_PENTIUM:
12201 /* Address Generation Interlock adds a cycle of latency. */
12202 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12205 /* ??? Compares pair with jump/setcc. */
12206 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12209 /* Floating point stores require the value to be ready one cycle earlier. */
12210 if (insn_type == TYPE_FMOV
12211 && get_attr_memory (insn) == MEMORY_STORE
12212 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12216 case PROCESSOR_PENTIUMPRO:
12217 memory = get_attr_memory (insn);
12218 dep_memory = get_attr_memory (dep_insn);
12220 /* Since we can't represent delayed latencies of load+operation,
12221 increase the cost here for non-imov insns. */
12222 if (dep_insn_type != TYPE_IMOV
12223 && dep_insn_type != TYPE_FMOV
12224 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12227 /* INT->FP conversion is expensive. */
12228 if (get_attr_fp_int_src (dep_insn))
12231 /* There is one cycle extra latency between an FP op and a store. */
12232 if (insn_type == TYPE_FMOV
12233 && (set = single_set (dep_insn)) != NULL_RTX
12234 && (set2 = single_set (insn)) != NULL_RTX
12235 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12236 && GET_CODE (SET_DEST (set2)) == MEM)
12239 /* Show the ability of the reorder buffer to hide the latency of a load by
12240 executing it in parallel with the previous instruction when the
12241 previous instruction is not needed to compute the address. */
12242 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12243 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12245 /* Claim moves to take one cycle, as the core can issue one load
12246 at a time and the next load can start a cycle later. */
12247 if (dep_insn_type == TYPE_IMOV
12248 || dep_insn_type == TYPE_FMOV)
12256 memory = get_attr_memory (insn);
12257 dep_memory = get_attr_memory (dep_insn);
12258 /* The esp dependency is resolved before the instruction is really finished. */
12260 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12261 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12264 /* Since we can't represent delayed latencies of load+operation,
12265 increase the cost here for non-imov insns. */
12266 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12267 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12269 /* INT->FP conversion is expensive. */
12270 if (get_attr_fp_int_src (dep_insn))
12273 /* Show the ability of the reorder buffer to hide the latency of a load by
12274 executing it in parallel with the previous instruction when the
12275 previous instruction is not needed to compute the address. */
12276 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12277 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12279 /* Claim moves to take one cycle, as the core can issue one load
12280 at a time and the next load can start a cycle later. */
12281 if (dep_insn_type == TYPE_IMOV
12282 || dep_insn_type == TYPE_FMOV)
12291 case PROCESSOR_ATHLON:
12293 memory = get_attr_memory (insn);
12294 dep_memory = get_attr_memory (dep_insn);
12296 /* Show the ability of the reorder buffer to hide the latency of a load by
12297 executing it in parallel with the previous instruction when the
12298 previous instruction is not needed to compute the address. */
12299 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12300 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12302 enum attr_unit unit = get_attr_unit (insn);
12305 /* Because of the difference between the lengths of the integer and
12306 floating unit pipeline preparation stages, the memory operands
12307 for floating point are cheaper.
12309 ??? For Athlon the difference is most probably 2. */
12310 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12313 loadcost = TARGET_ATHLON ? 2 : 0;
12315 if (cost >= loadcost)
12330 struct ppro_sched_data
12333 int issued_this_cycle;
12337 static enum attr_ppro_uops
12338 ix86_safe_ppro_uops (rtx insn)
12340 if (recog_memoized (insn) >= 0)
12341 return get_attr_ppro_uops (insn);
12343 return PPRO_UOPS_MANY;
12347 ix86_dump_ppro_packet (FILE *dump)
12349 if (ix86_sched_data.ppro.decode[0])
12351 fprintf (dump, "PPRO packet: %d",
12352 INSN_UID (ix86_sched_data.ppro.decode[0]));
12353 if (ix86_sched_data.ppro.decode[1])
12354 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12355 if (ix86_sched_data.ppro.decode[2])
12356 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12357 fputc ('\n', dump);
12361 /* We're beginning a new block. Initialize data structures as necessary. */
12364 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12365 int sched_verbose ATTRIBUTE_UNUSED,
12366 int veclen ATTRIBUTE_UNUSED)
12368 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12371 /* Shift INSN to SLOT, and shift everything else down. */
12374 ix86_reorder_insn (rtx *insnp, rtx *slot)
12380 insnp[0] = insnp[1];
12381 while (++insnp != slot);
12387 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12390 enum attr_ppro_uops cur_uops;
12391 int issued_this_cycle;
12395 /* At this point .ppro.decode contains the state of the three
12396 decoders from the last "cycle". That is, those insns that were
12397 actually independent. But here we're scheduling for the
12398 decoder, and we may find things that are decodable in the same cycle. */
12401 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12402 issued_this_cycle = 0;
12405 cur_uops = ix86_safe_ppro_uops (*insnp);
12407 /* If the decoders are empty, and we have a complex insn at the
12408 head of the priority queue, let it issue without complaint. */
12409 if (decode[0] == NULL)
12411 if (cur_uops == PPRO_UOPS_MANY)
12413 decode[0] = *insnp;
12417 /* Otherwise, search for a 2-4 uop insn to issue. */
12418 while (cur_uops != PPRO_UOPS_FEW)
12420 if (insnp == ready)
12422 cur_uops = ix86_safe_ppro_uops (*--insnp);
12425 /* If so, move it to the head of the line. */
12426 if (cur_uops == PPRO_UOPS_FEW)
12427 ix86_reorder_insn (insnp, e_ready);
12429 /* Issue the head of the queue. */
12430 issued_this_cycle = 1;
12431 decode[0] = *e_ready--;
12434 /* Look for simple insns to fill in the other two slots. */
12435 for (i = 1; i < 3; ++i)
12436 if (decode[i] == NULL)
12438 if (ready > e_ready)
12442 cur_uops = ix86_safe_ppro_uops (*insnp);
12443 while (cur_uops != PPRO_UOPS_ONE)
12445 if (insnp == ready)
12447 cur_uops = ix86_safe_ppro_uops (*--insnp);
12450 /* Found one. Move it to the head of the queue and issue it. */
12451 if (cur_uops == PPRO_UOPS_ONE)
12453 ix86_reorder_insn (insnp, e_ready);
12454 decode[i] = *e_ready--;
12455 issued_this_cycle++;
12459 /* ??? Didn't find one. Ideally, here we would do a lazy split
12460 of 2-uop insns, issue one and queue the other. */
12464 if (issued_this_cycle == 0)
12465 issued_this_cycle = 1;
12466 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12469 /* We are about to begin issuing insns for this clock cycle.
12470 Override the default sort algorithm to better slot instructions. */
12472 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12473 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12474 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12476 int n_ready = *n_readyp;
12477 rtx *e_ready = ready + n_ready - 1;
12479 /* Make sure to go ahead and initialize key items in
12480 ix86_sched_data if we are not going to bother trying to
12481 reorder the ready queue. */
12484 ix86_sched_data.ppro.issued_this_cycle = 1;
12493 case PROCESSOR_PENTIUMPRO:
12494 ix86_sched_reorder_ppro (ready, e_ready);
12499 return ix86_issue_rate ();
12502 /* We are about to issue INSN. Return the number of insns left on the
12503 ready queue that can be issued this cycle. */
12506 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12507 int can_issue_more)
12513 return can_issue_more - 1;
12515 case PROCESSOR_PENTIUMPRO:
12517 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12519 if (uops == PPRO_UOPS_MANY)
12522 ix86_dump_ppro_packet (dump);
12523 ix86_sched_data.ppro.decode[0] = insn;
12524 ix86_sched_data.ppro.decode[1] = NULL;
12525 ix86_sched_data.ppro.decode[2] = NULL;
12527 ix86_dump_ppro_packet (dump);
12528 ix86_sched_data.ppro.decode[0] = NULL;
12530 else if (uops == PPRO_UOPS_FEW)
12533 ix86_dump_ppro_packet (dump);
12534 ix86_sched_data.ppro.decode[0] = insn;
12535 ix86_sched_data.ppro.decode[1] = NULL;
12536 ix86_sched_data.ppro.decode[2] = NULL;
12540 for (i = 0; i < 3; ++i)
12541 if (ix86_sched_data.ppro.decode[i] == NULL)
12543 ix86_sched_data.ppro.decode[i] = insn;
12551 ix86_dump_ppro_packet (dump);
12552 ix86_sched_data.ppro.decode[0] = NULL;
12553 ix86_sched_data.ppro.decode[1] = NULL;
12554 ix86_sched_data.ppro.decode[2] = NULL;
12558 return --ix86_sched_data.ppro.issued_this_cycle;
12563 ia32_use_dfa_pipeline_interface (void)
12565 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12570 /* How many alternative schedules to try. This should be as wide as the
12571 scheduling freedom in the DFA, but no wider. Making this value too
12572 large results in extra work for the scheduler. */
12575 ia32_multipass_dfa_lookahead (void)
12577 if (ix86_tune == PROCESSOR_PENTIUM)
12584 /* Compute the alignment given to a constant that is being placed in memory.
12585 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
12587 The value of this function is used instead of that alignment to align the object. */
12591 ix86_constant_alignment (tree exp, int align)
12593 if (TREE_CODE (exp) == REAL_CST)
12595 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12597 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12600 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12601 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12602 return BITS_PER_WORD;
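/* For example, per the cases above, a DFmode constant is given 64-bit
   alignment so FP loads of it are naturally aligned, and a string
   constant of 31 or more characters is word-aligned (unless optimizing
   for size) so block-move expanders can copy it a word at a time.  */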
12607 /* Compute the alignment for a static variable.
12608 TYPE is the data type, and ALIGN is the alignment that
12609 the object would ordinarily have. The value of this function is used
12610 instead of that alignment to align the object. */
12613 ix86_data_alignment (tree type, int align)
12615 if (AGGREGATE_TYPE_P (type)
12616 && TYPE_SIZE (type)
12617 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12618 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12619 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12622 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12623 to a 16-byte boundary. */
12626 if (AGGREGATE_TYPE_P (type)
12627 && TYPE_SIZE (type)
12628 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12629 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12630 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12634 if (TREE_CODE (type) == ARRAY_TYPE)
12636 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12638 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12641 else if (TREE_CODE (type) == COMPLEX_TYPE)
12644 if (TYPE_MODE (type) == DCmode && align < 64)
12646 if (TYPE_MODE (type) == XCmode && align < 128)
12649 else if ((TREE_CODE (type) == RECORD_TYPE
12650 || TREE_CODE (type) == UNION_TYPE
12651 || TREE_CODE (type) == QUAL_UNION_TYPE)
12652 && TYPE_FIELDS (type))
12654 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12656 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12659 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12660 || TREE_CODE (type) == INTEGER_TYPE)
12662 if (TYPE_MODE (type) == DFmode && align < 64)
12664 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12671 /* Compute the alignment for a local variable.
12672 TYPE is the data type, and ALIGN is the alignment that
12673 the object would ordinarily have. The value of this macro is used
12674 instead of that alignment to align the object. */
12677 ix86_local_alignment (tree type, int align)
12679 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12680 to a 16-byte boundary. */
12683 if (AGGREGATE_TYPE_P (type)
12684 && TYPE_SIZE (type)
12685 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12686 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12687 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12690 if (TREE_CODE (type) == ARRAY_TYPE)
12692 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12694 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12697 else if (TREE_CODE (type) == COMPLEX_TYPE)
12699 if (TYPE_MODE (type) == DCmode && align < 64)
12701 if (TYPE_MODE (type) == XCmode && align < 128)
12704 else if ((TREE_CODE (type) == RECORD_TYPE
12705 || TREE_CODE (type) == UNION_TYPE
12706 || TREE_CODE (type) == QUAL_UNION_TYPE)
12707 && TYPE_FIELDS (type))
12709 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12711 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12714 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12715 || TREE_CODE (type) == INTEGER_TYPE)
12718 if (TYPE_MODE (type) == DFmode && align < 64)
12720 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12726 /* Emit RTL insns to initialize the variable parts of a trampoline.
12727 FNADDR is an RTX for the address of the function's pure code.
12728 CXT is an RTX for the static chain value for the function. */
12730 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12734 /* Compute offset from the end of the jmp to the target function. */
12735 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12736 plus_constant (tramp, 10),
12737 NULL_RTX, 1, OPTAB_DIRECT);
12738 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12739 gen_int_mode (0xb9, QImode));
12740 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12741 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12742 gen_int_mode (0xe9, QImode));
12743 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
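/* The ten bytes emitted above form the sequence

     b9 <cxt:4>     movl $cxt, %ecx
     e9 <disp:4>    jmp  fnaddr

   where the jump displacement is relative to the end of the jmp
   (tramp + 10), matching the expand_binop computation of DISP.  */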
12748 /* Try to load the address using the shorter movl instead of movabs.
12749 We may want to support movq for kernel mode, but the kernel does not
12750 use trampolines at the moment. */
12751 if (x86_64_zero_extended_value (fnaddr))
12753 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12754 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12755 gen_int_mode (0xbb41, HImode));
12756 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12757 gen_lowpart (SImode, fnaddr));
12762 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12763 gen_int_mode (0xbb49, HImode));
12764 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12768 /* Load static chain using movabs to r10. */
12769 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12770 gen_int_mode (0xba49, HImode));
12771 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12774 /* Jump to r11. */
12775 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12776 gen_int_mode (0xff49, HImode));
12777 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12778 gen_int_mode (0xe3, QImode));
12780 if (offset > TRAMPOLINE_SIZE)
12784 #ifdef TRANSFER_FROM_TRAMPOLINE
12785 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12786 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12790 #define def_builtin(MASK, NAME, TYPE, CODE) \
12792 if ((MASK) & target_flags \
12793 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12794 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12795 NULL, NULL_TREE); \
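/* For instance, a two-operand SSE builtin would be registered along
   the lines of (using a v4sf_ftype_v4sf_v4sf type node built like the
   ones constructed below)

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   so the function is created only when -msse is enabled, and builtins
   carrying MASK_64BIT only on 64-bit targets.  */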
12798 struct builtin_description
12800 const unsigned int mask;
12801 const enum insn_code icode;
12802 const char *const name;
12803 const enum ix86_builtins code;
12804 const enum rtx_code comparison;
12805 const unsigned int flag;
12808 static const struct builtin_description bdesc_comi[] =
12810 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12811 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12812 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12813 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12814 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12815 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12816 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12817 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12818 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12819 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12820 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12821 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12822 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12823 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12824 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12825 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12826 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12827 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12828 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12829 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12830 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12831 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12832 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12833 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12836 static const struct builtin_description bdesc_2arg[] =
12839 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12840 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12841 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12842 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12843 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12844 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12845 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12846 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12848 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12849 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12850 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12851 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12852 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12853 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12854 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12855 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12856 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12857 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12858 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12859 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12860 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12861 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12862 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12863 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12864 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12865 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12866 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12867 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12869 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12870 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12871 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12872 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12874 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12875 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12876 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12877 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12879 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12880 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12881 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12882 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12883 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12886 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12887 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12888 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12889 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12890 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12891 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12892 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12893 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12895 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12896 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12897 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12898 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12899 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12900 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12901 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12902 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12904 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12905 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12906 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12908 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12909 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12910 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12913 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12914 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12916 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12917 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12918 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12919 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12920 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12921 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12923 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12924 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12925 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12926 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12928 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12929 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12930 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12931 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12936 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12937 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12938 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12940 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12941 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12942 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12944 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12945 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12946 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12947 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12948 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12949 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12951 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12952 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12953 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12954 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12955 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12956 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12958 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12959 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12960 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12961 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12963 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12964 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12977 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12978 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12979 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12980 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12981 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12982 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12983 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12984 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12985 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12986 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12987 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12988 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12989 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12990 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12991 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12992 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12993 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12994 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12995 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12997 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13071 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13072 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13073 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13074 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13075 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13076 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13078 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13079 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13080 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13081 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13082 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13085 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13087 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13090 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13092 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13093 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13094 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13098 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13099 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13100 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13101 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13102 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13103 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13106 static const struct builtin_description bdesc_1arg[] =
13108 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13109 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13111 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13112 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13113 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13115 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13116 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13117 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13118 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13119 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13120 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13127 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13129 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13130 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13132 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13133 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13134 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13135 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13136 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13138 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13140 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13141 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13142 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13143 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13145 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13146 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13147 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13149 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13152 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13153 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
};
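/* Illustrative use (hypothetical user code, not part of this file): a
   one-argument entry such as sqrtps becomes directly callable once
   registered, e.g. with -msse

       typedef float v4sf __attribute__ ((vector_size (16)));
       v4sf my_sqrt (v4sf x)
       {
         return __builtin_ia32_sqrtps (x);
       }
*/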
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
13173 tree pchar_type_node = build_pointer_type (char_type_node);
13174 tree pcchar_type_node = build_pointer_type (
13175 build_type_variant (char_type_node, 1, 0));
13176 tree pfloat_type_node = build_pointer_type (float_type_node);
13177 tree pcfloat_type_node = build_pointer_type (
13178 build_type_variant (float_type_node, 1, 0));
13179 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13180 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13181 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13184 tree int_ftype_v4sf_v4sf
13185 = build_function_type_list (integer_type_node,
13186 V4SF_type_node, V4SF_type_node, NULL_TREE);
13187 tree v4si_ftype_v4sf_v4sf
13188 = build_function_type_list (V4SI_type_node,
13189 V4SF_type_node, V4SF_type_node, NULL_TREE);
13190 /* MMX/SSE/integer conversions. */
13191 tree int_ftype_v4sf
13192 = build_function_type_list (integer_type_node,
13193 V4SF_type_node, NULL_TREE);
13194 tree int64_ftype_v4sf
13195 = build_function_type_list (long_long_integer_type_node,
13196 V4SF_type_node, NULL_TREE);
13197 tree int_ftype_v8qi
13198 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13199 tree v4sf_ftype_v4sf_int
13200 = build_function_type_list (V4SF_type_node,
13201 V4SF_type_node, integer_type_node, NULL_TREE);
13202 tree v4sf_ftype_v4sf_int64
13203 = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
13206 tree v4sf_ftype_v4sf_v2si
13207 = build_function_type_list (V4SF_type_node,
13208 V4SF_type_node, V2SI_type_node, NULL_TREE);
13209 tree int_ftype_v4hi_int
13210 = build_function_type_list (integer_type_node,
13211 V4HI_type_node, integer_type_node, NULL_TREE);
13212 tree v4hi_ftype_v4hi_int_int
13213 = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                integer_type_node, integer_type_node,
                                NULL_TREE);
13216 /* Miscellaneous. */
13217 tree v8qi_ftype_v4hi_v4hi
13218 = build_function_type_list (V8QI_type_node,
13219 V4HI_type_node, V4HI_type_node, NULL_TREE);
13220 tree v4hi_ftype_v2si_v2si
13221 = build_function_type_list (V4HI_type_node,
13222 V2SI_type_node, V2SI_type_node, NULL_TREE);
13223 tree v4sf_ftype_v4sf_v4sf_int
13224 = build_function_type_list (V4SF_type_node,
13225 V4SF_type_node, V4SF_type_node,
13226 integer_type_node, NULL_TREE);
13227 tree v2si_ftype_v4hi_v4hi
13228 = build_function_type_list (V2SI_type_node,
13229 V4HI_type_node, V4HI_type_node, NULL_TREE);
13230 tree v4hi_ftype_v4hi_int
13231 = build_function_type_list (V4HI_type_node,
13232 V4HI_type_node, integer_type_node, NULL_TREE);
13233 tree v4hi_ftype_v4hi_di
13234 = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
13237 tree v2si_ftype_v2si_di
13238 = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
13241 tree void_ftype_void
13242 = build_function_type (void_type_node, void_list_node);
13243 tree void_ftype_unsigned
13244 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13245 tree void_ftype_unsigned_unsigned
13246 = build_function_type_list (void_type_node, unsigned_type_node,
13247 unsigned_type_node, NULL_TREE);
13248 tree void_ftype_pcvoid_unsigned_unsigned
13249 = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
13252 tree unsigned_ftype_void
13253 = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
13256 tree v4sf_ftype_void
13257 = build_function_type (V4SF_type_node, void_list_node);
13258 tree v2si_ftype_v4sf
13259 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13260 /* Loads/stores. */
13261 tree void_ftype_v8qi_v8qi_pchar
13262 = build_function_type_list (void_type_node,
13263 V8QI_type_node, V8QI_type_node,
13264 pchar_type_node, NULL_TREE);
13265 tree v4sf_ftype_pcfloat
13266 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13267 /* @@@ the type is bogus */
13268 tree v4sf_ftype_v4sf_pv2si
13269 = build_function_type_list (V4SF_type_node,
13270 V4SF_type_node, pv2si_type_node, NULL_TREE);
13271 tree void_ftype_pv2si_v4sf
13272 = build_function_type_list (void_type_node,
13273 pv2si_type_node, V4SF_type_node, NULL_TREE);
13274 tree void_ftype_pfloat_v4sf
13275 = build_function_type_list (void_type_node,
13276 pfloat_type_node, V4SF_type_node, NULL_TREE);
13277 tree void_ftype_pdi_di
13278 = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
13281 tree void_ftype_pv2di_v2di
13282 = build_function_type_list (void_type_node,
13283 pv2di_type_node, V2DI_type_node, NULL_TREE);
13284 /* Normal vector unops. */
13285 tree v4sf_ftype_v4sf
13286 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13288 /* Normal vector binops. */
13289 tree v4sf_ftype_v4sf_v4sf
13290 = build_function_type_list (V4SF_type_node,
13291 V4SF_type_node, V4SF_type_node, NULL_TREE);
13292 tree v8qi_ftype_v8qi_v8qi
13293 = build_function_type_list (V8QI_type_node,
13294 V8QI_type_node, V8QI_type_node, NULL_TREE);
13295 tree v4hi_ftype_v4hi_v4hi
13296 = build_function_type_list (V4HI_type_node,
13297 V4HI_type_node, V4HI_type_node, NULL_TREE);
13298 tree v2si_ftype_v2si_v2si
13299 = build_function_type_list (V2SI_type_node,
13300 V2SI_type_node, V2SI_type_node, NULL_TREE);
13301 tree di_ftype_di_di
13302 = build_function_type_list (long_long_unsigned_type_node,
13303 long_long_unsigned_type_node,
13304 long_long_unsigned_type_node, NULL_TREE);
13306 tree v2si_ftype_v2sf
13307 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13308 tree v2sf_ftype_v2si
13309 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13310 tree v2si_ftype_v2si
13311 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13312 tree v2sf_ftype_v2sf
13313 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13314 tree v2sf_ftype_v2sf_v2sf
13315 = build_function_type_list (V2SF_type_node,
13316 V2SF_type_node, V2SF_type_node, NULL_TREE);
13317 tree v2si_ftype_v2sf_v2sf
13318 = build_function_type_list (V2SI_type_node,
13319 V2SF_type_node, V2SF_type_node, NULL_TREE);
13320 tree pint_type_node = build_pointer_type (integer_type_node);
13321 tree pcint_type_node = build_pointer_type (
13322 build_type_variant (integer_type_node, 1, 0));
13323 tree pdouble_type_node = build_pointer_type (double_type_node);
13324 tree pcdouble_type_node = build_pointer_type (
13325 build_type_variant (double_type_node, 1, 0));
13326 tree int_ftype_v2df_v2df
13327 = build_function_type_list (integer_type_node,
13328 V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
13332 tree v2di_ftype_void
13333 = build_function_type (V2DI_type_node, void_list_node);
13334 tree ti_ftype_ti_ti
13335 = build_function_type_list (intTI_type_node,
13336 intTI_type_node, intTI_type_node, NULL_TREE);
13337 tree void_ftype_pcvoid
13338 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
                                long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
                                V2DI_type_node, NULL_TREE);
13345 tree v4sf_ftype_v4si
13346 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13347 tree v4si_ftype_v4sf
13348 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13349 tree v2df_ftype_v4si
13350 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13351 tree v4si_ftype_v2df
13352 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13353 tree v2si_ftype_v2df
13354 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13355 tree v4sf_ftype_v2df
13356 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13357 tree v2df_ftype_v2si
13358 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13359 tree v2df_ftype_v4sf
13360 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13361 tree int_ftype_v2df
13362 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13363 tree int64_ftype_v2df
13364 = build_function_type_list (long_long_integer_type_node,
13365 V2DF_type_node, NULL_TREE);
13366 tree v2df_ftype_v2df_int
13367 = build_function_type_list (V2DF_type_node,
13368 V2DF_type_node, integer_type_node, NULL_TREE);
13369 tree v2df_ftype_v2df_int64
13370 = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
13373 tree v4sf_ftype_v4sf_v2df
13374 = build_function_type_list (V4SF_type_node,
13375 V4SF_type_node, V2DF_type_node, NULL_TREE);
13376 tree v2df_ftype_v2df_v4sf
13377 = build_function_type_list (V2DF_type_node,
13378 V2DF_type_node, V4SF_type_node, NULL_TREE);
13379 tree v2df_ftype_v2df_v2df_int
13380 = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node, NULL_TREE);
13384 tree v2df_ftype_v2df_pv2si
13385 = build_function_type_list (V2DF_type_node,
13386 V2DF_type_node, pv2si_type_node, NULL_TREE);
13387 tree void_ftype_pv2si_v2df
13388 = build_function_type_list (void_type_node,
13389 pv2si_type_node, V2DF_type_node, NULL_TREE);
13390 tree void_ftype_pdouble_v2df
13391 = build_function_type_list (void_type_node,
13392 pdouble_type_node, V2DF_type_node, NULL_TREE);
13393 tree void_ftype_pint_int
13394 = build_function_type_list (void_type_node,
13395 pint_type_node, integer_type_node, NULL_TREE);
13396 tree void_ftype_v16qi_v16qi_pchar
13397 = build_function_type_list (void_type_node,
13398 V16QI_type_node, V16QI_type_node,
13399 pchar_type_node, NULL_TREE);
13400 tree v2df_ftype_pcdouble
13401 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13402 tree v2df_ftype_v2df_v2df
13403 = build_function_type_list (V2DF_type_node,
13404 V2DF_type_node, V2DF_type_node, NULL_TREE);
13405 tree v16qi_ftype_v16qi_v16qi
13406 = build_function_type_list (V16QI_type_node,
13407 V16QI_type_node, V16QI_type_node, NULL_TREE);
13408 tree v8hi_ftype_v8hi_v8hi
13409 = build_function_type_list (V8HI_type_node,
13410 V8HI_type_node, V8HI_type_node, NULL_TREE);
13411 tree v4si_ftype_v4si_v4si
13412 = build_function_type_list (V4SI_type_node,
13413 V4SI_type_node, V4SI_type_node, NULL_TREE);
13414 tree v2di_ftype_v2di_v2di
13415 = build_function_type_list (V2DI_type_node,
13416 V2DI_type_node, V2DI_type_node, NULL_TREE);
13417 tree v2di_ftype_v2df_v2df
13418 = build_function_type_list (V2DI_type_node,
13419 V2DF_type_node, V2DF_type_node, NULL_TREE);
13420 tree v2df_ftype_v2df
13421 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13422 tree v2df_ftype_double
13423 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13424 tree v2df_ftype_double_double
13425 = build_function_type_list (V2DF_type_node,
13426 double_type_node, double_type_node, NULL_TREE);
13427 tree int_ftype_v8hi_int
13428 = build_function_type_list (integer_type_node,
13429 V8HI_type_node, integer_type_node, NULL_TREE);
13430 tree v8hi_ftype_v8hi_int_int
13431 = build_function_type_list (V8HI_type_node,
13432 V8HI_type_node, integer_type_node,
13433 integer_type_node, NULL_TREE);
13434 tree v2di_ftype_v2di_int
13435 = build_function_type_list (V2DI_type_node,
13436 V2DI_type_node, integer_type_node, NULL_TREE);
13437 tree v4si_ftype_v4si_int
13438 = build_function_type_list (V4SI_type_node,
13439 V4SI_type_node, integer_type_node, NULL_TREE);
13440 tree v8hi_ftype_v8hi_int
13441 = build_function_type_list (V8HI_type_node,
13442 V8HI_type_node, integer_type_node, NULL_TREE);
13443 tree v8hi_ftype_v8hi_v2di
13444 = build_function_type_list (V8HI_type_node,
13445 V8HI_type_node, V2DI_type_node, NULL_TREE);
13446 tree v4si_ftype_v4si_v2di
13447 = build_function_type_list (V4SI_type_node,
13448 V4SI_type_node, V2DI_type_node, NULL_TREE);
13449 tree v4si_ftype_v8hi_v8hi
13450 = build_function_type_list (V4SI_type_node,
13451 V8HI_type_node, V8HI_type_node, NULL_TREE);
13452 tree di_ftype_v8qi_v8qi
13453 = build_function_type_list (long_long_unsigned_type_node,
13454 V8QI_type_node, V8QI_type_node, NULL_TREE);
13455 tree v2di_ftype_v16qi_v16qi
13456 = build_function_type_list (V2DI_type_node,
13457 V16QI_type_node, V16QI_type_node, NULL_TREE);
13458 tree int_ftype_v16qi
13459 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13460 tree v16qi_ftype_pcchar
13461 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13462 tree void_ftype_pchar_v16qi
13463 = build_function_type_list (void_type_node,
13464 pchar_type_node, V16QI_type_node, NULL_TREE);
13465 tree v4si_ftype_pcint
13466 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13467 tree void_ftype_pcint_v4si
13468 = build_function_type_list (void_type_node,
13469 pcint_type_node, V4SI_type_node, NULL_TREE);
13470 tree v2di_ftype_v2di
13471 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree float80_type;
  tree float128_type;
  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 96;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }
13489 float128_type = make_node (REAL_TYPE);
13490 TYPE_PRECISION (float128_type) = 128;
13491 layout_type (float128_type);
13492 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
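/* Illustrative use (hypothetical user code, not part of this file):
   after the register_builtin_type calls above, both names are ordinary
   type specifiers on ia32 targets:

       __float80  e = 1.0;
       __float128 q = 2.0;
*/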
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode: type = v16qi_ftype_v16qi_v16qi; break;
        case V8HImode:  type = v8hi_ftype_v8hi_v8hi; break;
        case V4SImode:  type = v4si_ftype_v4si_v4si; break;
        case V2DImode:  type = v2di_ftype_v2di_v2di; break;
        case V2DFmode:  type = v2df_ftype_v2df_v2df; break;
        case TImode:    type = ti_ftype_ti_ti; break;
        case V4SFmode:  type = v4sf_ftype_v4sf_v4sf; break;
        case V8QImode:  type = v8qi_ftype_v8qi_v8qi; break;
        case V4HImode:  type = v4hi_ftype_v4hi_v4hi; break;
        case V2SImode:  type = v2si_ftype_v2si_v2si; break;
        case DImode:    type = di_ftype_di_di; break;
        default:
          abort ();
        }
      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
13563 /* Add the remaining MMX insns with somewhat more complicated types. */
13564 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13565 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13566 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13567 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13568 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13570 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13571 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13572 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13574 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13575 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13577 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13578 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
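/* Illustrative use (hypothetical user code, not part of this file): the
   MMX shift builtins above take the count as a 64-bit value, so with
   -mmmx one can write

       typedef short __v4hi __attribute__ ((vector_size (8)));
       __v4hi shl16 (__v4hi x, long long n)
       {
         return __builtin_ia32_psllw (x, n);
       }
*/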
13580 /* comi/ucomi insns. */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13587 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13588 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13589 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13591 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13592 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13593 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13594 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13595 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13596 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13597 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13598 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13599 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13600 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13601 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13603 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13604 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13606 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13608 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13609 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13610 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13611 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13612 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13613 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13615 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13616 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13617 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13618 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13620 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13621 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13622 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13623 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13625 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13627 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13629 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13630 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13631 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13632 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13633 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13634 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13636 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13638 /* Original 3DNow! */
13639 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13640 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13641 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13642 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13643 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13644 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13645 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13646 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13647 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13648 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13649 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13650 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13651 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13652 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13653 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13654 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13655 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13656 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13657 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13658 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
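/* Illustrative use (hypothetical user code, not part of this file): the
   3DNow! builtins operate on pairs of floats, e.g. with -m3dnow

       typedef float __v2sf __attribute__ ((vector_size (8)));
       __v2sf add2 (__v2sf a, __v2sf b)
       {
         return __builtin_ia32_pfadd (a, b);
       }
*/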
13660 /* 3DNow! extension as used in the Athlon CPU. */
13661 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13662 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13663 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13664 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13665 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13666 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13668 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13679 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13697 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13704 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13709 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13718 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13719 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13720 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13727 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13728 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13734 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13735 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13739 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13740 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
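/* These three map one-to-one onto the intrinsics in emmintrin.h:
   _mm_clflush, _mm_lfence and _mm_mfence are thin wrappers around the
   builtins just defined.  */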
13743 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13751 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13753 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13754 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13755 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13757 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13758 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13759 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13761 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13762 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13764 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13765 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13766 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13767 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13769 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13770 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13771 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13772 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13774 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13775 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13777 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13779 /* Prescott New Instructions. */
13780 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13781 void_ftype_pcvoid_unsigned_unsigned,
13782 IX86_BUILTIN_MONITOR);
13783 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13784 void_ftype_unsigned_unsigned,
13785 IX86_BUILTIN_MWAIT);
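/* Illustrative use (hypothetical user code, not part of this file):

       void wait_on (volatile int *p)
       {
         __builtin_ia32_monitor ((void *) p, 0U, 0U);
         __builtin_ia32_mwait (0U, 0U);
       }

   with -msse3; pmmintrin.h exposes these as _mm_monitor and
   _mm_mwait.  */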
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
13792 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13793 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13794 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13795 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
               v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0),
                                CONST0_RTX (V4SFmode)));
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
13826 tree arg0 = TREE_VALUE (arglist);
13827 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13828 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13829 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13830 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13831 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13832 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13834 if (VECTOR_MODE_P (mode0))
13835 op0 = safe_vector_operand (op0, mode0);
13836 if (VECTOR_MODE_P (mode1))
13837 op1 = safe_vector_operand (op1, mode1);
  if (! target
      || GET_MODE (target) != tmode
13841 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13842 target = gen_reg_rtx (tmode);
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }
13851 /* In case the insn wants input operands in modes different from
13852 the result, abort. */
13853 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();
13857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13858 op0 = copy_to_mode_reg (mode0, op0);
13859 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13860 op1 = copy_to_mode_reg (mode1, op1);
13862 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13863 yet one of the two must not be a memory. This is normally enforced
13864 by expanders, but we didn't bother to create one here. */
13865 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13866 op0 = copy_to_mode_reg (mode0, op0);
  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
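/* Worked example (for orientation; the cited entry lives in bdesc_2arg
   earlier in this file): __builtin_ia32_addps (a, b) reaches the routine
   above with icode == CODE_FOR_addv4sf3, so tmode, mode0 and mode1 are
   all V4SFmode; the operands are copied into registers whenever the
   pattern's predicates reject them, and GEN_FCN emits the addv4sf3
   insn.  */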
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
13881 tree arg0 = TREE_VALUE (arglist);
13882 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13883 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13884 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13885 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13886 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13888 if (VECTOR_MODE_P (mode1))
13889 op1 = safe_vector_operand (op1, mode1);
13891 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13892 op1 = copy_to_mode_reg (mode1, op1);
  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
13907 tree arg0 = TREE_VALUE (arglist);
13908 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13909 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13910 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
13941 tree arg0 = TREE_VALUE (arglist);
13942 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13943 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13944 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  if (! target
      || GET_MODE (target) != tmode
13948 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13949 target = gen_reg_rtx (tmode);
13951 if (VECTOR_MODE_P (mode0))
13952 op0 = safe_vector_operand (op0, mode0);
13954 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13955 op0 = copy_to_mode_reg (mode0, op0);
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13959 op1 = copy_to_mode_reg (mode0, op1);
  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
13968 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
13975 tree arg0 = TREE_VALUE (arglist);
13976 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13977 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
13980 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13981 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13982 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13983 enum rtx_code comparison = d->comparison;
13985 if (VECTOR_MODE_P (mode0))
13986 op0 = safe_vector_operand (op0, mode0);
13987 if (VECTOR_MODE_P (mode1))
13988 op1 = safe_vector_operand (op1, mode1);
  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }
  if (target == 0
      || GET_MODE (target) != tmode
14002 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14003 target = gen_reg_rtx (tmode);
14005 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14006 op0 = copy_to_mode_reg (mode0, op0);
14007 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14008 op1 = copy_to_mode_reg (mode1, op1);
14010 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
14018 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
14025 tree arg0 = TREE_VALUE (arglist);
14026 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14027 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
14030 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14031 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14032 enum rtx_code comparison = d->comparison;
14034 if (VECTOR_MODE_P (mode0))
14035 op0 = safe_vector_operand (op0, mode0);
14036 if (VECTOR_MODE_P (mode1))
14037 op1 = safe_vector_operand (op1, mode1);
  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
14049 emit_move_insn (target, const0_rtx);
14050 target = gen_rtx_SUBREG (QImode, target, 0);
14052 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14053 op0 = copy_to_mode_reg (mode0, op0);
14054 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14055 op1 = copy_to_mode_reg (mode1, op1);
14057 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
14062 emit_insn (gen_rtx_SET (VOIDmode,
14063 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
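/* Illustrative use (hypothetical user code, not part of this file):

       typedef float v4sf __attribute__ ((vector_size (16)));
       int eq (v4sf a, v4sf b)
       {
         return __builtin_ia32_comieq (a, b);
       }

   with -msse; the STRICT_LOW_PART store above is what turns the QImode
   flag result into the int return value.  */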
14071 /* Expand an expression EXP that calls a built-in function,
14072 with result going to TARGET if that's convenient
14073 (and in mode MODE if that's convenient).
14074 SUBTARGET may be used as the target for computing one of EXP's operands.
14075 IGNORE is nonzero if the value is to be ignored. */
static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14079 enum machine_mode mode ATTRIBUTE_UNUSED,
14080 int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
14084 enum insn_code icode;
14085 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14086 tree arglist = TREE_OPERAND (exp, 1);
14087 tree arg0, arg1, arg2;
14088 rtx op0, op1, op2, pat;
14089 enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
14094 case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;
14098 case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;
14102 case IX86_BUILTIN_PEXTRW:
14103 case IX86_BUILTIN_PEXTRW128:
14104 icode = (fcode == IX86_BUILTIN_PEXTRW
14105 ? CODE_FOR_mmx_pextrw
14106 : CODE_FOR_sse2_pextrw);
14107 arg0 = TREE_VALUE (arglist);
14108 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14109 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14110 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14111 tmode = insn_data[icode].operand[0].mode;
14112 mode0 = insn_data[icode].operand[1].mode;
14113 mode1 = insn_data[icode].operand[2].mode;
14115 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14116 op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          error ("selector must be an integer constant in the range 0..%i",
                 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
14125 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14126 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
14133 case IX86_BUILTIN_PINSRW:
14134 case IX86_BUILTIN_PINSRW128:
14135 icode = (fcode == IX86_BUILTIN_PINSRW
14136 ? CODE_FOR_mmx_pinsrw
14137 : CODE_FOR_sse2_pinsrw);
14138 arg0 = TREE_VALUE (arglist);
14139 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14140 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14141 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14142 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14143 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14144 tmode = insn_data[icode].operand[0].mode;
14145 mode0 = insn_data[icode].operand[1].mode;
14146 mode1 = insn_data[icode].operand[2].mode;
14147 mode2 = insn_data[icode].operand[3].mode;
14149 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14150 op0 = copy_to_mode_reg (mode0, op0);
14151 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14152 op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          error ("selector must be an integer constant in the range 0..%i",
                 fcode == IX86_BUILTIN_PINSRW ? 15:255);
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
14161 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14162 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
14169 case IX86_BUILTIN_MASKMOVQ:
14170 case IX86_BUILTIN_MASKMOVDQU:
14171 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14172 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14173 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14174 : CODE_FOR_sse2_maskmovdqu));
14175 /* Note the arg order is different from the operand order. */
14176 arg1 = TREE_VALUE (arglist);
14177 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14178 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14179 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14180 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14181 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14182 mode0 = insn_data[icode].operand[0].mode;
14183 mode1 = insn_data[icode].operand[1].mode;
14184 mode2 = insn_data[icode].operand[2].mode;
14186 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14187 op0 = copy_to_mode_reg (mode0, op0);
14188 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14189 op1 = copy_to_mode_reg (mode1, op1);
14190 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14191 op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;
14198 case IX86_BUILTIN_SQRTSS:
14199 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14200 case IX86_BUILTIN_RSQRTSS:
14201 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14202 case IX86_BUILTIN_RCPSS:
14203 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14205 case IX86_BUILTIN_LOADAPS:
14206 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14208 case IX86_BUILTIN_LOADUPS:
14209 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14211 case IX86_BUILTIN_STOREAPS:
14212 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14214 case IX86_BUILTIN_STOREUPS:
14215 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14217 case IX86_BUILTIN_LOADSS:
14218 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14220 case IX86_BUILTIN_STORESS:
14221 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14223 case IX86_BUILTIN_LOADHPS:
14224 case IX86_BUILTIN_LOADLPS:
14225 case IX86_BUILTIN_LOADHPD:
14226 case IX86_BUILTIN_LOADLPD:
14227 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14228 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14229 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14230 : CODE_FOR_sse2_movsd);
14231 arg0 = TREE_VALUE (arglist);
14232 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14233 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14234 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14235 tmode = insn_data[icode].operand[0].mode;
14236 mode0 = insn_data[icode].operand[1].mode;
14237 mode1 = insn_data[icode].operand[2].mode;
14239 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14240 op0 = copy_to_mode_reg (mode0, op0);
14241 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
14244 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14245 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
14252 case IX86_BUILTIN_STOREHPS:
14253 case IX86_BUILTIN_STORELPS:
14254 case IX86_BUILTIN_STOREHPD:
14255 case IX86_BUILTIN_STORELPD:
14256 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14257 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14258 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14259 : CODE_FOR_sse2_movsd);
14260 arg0 = TREE_VALUE (arglist);
14261 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14262 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14263 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14264 mode0 = insn_data[icode].operand[1].mode;
14265 mode1 = insn_data[icode].operand[2].mode;
14267 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14268 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14269 op1 = copy_to_mode_reg (mode1, op1);
      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return const0_rtx;
14277 case IX86_BUILTIN_MOVNTPS:
14278 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14279 case IX86_BUILTIN_MOVNTQ:
14280 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14282 case IX86_BUILTIN_LDMXCSR:
14283 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14284 target = assign_386_stack_local (SImode, 0);
14285 emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;
14289 case IX86_BUILTIN_STMXCSR:
14290 target = assign_386_stack_local (SImode, 0);
14291 emit_insn (gen_stmxcsr (target));
14292 return copy_to_mode_reg (SImode, target);
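/* Illustrative use (hypothetical user code, not part of this file):
   these two builtins are what xmmintrin.h's _mm_getcsr and _mm_setcsr
   expand to, so MXCSR round-mode twiddling looks like

       unsigned int csr = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (csr | 0x6000);

   where 0x6000 sets both rounding-control bits (round toward zero).  */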
14294 case IX86_BUILTIN_SHUFPS:
14295 case IX86_BUILTIN_SHUFPD:
14296 icode = (fcode == IX86_BUILTIN_SHUFPS
14297 ? CODE_FOR_sse_shufps
14298 : CODE_FOR_sse2_shufpd);
14299 arg0 = TREE_VALUE (arglist);
14300 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14301 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14302 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14303 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14304 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14305 tmode = insn_data[icode].operand[0].mode;
14306 mode0 = insn_data[icode].operand[1].mode;
14307 mode1 = insn_data[icode].operand[2].mode;
14308 mode2 = insn_data[icode].operand[3].mode;
14310 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14311 op0 = copy_to_mode_reg (mode0, op0);
14312 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14313 op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
14322 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14323 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
14330 case IX86_BUILTIN_PSHUFW:
14331 case IX86_BUILTIN_PSHUFD:
14332 case IX86_BUILTIN_PSHUFHW:
14333 case IX86_BUILTIN_PSHUFLW:
14334 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14335 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14336 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14337 : CODE_FOR_mmx_pshufw);
14338 arg0 = TREE_VALUE (arglist);
14339 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14340 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14341 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14342 tmode = insn_data[icode].operand[0].mode;
14343 mode1 = insn_data[icode].operand[1].mode;
14344 mode2 = insn_data[icode].operand[2].mode;
14346 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14347 op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
14356 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14357 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
14364 case IX86_BUILTIN_PSLLDQI128:
14365 case IX86_BUILTIN_PSRLDQI128:
14366 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14367 : CODE_FOR_sse2_lshrti3);
14368 arg0 = TREE_VALUE (arglist);
14369 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14370 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14371 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14372 tmode = insn_data[icode].operand[0].mode;
14373 mode1 = insn_data[icode].operand[1].mode;
14374 mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
14393 case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return 0;
14397 case IX86_BUILTIN_PAVGUSB:
14398 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14400 case IX86_BUILTIN_PF2ID:
14401 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14403 case IX86_BUILTIN_PFACC:
14404 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14406 case IX86_BUILTIN_PFADD:
14407 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14409 case IX86_BUILTIN_PFCMPEQ:
14410 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14412 case IX86_BUILTIN_PFCMPGE:
14413 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14415 case IX86_BUILTIN_PFCMPGT:
14416 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14418 case IX86_BUILTIN_PFMAX:
14419 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14421 case IX86_BUILTIN_PFMIN:
14422 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14424 case IX86_BUILTIN_PFMUL:
14425 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14427 case IX86_BUILTIN_PFRCP:
14428 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14430 case IX86_BUILTIN_PFRCPIT1:
14431 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14433 case IX86_BUILTIN_PFRCPIT2:
14434 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14436 case IX86_BUILTIN_PFRSQIT1:
14437 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14439 case IX86_BUILTIN_PFRSQRT:
14440 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14442 case IX86_BUILTIN_PFSUB:
14443 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14445 case IX86_BUILTIN_PFSUBR:
14446 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14448 case IX86_BUILTIN_PI2FD:
14449 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14451 case IX86_BUILTIN_PMULHRW:
14452 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14454 case IX86_BUILTIN_PF2IW:
14455 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14457 case IX86_BUILTIN_PFNACC:
14458 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14460 case IX86_BUILTIN_PFPNACC:
14461 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14463 case IX86_BUILTIN_PI2FW:
14464 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14466 case IX86_BUILTIN_PSWAPDSI:
14467 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14469 case IX86_BUILTIN_PSWAPDSF:
14470 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14472 case IX86_BUILTIN_SSE_ZERO:
14473 target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;
14477 case IX86_BUILTIN_MMX_ZERO:
14478 target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;
14482 case IX86_BUILTIN_CLRTI:
14483 target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;
14488 case IX86_BUILTIN_SQRTSD:
14489 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14490 case IX86_BUILTIN_LOADAPD:
14491 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14492 case IX86_BUILTIN_LOADUPD:
14493 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14495 case IX86_BUILTIN_STOREAPD:
14496 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14497 case IX86_BUILTIN_STOREUPD:
14498 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14500 case IX86_BUILTIN_LOADSD:
14501 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14503 case IX86_BUILTIN_STORESD:
14504 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14506 case IX86_BUILTIN_SETPD1:
14507 target = assign_386_stack_local (DFmode, 0);
14508 arg0 = TREE_VALUE (arglist);
14509 emit_move_insn (adjust_address (target, DFmode, 0),
14510 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14511 op0 = gen_reg_rtx (V2DFmode);
14512 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14513 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14516 case IX86_BUILTIN_SETPD:
14517 target = assign_386_stack_local (V2DFmode, 0);
14518 arg0 = TREE_VALUE (arglist);
14519 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14520 emit_move_insn (adjust_address (target, DFmode, 0),
14521 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14522 emit_move_insn (adjust_address (target, DFmode, 8),
14523 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14524 op0 = gen_reg_rtx (V2DFmode);
14525 emit_insn (gen_sse2_movapd (op0, target));
14528 case IX86_BUILTIN_LOADRPD:
14529 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14530 gen_reg_rtx (V2DFmode), 1);
14531 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14534 case IX86_BUILTIN_LOADPD1:
14535 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14536 gen_reg_rtx (V2DFmode), 1);
14537 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14540 case IX86_BUILTIN_STOREPD1:
14541 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14542 case IX86_BUILTIN_STORERPD:
14543 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14545 case IX86_BUILTIN_CLRPD:
14546 target = gen_reg_rtx (V2DFmode);
14547 emit_insn (gen_sse_clrv2df (target));
14550 case IX86_BUILTIN_MFENCE:
14551 emit_insn (gen_sse2_mfence ());
14553 case IX86_BUILTIN_LFENCE:
14554 emit_insn (gen_sse2_lfence ());
14557 case IX86_BUILTIN_CLFLUSH:
14558 arg0 = TREE_VALUE (arglist);
14559 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14560 icode = CODE_FOR_sse2_clflush;
14561 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14562 op0 = copy_to_mode_reg (Pmode, op0);
14564 emit_insn (gen_sse2_clflush (op0));
14567 case IX86_BUILTIN_MOVNTPD:
14568 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14569 case IX86_BUILTIN_MOVNTDQ:
14570 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14571 case IX86_BUILTIN_MOVNTI:
14572 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14574 case IX86_BUILTIN_LOADDQA:
14575 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14576 case IX86_BUILTIN_LOADDQU:
14577 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14578 case IX86_BUILTIN_LOADD:
14579 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14581 case IX86_BUILTIN_STOREDQA:
14582 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14583 case IX86_BUILTIN_STOREDQU:
14584 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14585 case IX86_BUILTIN_STORED:
14586 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14588 case IX86_BUILTIN_MONITOR:
14589 arg0 = TREE_VALUE (arglist);
14590 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14591 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14592 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14593 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14594 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14596 op0 = copy_to_mode_reg (SImode, op0);
14598 op1 = copy_to_mode_reg (SImode, op1);
14600 op2 = copy_to_mode_reg (SImode, op2);
14601 emit_insn (gen_monitor (op0, op1, op2));
14604 case IX86_BUILTIN_MWAIT:
14605 arg0 = TREE_VALUE (arglist);
14606 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14607 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14608 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14610 op0 = copy_to_mode_reg (SImode, op0);
14612 op1 = copy_to_mode_reg (SImode, op1);
14613 emit_insn (gen_mwait (op0, op1));
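/* A hypothetical user-level sketch of the two builtins just expanded
   (SSE3 targets; illustrative only, not compiled here):  */
#if 0
static void
example_monitor_mwait (volatile void *p)
{
  __builtin_ia32_monitor ((void *) p, 0, 0);  /* arm the monitor on P */
  __builtin_ia32_mwait (0, 0);                /* wait for a store to P */
}
#endif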
14616 case IX86_BUILTIN_LOADDDUP:
14617 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14619 case IX86_BUILTIN_LDDQU:
14620 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);
14627 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14628 if (d->code == fcode)
14630 /* Compares are treated specially. */
14631 if (d->icode == CODE_FOR_maskcmpv4sf3
14632 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14633 || d->icode == CODE_FOR_maskncmpv4sf3
14634 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14635 || d->icode == CODE_FOR_maskcmpv2df3
14636 || d->icode == CODE_FOR_vmmaskcmpv2df3
14637 || d->icode == CODE_FOR_maskncmpv2df3
14638 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14639 return ix86_expand_sse_compare (d, arglist, target);
14641 return ix86_expand_binop_builtin (d->icode, arglist, target);
14644 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14645 if (d->code == fcode)
14646 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14648 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14649 if (d->code == fcode)
14650 return ix86_expand_sse_comi (d, arglist, target);
14652 /* @@@ Should really do something sensible here. */
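/* For illustration, a hypothetical user-level view of the expanders in
   the switch above: each __builtin_ia32_* call maps onto one named insn
   pattern (a sketch, assuming -m3dnow; not part of the compiled file):  */
#if 0
typedef float __v2sf __attribute__ ((vector_size (8)));

static __v2sf
example_pfadd (__v2sf a, __v2sf b)
{
  /* Expanded via ix86_expand_binop_builtin (CODE_FOR_addv2sf3, ...).  */
  return __builtin_ia32_pfadd (a, b);
}
#endif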
14656 /* Store OPERAND to the memory after reload is completed. This means
14657 that we can't easily use assign_stack_local. */
14659 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14662 if (!reload_completed)
14664 if (TARGET_RED_ZONE)
14666 result = gen_rtx_MEM (mode,
14667 gen_rtx_PLUS (Pmode,
14669 GEN_INT (-RED_ZONE_SIZE)));
14670 emit_move_insn (result, operand);
14672 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14678 operand = gen_lowpart (DImode, operand);
14682 gen_rtx_SET (VOIDmode,
14683 gen_rtx_MEM (DImode,
14684 gen_rtx_PRE_DEC (DImode,
14685 stack_pointer_rtx)),
14691 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14700 split_di (&operand, 1, operands, operands + 1);
14702 gen_rtx_SET (VOIDmode,
14703 gen_rtx_MEM (SImode,
14704 gen_rtx_PRE_DEC (Pmode,
14705 stack_pointer_rtx)),
14708 gen_rtx_SET (VOIDmode,
14709 gen_rtx_MEM (SImode,
14710 gen_rtx_PRE_DEC (Pmode,
14711 stack_pointer_rtx)),
14716 /* It is better to store HImode values as SImode. */
14717 if (!TARGET_PARTIAL_REG_STALL)
14718 operand = gen_lowpart (SImode, operand);
14722 gen_rtx_SET (VOIDmode,
14723 gen_rtx_MEM (GET_MODE (operand),
14724 gen_rtx_PRE_DEC (SImode,
14725 stack_pointer_rtx)),
14731 result = gen_rtx_MEM (mode, stack_pointer_rtx);
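/* For example, on IA-32 an HImode operand widened to SImode comes out
   roughly as (an illustrative sketch, not verbatim compiler output):

	pushl %eax		; ix86_force_to_memory
	... use 0(%esp) ...
	leal 4(%esp), %esp	; ix86_free_from_memory, below

   where peephole2 may later turn the lea into a pop or add.  */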
14736 /* Free operand from the memory. */
14738 ix86_free_from_memory (enum machine_mode mode)
14740 if (!TARGET_RED_ZONE)
14744 if (mode == DImode || TARGET_64BIT)
14746 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14750 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14751 to a pop or add instruction if registers are available. */
14752 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14753 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14758 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14759 QImode must go into class Q_REGS.
14760 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14761 movdf to do mem-to-mem moves through integer regs. */
14763 ix86_preferred_reload_class (rtx x, enum reg_class class)
14765 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14767 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14769 /* SSE can't load any constant directly yet. */
14770 if (SSE_CLASS_P (class))
14772 /* x87 registers can load the constants 0 and 1 directly. */
14773 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14775 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14776 if (MAYBE_SSE_CLASS_P (class))
14777 return (reg_class_subset_p (class, GENERAL_REGS)
14778 ? GENERAL_REGS : FLOAT_REGS);
14782 /* General regs can load everything. */
14783 if (reg_class_subset_p (class, GENERAL_REGS))
14784 return GENERAL_REGS;
14785 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14786 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14789 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14791 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
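/* As a worked example of the checks above: reloading the constant 1.0
   into an SSE class is rejected (SSE cannot load constants directly),
   forcing the value to the constant pool, while an x87 class may keep
   it because fld1 materializes it directly (an illustrative reading of
   this function, not additional code).  */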
14796 /* If we are copying between general and FP registers, we need a memory
14797 location. The same is true for SSE and MMX registers.
14799 The macro can't work reliably when one of the CLASSES is a class containing
14800 registers from multiple units (SSE, MMX, integer). We avoid this by never
14801 combining those units in a single alternative in the machine description.
14802 Ensure that this constraint holds to avoid unexpected surprises.
14804 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14805 enforce these sanity checks. */
14807 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14808 enum machine_mode mode, int strict)
14810 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14811 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14812 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14813 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14814 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14815 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14822 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14823 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14824 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14825 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14826 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
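/* E.g. an SImode copy between %eax and %xmm0 is forced through a stack
   temporary unless inter-unit moves are enabled (or we optimize for
   size), roughly (illustrative assembly; actual slot placement varies):

	movl  %eax, -4(%esp)
	movss -4(%esp), %xmm0  */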
14828 /* Return the cost of moving data from a register in class CLASS1 to
14829 one in class CLASS2.
14831 It is not required that the cost always equal 2 when FROM is the same as TO;
14832 on some machines it is expensive to move between registers if they are not
14833 general registers. */
14835 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14836 enum reg_class class2)
14838 /* In case we require secondary memory, compute the cost of the store
14839 followed by the load. In order to avoid bad register allocation choices,
14840 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14842 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14846 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14847 MEMORY_MOVE_COST (mode, class1, 1));
14848 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14849 MEMORY_MOVE_COST (mode, class2, 1));
14851 /* When copying from a general purpose register we may emit multiple
14852 stores followed by a single load, causing a memory size mismatch stall.
14853 Count this as an arbitrarily high cost of 20. */
14854 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14857 /* In the case of FP/MMX moves, the registers actually overlap, and we
14858 have to switch modes in order to treat them differently. */
14859 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14860 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14866 /* Moves between SSE/MMX and integer unit are expensive. */
14867 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14868 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14869 return ix86_cost->mmxsse_to_integer;
14870 if (MAYBE_FLOAT_CLASS_P (class1))
14871 return ix86_cost->fp_move;
14872 if (MAYBE_SSE_CLASS_P (class1))
14873 return ix86_cost->sse_move;
14874 if (MAYBE_MMX_CLASS_P (class1))
14875 return ix86_cost->mmx_move;
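/* A worked example of the secondary-memory branch above: for a DImode
   copy between GENERAL_REGS and SSE_REGS on a 32-bit target, the cost
   is the sum over both classes of the larger of their load and store
   costs, plus the extra 20 when the source class needs more registers
   than the destination, i.e. when two stores feed a single load
   (illustrative; the exact figures come from the active ix86_cost).  */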
14879 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14881 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14883 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
14884 if (CC_REGNO_P (regno))
14885 return GET_MODE_CLASS (mode) == MODE_CC;
14886 if (GET_MODE_CLASS (mode) == MODE_CC
14887 || GET_MODE_CLASS (mode) == MODE_RANDOM
14888 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14890 if (FP_REGNO_P (regno))
14891 return VALID_FP_MODE_P (mode);
14892 if (SSE_REGNO_P (regno))
14893 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14894 if (MMX_REGNO_P (regno))
14896 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14897 /* We handle both integer and float values in the general purpose registers.
14898 In the future we should be able to handle vector modes as well. */
14899 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14901 /* Take care with QImode values - they can be in non-QI regs, but then
14902 they do cause partial register stalls. */
14903 if (regno < 4 || mode != QImode || TARGET_64BIT)
14905 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
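/* For example: on IA-32 only %eax, %edx, %ecx and %ebx (regno < 4) have
   byte subregisters, so a QImode value elsewhere is allowed only during
   or after reload, or when partial register stalls are not modelled;
   in 64-bit mode the REX prefix makes every low byte addressable, so
   any register qualifies (a summary of the checks above).  */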
14908 /* Return the cost of moving data of mode M between a
14909 register and memory. A value of 2 is the default; this cost is
14910 relative to those in `REGISTER_MOVE_COST'.
14912 If moving between registers and memory is more expensive than
14913 between two registers, you should define this macro to express the relative cost.
14916 We also model the increased cost of moving QImode values in registers outside Q_REGS. */
14920 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14922 if (FLOAT_CLASS_P (class))
14939 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14941 if (SSE_CLASS_P (class))
14944 switch (GET_MODE_SIZE (mode))
14958 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14960 if (MMX_CLASS_P (class))
14963 switch (GET_MODE_SIZE (mode))
14974 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14976 switch (GET_MODE_SIZE (mode))
14980 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14981 : ix86_cost->movzbl_load);
14983 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14984 : ix86_cost->int_store[0] + 4);
14987 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14989 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
14990 if (mode == TFmode)
14992 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14993 * (((int) GET_MODE_SIZE (mode)
14994 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
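/* Worked example of the word-count fallback above: a DImode (8 byte)
   load on a 32-bit target needs (8 + 4 - 1) / 4 == 2 moves, so with
   int_load[2] == 2 the total cost is 2 * 2 == 4 (illustrative values;
   the real numbers come from the active ix86_cost table).  */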
14998 /* Compute a (partial) cost for rtx X. Return true if the complete
14999 cost has been computed, and false if subexpressions should be
15000 scanned. In either case, *TOTAL contains the cost result. */
15003 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15005 enum machine_mode mode = GET_MODE (x);
15013 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15015 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15017 else if (flag_pic && SYMBOLIC_CONST (x)
15019 || (GET_CODE (x) != LABEL_REF
15020 && (GET_CODE (x) != SYMBOL_REF
15021 || !SYMBOL_REF_LOCAL_P (x)))))
15028 if (mode == VOIDmode)
15031 switch (standard_80387_constant_p (x))
15036 default: /* Other constants */
15041 /* Start with (MEM (SYMBOL_REF)), since that's where
15042 it'll probably end up. Add a penalty for size. */
15043 *total = (COSTS_N_INSNS (1)
15044 + (flag_pic != 0 && !TARGET_64BIT)
15045 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15051 /* Zero extension is often completely free on x86_64, so make
15052 it as cheap as possible. */
15053 if (TARGET_64BIT && mode == DImode
15054 && GET_MODE (XEXP (x, 0)) == SImode)
15056 else if (TARGET_ZERO_EXTEND_WITH_AND)
15057 *total = COSTS_N_INSNS (ix86_cost->add);
15059 *total = COSTS_N_INSNS (ix86_cost->movzx);
15063 *total = COSTS_N_INSNS (ix86_cost->movsx);
15067 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15068 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15070 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15073 *total = COSTS_N_INSNS (ix86_cost->add);
15076 if ((value == 2 || value == 3)
15077 && !TARGET_DECOMPOSE_LEA
15078 && ix86_cost->lea <= ix86_cost->shift_const)
15080 *total = COSTS_N_INSNS (ix86_cost->lea);
15090 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15092 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15094 if (INTVAL (XEXP (x, 1)) > 32)
15095 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15097 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15101 if (GET_CODE (XEXP (x, 1)) == AND)
15102 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15104 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15109 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15110 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15112 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15117 if (FLOAT_MODE_P (mode))
15118 *total = COSTS_N_INSNS (ix86_cost->fmul);
15119 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15121 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15124 for (nbits = 0; value != 0; value >>= 1)
15127 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15128 + nbits * ix86_cost->mult_bit);
15132 /* This is arbitrary. */
15133 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15134 + 7 * ix86_cost->mult_bit);
15142 if (FLOAT_MODE_P (mode))
15143 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15145 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15149 if (FLOAT_MODE_P (mode))
15150 *total = COSTS_N_INSNS (ix86_cost->fadd);
15151 else if (!TARGET_DECOMPOSE_LEA
15152 && GET_MODE_CLASS (mode) == MODE_INT
15153 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15155 if (GET_CODE (XEXP (x, 0)) == PLUS
15156 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15157 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15158 && CONSTANT_P (XEXP (x, 1)))
15160 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15161 if (val == 2 || val == 4 || val == 8)
15163 *total = COSTS_N_INSNS (ix86_cost->lea);
15164 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15165 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15167 *total += rtx_cost (XEXP (x, 1), outer_code);
15171 else if (GET_CODE (XEXP (x, 0)) == MULT
15172 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15174 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15175 if (val == 2 || val == 4 || val == 8)
15177 *total = COSTS_N_INSNS (ix86_cost->lea);
15178 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15179 *total += rtx_cost (XEXP (x, 1), outer_code);
15183 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15185 *total = COSTS_N_INSNS (ix86_cost->lea);
15186 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15187 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15188 *total += rtx_cost (XEXP (x, 1), outer_code);
15195 if (FLOAT_MODE_P (mode))
15197 *total = COSTS_N_INSNS (ix86_cost->fadd);
15205 if (!TARGET_64BIT && mode == DImode)
15207 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15208 + (rtx_cost (XEXP (x, 0), outer_code)
15209 << (GET_MODE (XEXP (x, 0)) != DImode))
15210 + (rtx_cost (XEXP (x, 1), outer_code)
15211 << (GET_MODE (XEXP (x, 1)) != DImode)));
15217 if (FLOAT_MODE_P (mode))
15219 *total = COSTS_N_INSNS (ix86_cost->fchs);
15225 if (!TARGET_64BIT && mode == DImode)
15226 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15228 *total = COSTS_N_INSNS (ix86_cost->add);
15232 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15237 if (FLOAT_MODE_P (mode))
15238 *total = COSTS_N_INSNS (ix86_cost->fabs);
15242 if (FLOAT_MODE_P (mode))
15243 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15247 if (XINT (x, 1) == UNSPEC_TP)
15256 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15258 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15261 fputs ("\tpushl $", asm_out_file);
15262 assemble_name (asm_out_file, XSTR (symbol, 0));
15263 fputc ('\n', asm_out_file);
15269 static int current_machopic_label_num;
15271 /* Given a symbol name and its associated stub, write out the
15272 definition of the stub. */
15275 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15277 unsigned int length;
15278 char *binder_name, *symbol_name, lazy_ptr_name[32];
15279 int label = ++current_machopic_label_num;
15281 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15282 symb = (*targetm.strip_name_encoding) (symb);
15284 length = strlen (stub);
15285 binder_name = alloca (length + 32);
15286 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15288 length = strlen (symb);
15289 symbol_name = alloca (length + 32);
15290 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15292 sprintf (lazy_ptr_name, "L%d$lz", label);
15295 machopic_picsymbol_stub_section ();
15297 machopic_symbol_stub_section ();
15299 fprintf (file, "%s:\n", stub);
15300 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15304 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15305 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15306 fprintf (file, "\tjmp %%edx\n");
15309 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15311 fprintf (file, "%s:\n", binder_name);
15315 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15316 fprintf (file, "\tpushl %%eax\n");
15319 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15321 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15323 machopic_lazy_symbol_ptr_section ();
15324 fprintf (file, "%s:\n", lazy_ptr_name);
15325 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15326 fprintf (file, "\t.long %s\n", binder_name);
15328 #endif /* TARGET_MACHO */
15330 /* Order the registers for register allocator. */
15333 x86_order_regs_for_local_alloc (void)
15338 /* First allocate the local general purpose registers. */
15339 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15340 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15341 reg_alloc_order [pos++] = i;
15343 /* Global general purpose registers. */
15344 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15345 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15346 reg_alloc_order [pos++] = i;
15348 /* x87 registers come first in case we are doing FP math using them. */
15350 if (!TARGET_SSE_MATH)
15351 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15352 reg_alloc_order [pos++] = i;
15354 /* SSE registers. */
15355 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15356 reg_alloc_order [pos++] = i;
15357 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15358 reg_alloc_order [pos++] = i;
15360 /* x87 registers. */
15361 if (TARGET_SSE_MATH)
15362 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15363 reg_alloc_order [pos++] = i;
15365 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15366 reg_alloc_order [pos++] = i;
15368 /* Initialize the rest of the array, as we do not allocate some registers at all. */
15370 while (pos < FIRST_PSEUDO_REGISTER)
15371 reg_alloc_order [pos++] = 0;
15374 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15375 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15378 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15379 struct attribute_spec.handler. */
15381 ix86_handle_struct_attribute (tree *node, tree name,
15382 tree args ATTRIBUTE_UNUSED,
15383 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15386 if (DECL_P (*node))
15388 if (TREE_CODE (*node) == TYPE_DECL)
15389 type = &TREE_TYPE (*node);
15394 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15395 || TREE_CODE (*type) == UNION_TYPE)))
15397 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15398 *no_add_attrs = true;
15401 else if ((is_attribute_p ("ms_struct", name)
15402 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15403 || ((is_attribute_p ("gcc_struct", name)
15404 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15406 warning ("`%s' incompatible attribute ignored",
15407 IDENTIFIER_POINTER (name));
15408 *no_add_attrs = true;
15415 ix86_ms_bitfield_layout_p (tree record_type)
15417 return ((TARGET_USE_MS_BITFIELD_LAYOUT
15418 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15419 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
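/* A hypothetical user-level example of the attributes handled above
   (illustrative only, not compiled here):  */
#if 0
/* Lay this struct out MSVC-style regardless of the target default.  */
struct __attribute__ ((ms_struct)) example_s
{
  char c;
  int bits : 5;
};
#endif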
15422 /* Returns an expression indicating where the this parameter is
15423 located on entry to the FUNCTION. */
15426 x86_this_parameter (tree function)
15428 tree type = TREE_TYPE (function);
15432 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15433 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15436 if (ix86_function_regparm (type, function) > 0)
15440 parm = TYPE_ARG_TYPES (type);
15441 /* Figure out whether or not the function has a variable number of arguments. */
15443 for (; parm; parm = TREE_CHAIN (parm))
15444 if (TREE_VALUE (parm) == void_type_node)
15446 /* If not, the this parameter is passed in the first argument register. */
15450 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15452 return gen_rtx_REG (SImode, regno);
15456 if (aggregate_value_p (TREE_TYPE (type), type))
15457 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15459 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
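/* Summarizing the cases above with an illustrative example: under
   -mregparm the this pointer arrives in %eax (%ecx for fastcall);
   otherwise it sits at 4(%esp), or at 8(%esp) when a hidden
   aggregate-return pointer occupies the first stack slot.  */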
15462 /* Determine whether x86_output_mi_thunk can succeed. */
15465 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15466 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15467 HOST_WIDE_INT vcall_offset, tree function)
15469 /* 64-bit can handle anything. */
15473 /* For 32-bit, everything's fine if we have one free register. */
15474 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15477 /* Need a free register for vcall_offset. */
15481 /* Need a free register for GOT references. */
15482 if (flag_pic && !(*targetm.binds_local_p) (function))
15485 /* Otherwise ok. */
15489 /* Output the assembler code for a thunk function. THUNK_DECL is the
15490 declaration for the thunk function itself, FUNCTION is the decl for
15491 the target function. DELTA is an immediate constant offset to be
15492 added to THIS. If VCALL_OFFSET is nonzero, the word at
15493 *(*this + vcall_offset) should be added to THIS. */
15496 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15497 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15498 HOST_WIDE_INT vcall_offset, tree function)
15501 rtx this = x86_this_parameter (function);
15504 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15505 pull it in now and let DELTA benefit. */
15508 else if (vcall_offset)
15510 /* Put the this parameter into %eax. */
15512 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15513 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15516 this_reg = NULL_RTX;
15518 /* Adjust the this parameter by a fixed constant. */
15521 xops[0] = GEN_INT (delta);
15522 xops[1] = this_reg ? this_reg : this;
15525 if (!x86_64_general_operand (xops[0], DImode))
15527 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15529 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15533 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15536 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15539 /* Adjust the this parameter by a value stored in the vtable. */
15543 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15546 int tmp_regno = 2 /* ECX */;
15547 if (lookup_attribute ("fastcall",
15548 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15549 tmp_regno = 0 /* EAX */;
15550 tmp = gen_rtx_REG (SImode, tmp_regno);
15553 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15556 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15558 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15560 /* Adjust the this parameter. */
15561 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15562 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15564 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15565 xops[0] = GEN_INT (vcall_offset);
15567 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15568 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15570 xops[1] = this_reg;
15572 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15574 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15577 /* If necessary, drop THIS back to its stack slot. */
15578 if (this_reg && this_reg != this)
15580 xops[0] = this_reg;
15582 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15585 xops[0] = XEXP (DECL_RTL (function), 0);
15588 if (!flag_pic || (*targetm.binds_local_p) (function))
15589 output_asm_insn ("jmp\t%P0", xops);
15592 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15593 tmp = gen_rtx_CONST (Pmode, tmp);
15594 tmp = gen_rtx_MEM (QImode, tmp);
15596 output_asm_insn ("jmp\t%A0", xops);
15601 if (!flag_pic || (*targetm.binds_local_p) (function))
15602 output_asm_insn ("jmp\t%P0", xops);
15607 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15608 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15609 tmp = gen_rtx_MEM (QImode, tmp);
15611 output_asm_insn ("jmp\t%0", xops);
15614 #endif /* TARGET_MACHO */
15616 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15617 output_set_got (tmp);
15620 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15621 output_asm_insn ("jmp\t{*}%1", xops);
15627 x86_file_start (void)
15629 default_file_start ();
15630 if (X86_FILE_START_VERSION_DIRECTIVE)
15631 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15632 if (X86_FILE_START_FLTUSED)
15633 fputs ("\t.global\t__fltused\n", asm_out_file);
15634 if (ix86_asm_dialect == ASM_INTEL)
15635 fputs ("\t.intel_syntax\n", asm_out_file);
15639 x86_field_alignment (tree field, int computed)
15641 enum machine_mode mode;
15642 tree type = TREE_TYPE (field);
15644 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15646 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15647 ? get_inner_array_type (type) : type);
15648 if (mode == DFmode || mode == DCmode
15649 || GET_MODE_CLASS (mode) == MODE_INT
15650 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15651 return MIN (32, computed);
15655 /* Output assembler code to FILE to increment profiler label # LABELNO
15656 for profiling a function entry. */
15658 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15663 #ifndef NO_PROFILE_COUNTERS
15664 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15666 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15670 #ifndef NO_PROFILE_COUNTERS
15671 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15673 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15677 #ifndef NO_PROFILE_COUNTERS
15678 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15679 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15681 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15685 #ifndef NO_PROFILE_COUNTERS
15686 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15687 PROFILE_COUNT_REGISTER);
15689 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15693 /* We don't have exact information about the insn sizes, but we may assume
15694 quite safely that we are informed about all 1 byte insns and memory
15695 address sizes. This is enough to eliminate unnecessary padding in the common cases. */
15699 min_insn_size (rtx insn)
15703 if (!INSN_P (insn) || !active_insn_p (insn))
15706 /* Discard alignments we have emitted ourselves, as well as jump tables. */
15707 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15708 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15710 if (GET_CODE (insn) == JUMP_INSN
15711 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15712 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15715 /* Important case - calls are always 5 bytes.
15716 It is common to have many calls in a row. */
15717 if (GET_CODE (insn) == CALL_INSN
15718 && symbolic_reference_mentioned_p (PATTERN (insn))
15719 && !SIBLING_CALL_P (insn))
15721 if (get_attr_length (insn) <= 1)
15724 /* For normal instructions we may rely on the sizes of addresses
15725 and the presence of a symbol to require 4 bytes of encoding.
15726 This is not the case for jumps, where references are PC relative. */
15727 if (GET_CODE (insn) != JUMP_INSN)
15729 l = get_attr_length_address (insn);
15730 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15739 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
15743 k8_avoid_jump_misspredicts (void)
15745 rtx insn, start = get_insns ();
15746 int nbytes = 0, njumps = 0;
15749 /* Look for all minimal intervals of instructions containing 4 jumps.
15750 The intervals are bounded by START and INSN. NBYTES is the total
15751 size of instructions in the interval including INSN and not including
15752 START. When NBYTES is smaller than 16, it is possible
15753 that the ends of START and INSN land in the same 16-byte window.
15755 The smallest offset in the window at which INSN can start is the case
15756 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN),
15757 so we add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
15759 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15762 nbytes += min_insn_size (insn);
15764 fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
15765 INSN_UID (insn), min_insn_size (insn));
15766 if ((GET_CODE (insn) == JUMP_INSN
15767 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15768 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15769 || GET_CODE (insn) == CALL_INSN)
15776 start = NEXT_INSN (start);
15777 if ((GET_CODE (start) == JUMP_INSN
15778 && GET_CODE (PATTERN (start)) != ADDR_VEC
15779 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15780 || GET_CODE (start) == CALL_INSN)
15781 njumps--, isjump = 1;
15784 nbytes -= min_insn_size (start);
15789 fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
15790 INSN_UID (start), INSN_UID (insn), nbytes);
15792 if (njumps == 3 && isjump && nbytes < 16)
15794 int padsize = 15 - nbytes + min_insn_size (insn);
15797 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15798 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
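/* Worked example of the padding computation: if the interval holds
   nbytes == 12 including a final 2-byte jump, then
   padsize = 15 - 12 + 2 = 5, which pushes the fourth jump into the
   next 16-byte window (hypothetical sizes for illustration).  */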
15803 /* Implement machine specific optimizations.
15804 At the moment we implement a single transformation: the AMD Athlon works
15805 faster when RET is not the destination of a conditional jump and is not
15806 directly preceded by another jump instruction. We avoid the penalty by
15807 inserting a NOP just before such RET instructions. */
15813 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15815 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15817 basic_block bb = e->src;
15818 rtx ret = BB_END (bb);
15820 bool replace = false;
15822 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15823 || !maybe_hot_bb_p (bb))
15825 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15826 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15828 if (prev && GET_CODE (prev) == CODE_LABEL)
15831 for (e = bb->pred; e; e = e->pred_next)
15832 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15833 && !(e->flags & EDGE_FALLTHRU))
15838 prev = prev_active_insn (ret);
15840 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15841 || GET_CODE (prev) == CALL_INSN))
15843 /* Empty functions get a branch mispredict even when the jump destination
15844 is not visible to us. */
15845 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15850 emit_insn_before (gen_return_internal_long (), ret);
15854 k8_avoid_jump_misspredicts ();
15857 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
15860 x86_extended_QIreg_mentioned_p (rtx insn)
15863 extract_insn_cached (insn);
15864 for (i = 0; i < recog_data.n_operands; i++)
15865 if (REG_P (recog_data.operand[i])
15866 && REGNO (recog_data.operand[i]) >= 4)
15871 /* Return nonzero when P points to a register encoded via a REX prefix.
15872 Called via for_each_rtx. */
15874 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15876 unsigned int regno;
15879 regno = REGNO (*p);
15880 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15885 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
15886 x86_extended_reg_mentioned_p (rtx insn)
15888 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15891 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15892 optabs would emit if we didn't have TFmode patterns. */
15895 x86_emit_floatuns (rtx operands[2])
15897 rtx neglab, donelab, i0, i1, f0, in, out;
15898 enum machine_mode mode, inmode;
15900 inmode = GET_MODE (operands[1]);
15901 if (inmode != SImode
15902 && inmode != DImode)
15906 in = force_reg (inmode, operands[1]);
15907 mode = GET_MODE (out);
15908 neglab = gen_label_rtx ();
15909 donelab = gen_label_rtx ();
15910 i1 = gen_reg_rtx (Pmode);
15911 f0 = gen_reg_rtx (mode);
15913 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15915 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15916 emit_jump_insn (gen_jump (donelab));
15919 emit_label (neglab);
15921 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15922 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15923 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15924 expand_float (f0, i0, 0);
15925 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15927 emit_label (donelab);
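/* The emitted sequence computes, in effect, the following plain-C
   algorithm (a hypothetical standalone sketch, assuming a 64-bit
   unsigned long long; not compiled here):  */
#if 0
static double
example_floatuns (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;	/* fast path: top bit clear */
  /* Halve the value, folding the lost low bit back in so the final
     rounding is unaffected, convert signed, then double.  */
  unsigned long long half = (u >> 1) | (u & 1);
  double f = (double) (long long) half;
  return f + f;
}
#endif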
15930 /* Return true if we do not know how to pass TYPE solely in registers. */
15932 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15934 if (default_must_pass_in_stack (mode, type))
15936 return (!TARGET_64BIT && type && mode == TImode);
15939 /* Initialize vector TARGET via VALS. */
15941 ix86_expand_vector_init (rtx target, rtx vals)
15943 enum machine_mode mode = GET_MODE (target);
15944 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15945 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15948 for (i = n_elts - 1; i >= 0; i--)
15949 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15950 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15953 /* A few special cases first...
15954 ... constants are best loaded from the constant pool. */
15957 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15961 /* ... values where only the first field is non-constant are best loaded
15962 from the pool and overwritten via a move later. */
15965 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15966 GET_MODE_INNER (mode), 0);
15968 op = force_reg (mode, op);
15969 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15970 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15971 switch (GET_MODE (target))
15974 emit_insn (gen_sse2_movsd (target, target, op));
15977 emit_insn (gen_sse_movss (target, target, op));
15985 /* Otherwise, the general sequence built from unpack (interleave) operations. */
15986 switch (GET_MODE (target))
15991 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15993 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15995 vecop0 = force_reg (V2DFmode, vecop0);
15996 vecop1 = force_reg (V2DFmode, vecop1);
15997 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
16003 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16005 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16007 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16009 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16010 rtx tmp1 = gen_reg_rtx (V4SFmode);
16011 rtx tmp2 = gen_reg_rtx (V4SFmode);
16013 vecop0 = force_reg (V4SFmode, vecop0);
16014 vecop1 = force_reg (V4SFmode, vecop1);
16015 vecop2 = force_reg (V4SFmode, vecop2);
16016 vecop3 = force_reg (V4SFmode, vecop3);
16017 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16018 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16019 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
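/* Lane-by-lane sketch of the three unpacks above, with the scalars
   a, b, c, d in element 0 of vecop0..vecop3 (low lane first):

	tmp2   = unpcklps (vecop0, vecop2) = { a, c, x, x }
	tmp1   = unpcklps (vecop1, vecop3) = { b, d, x, x }
	target = unpcklps (tmp2,   tmp1)   = { a, b, c, d }  */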
16027 #include "gt-i386.h"