1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
/* Provide a default for CHECK_STACK_LIMIT only when the target header
   has not already defined it; -1 (all bits set) is the fallback value.
   (Presumably this disables the stack-probe threshold -- confirm
   against uses of CHECK_STACK_LIMIT elsewhere.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   tables (e.g. the five-element "cost of starting a multiply" and
   "cost of a divide/mod" initializers below) are indexed as
   QImode=0, HImode=1, SImode=2, DImode=3, and 4 for anything else.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
/* Pointer to the cost table for the processor currently being tuned
   for.  Defaults to the Pentium costs; presumably reassigned during
   option processing (-mtune), which is not visible in this chunk.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* mask has exactly
   the bit for one PROCESSOR_* enumerator set; the x86_* tuning flags
   below are built by OR-ing these masks together.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
/* Convenience mask covering both AMD cores, which share most of the
   tuning decisions below.  */
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
/* Per-processor tuning flags.  Each x86_* value is an OR of the m_*
   masks above; a set bit enables the corresponding code-generation
   choice when tuning for that processor.  A complemented mask (~x)
   enables the feature on every processor EXCEPT the named ones.
   (Presumably these are consumed via TARGET_* predicates in i386.h,
   which is not visible in this chunk.)  */
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
/* ~(0): enabled everywhere.  */
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just lower part of
   scalar values in proper format leaving the upper part undefined. */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
/* ??? HACK! The following is a lie. SSE can hold e.g. SImode, and
   indeed *must* be able to hold SImode so that SSE2 shifts are able
   to work right. But this can result in some mighty surprising
   register allocation when building kernels. Turning this off should
   make us less likely to all-of-the-sudden select an SSE register. */
const int x86_inter_unit_moves = 0; /* ~(m_ATHLON_K8) */
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
535 /* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
epilogue code.  */
538 #define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES initializer macros come from the target
   headers (presumably i386.h) and are not visible in this file.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
545 /* Array of the smallest class containing reg number REGNO, indexed by
546 REGNO. Used by REGNO_REG_CLASS in i386.h. */
548 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
551 AREG, DREG, CREG, BREG,
553 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
555 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
556 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
559 /* flags, fpsr, dirflag, frame */
560 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
561 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
563 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
565 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
566 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
567 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
571 /* The "default" register map used in 32bit mode. */
573 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
575 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
576 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
577 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
578 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
579 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
580 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
581 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
584 static int const x86_64_int_parameter_registers[6] =
586 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
587 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
590 static int const x86_64_int_return_registers[4] =
592 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
595 /* The "default" register map used in 64bit mode. */
596 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
598 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
599 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
600 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
601 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
602 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
603 8,9,10,11,12,13,14,15, /* extended integer registers */
604 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
607 /* Define the register numbers to be used in Dwarf debugging information.
608 The SVR4 reference port C compiler uses the following register numbers
609 in its Dwarf output code:
610 0 for %eax (gcc regno = 0)
611 1 for %ecx (gcc regno = 2)
612 2 for %edx (gcc regno = 1)
613 3 for %ebx (gcc regno = 3)
614 4 for %esp (gcc regno = 7)
615 5 for %ebp (gcc regno = 6)
616 6 for %esi (gcc regno = 4)
617 7 for %edi (gcc regno = 5)
618 The following three DWARF register numbers are never generated by
619 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
620 believes these numbers have these meanings.
621 8 for %eip (no gcc equivalent)
622 9 for %eflags (gcc regno = 17)
623 10 for %trapno (no gcc equivalent)
624 It is not at all clear how we should number the FP stack registers
625 for the x86 architecture. If the version of SDB on x86/svr4 were
626 a bit less brain dead with respect to floating-point then we would
627 have a precedent to follow with respect to DWARF register numbers
628 for x86 FP registers, but the SDB on x86/svr4 is so completely
629 broken with respect to FP registers that it is hardly worth thinking
630 of it as something to strive for compatibility with.
631 The version of x86/svr4 SDB I have at the moment does (partially)
632 seem to believe that DWARF register number 11 is associated with
633 the x86 register %st(0), but that's about all. Higher DWARF
634 register numbers don't seem to be associated with anything in
635 particular, and even for DWARF regno 11, SDB only seems to under-
636 stand that it should say that a variable lives in %st(0) (when
637 asked via an `=' command) if we said it was in DWARF regno 11,
638 but SDB still prints garbage when asked for the value of the
639 variable in question (via a `/' command).
640 (Also note that the labels SDB prints for various FP stack regs
641 when doing an `x' command are all wrong.)
642 Note that these problems generally don't affect the native SVR4
643 C compiler because it doesn't allow the use of -O with -g and
644 because when it is *not* optimizing, it allocates a memory
645 location for each floating-point variable, and the memory
646 location is what gets described in the DWARF AT_location
647 attribute for the variable in question.
648 Regardless of the severe mental illness of the x86/svr4 SDB, we
649 do something sensible here and we use the following DWARF
650 register numbers. Note that these are all stack-top-relative
652 11 for %st(0) (gcc regno = 8)
653 12 for %st(1) (gcc regno = 9)
654 13 for %st(2) (gcc regno = 10)
655 14 for %st(3) (gcc regno = 11)
656 15 for %st(4) (gcc regno = 12)
657 16 for %st(5) (gcc regno = 13)
658 17 for %st(6) (gcc regno = 14)
659 18 for %st(7) (gcc regno = 15)
661 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
663 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
664 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
665 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
666 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
667 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
668 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
669 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */
/* The two operands of the pending comparison; NULL_RTX until a
   compare has been recorded.  */
rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
/* Maximum number of 386 stack locals -- assumed from the name; the
   uses of this constant are not visible in this chunk.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area. */
/* One word per integer argument register plus 16 bytes per SSE
   argument register.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
682 /* Define the structure for the machine field in struct function. */
684 struct stack_local_entry GTY(())
689 struct stack_local_entry *next;
692 /* Structure describing stack frame layout.
693 Stack grows downward:
699 saved frame pointer if frame_pointer_needed
700 <- HARD_FRAME_POINTER
706 > to_allocate <- FRAME_POINTER
718 int outgoing_arguments_size;
721 HOST_WIDE_INT to_allocate;
722 /* The offsets relative to ARG_POINTER. */
723 HOST_WIDE_INT frame_pointer_offset;
724 HOST_WIDE_INT hard_frame_pointer_offset;
725 HOST_WIDE_INT stack_pointer_offset;
727 /* When save_regs_using_mov is set, emit prologue using
728 move instead of push instructions. */
729 bool save_regs_using_mov;
/* Command-line option state.  Each *_string variable records the raw
   text the user supplied for an option; the accompanying enum/int
   variables hold the parsed results (parsing code not visible in
   this chunk).  */

/* Used to enable/disable debugging features. */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user. */
const char *ix86_cmodel_string;
/* Parsed code model.  */
enum cmodel ix86_cmodel;
/* Assembler dialect option as passed by user; parsed value defaults
   to AT&T syntax.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect option as passed by user; parsed value defaults to the
   GNU dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
/* Which unit we are generating floating point math for. */
enum fpmath_unit ix86_fpmath;
/* Which cpu are we scheduling for. */
enum processor_type ix86_tune;
/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
/* Strings to hold which cpu and instruction set architecture to use. */
const char *ix86_tune_string; /* for -mtune=<xxx> */
const char *ix86_arch_string; /* for -march=<xxx> */
const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
/* # of registers to use to pass arguments. */
const char *ix86_regparm_string;
/* true if sse prefetch instruction is not NOOP. */
int x86_prefetch_sse;
/* ix86_regparm_string as a number */
/* NOTE(review): the variable the comment above describes is not
   visible in this chunk.  */
/* Alignment to use for loops and jumps: */
/* Power of two alignment for loops. */
const char *ix86_align_loops_string;
/* Power of two alignment for non-loop jumps. */
const char *ix86_align_jumps_string;
/* Power of two alignment for stack boundary in bytes. */
const char *ix86_preferred_stack_boundary_string;
/* Preferred alignment for stack boundary in bits. */
int ix86_preferred_stack_boundary;
/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;
/* Power of two alignment for functions. */
const char *ix86_align_funcs_string;
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
792 static int local_symbolic_operand (rtx, enum machine_mode);
793 static int tls_symbolic_operand_1 (rtx, enum tls_model);
794 static void output_pic_addr_const (FILE *, rtx, int);
795 static void put_condition_code (enum rtx_code, enum machine_mode,
797 static const char *get_some_local_dynamic_name (void);
798 static int get_some_local_dynamic_name_1 (rtx *, void *);
799 static rtx maybe_get_pool_constant (rtx);
800 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
801 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
803 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
804 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
806 static rtx get_thread_pointer (int);
807 static rtx legitimize_tls_address (rtx, enum tls_model, int);
808 static void get_pc_thunk_name (char [32], unsigned int);
809 static rtx gen_push (rtx);
810 static int memory_address_length (rtx addr);
811 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
812 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
813 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
814 static void ix86_dump_ppro_packet (FILE *);
815 static void ix86_reorder_insn (rtx *, rtx *);
816 static struct machine_function * ix86_init_machine_status (void);
817 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
818 static int ix86_nsaved_regs (void);
819 static void ix86_emit_save_regs (void);
820 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
821 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
/* Forward declarations for the static helpers defined later in this file:
   epilogue/scheduling hooks, builtin expanders, FP-comparison cost helpers,
   frame-layout and attribute-handling routines.  */
/* NOTE(review): this extract elides lines (the embedded original line
   numbers jump); several declarations below are visibly truncated.  */
822 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
823 static void ix86_sched_reorder_ppro (rtx *, rtx *);
824 static HOST_WIDE_INT ix86_GOT_alias_set (void);
825 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
826 static rtx ix86_expand_aligntest (rtx, int);
827 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
828 static int ix86_issue_rate (void);
829 static int ix86_adjust_cost (rtx, rtx, rtx, int);
830 static void ix86_sched_init (FILE *, int, int);
831 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
832 static int ix86_variable_issue (FILE *, int, rtx, int);
833 static int ia32_use_dfa_pipeline_interface (void);
834 static int ia32_multipass_dfa_lookahead (void);
835 static void ix86_init_mmx_sse_builtins (void);
836 static rtx x86_this_parameter (tree);
837 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
838 HOST_WIDE_INT, tree);
839 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
840 static void x86_file_start (void);
841 static void ix86_reorg (void);
842 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
843 static tree ix86_build_builtin_va_list (void);
/* NOTE(review): the two lines below are fields of struct ix86_address;
   the struct header and closing brace are missing from this extract --
   restore them from the original i386.c.  */
847 rtx base, index, disp;
849 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
852 static int ix86_decompose_address (rtx, struct ix86_address *);
853 static int ix86_address_cost (rtx);
854 static bool ix86_cannot_force_const_mem (rtx);
855 static rtx ix86_delegitimize_address (rtx);
857 struct builtin_description;
/* NOTE(review): the continuation lines of the next two prototypes are
   elided here.  */
858 static rtx ix86_expand_sse_comi (const struct builtin_description *,
860 static rtx ix86_expand_sse_compare (const struct builtin_description *,
862 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
863 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
864 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
865 static rtx ix86_expand_store_builtin (enum insn_code, tree);
866 static rtx safe_vector_operand (rtx, enum machine_mode);
867 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
868 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
869 enum rtx_code *, enum rtx_code *);
870 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
871 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
872 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
873 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
874 static int ix86_fp_comparison_cost (enum rtx_code code);
875 static unsigned int ix86_select_alt_pic_regnum (void);
876 static int ix86_save_reg (unsigned int, int);
877 static void ix86_compute_frame_layout (struct ix86_frame *);
878 static int ix86_comp_type_attributes (tree, tree);
879 static int ix86_function_regparm (tree, tree);
880 const struct attribute_spec ix86_attribute_table[];
881 static bool ix86_function_ok_for_sibcall (tree, tree);
882 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
883 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
884 static int ix86_value_regno (enum machine_mode);
885 static bool contains_128bit_aligned_vector_p (tree);
886 static bool ix86_ms_bitfield_layout_p (tree);
887 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
888 static int extended_reg_mentioned_1 (rtx *, void *);
889 static bool ix86_rtx_costs (rtx, int, int, int *);
890 static int min_insn_size (rtx);
891 static void k8_avoid_jump_misspredicts (void);
/* NOTE(review): the matching #endif for this conditional is elided.  */
893 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
894 static void ix86_svr3_asm_out_constructor (rtx, int);
897 /* Register class used for passing given 64bit part of the argument.
898 These represent classes as documented by the PS ABI, with the exception
899 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
900 use SF or DFmode move instead of DImode to avoid reformatting penalties.
902 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
903 whenever possible (upper half does contain padding).
/* NOTE(review): most enumerators and the closing brace of this enum are
   elided in this extract; the x86_64_reg_class_name table below implies
   ten members in the full definition.  */
905 enum x86_64_reg_class
908 X86_64_INTEGER_CLASS,
909 X86_64_INTEGERSI_CLASS,
/* Human-readable names for the classes above, used for -d debug dumps.  */
918 static const char * const x86_64_reg_class_name[] =
919 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
/* An argument is passed in at most MAX_CLASSES 64-bit chunks.  */
921 #define MAX_CLASSES 4
922 static int classify_argument (enum machine_mode, tree,
923 enum x86_64_reg_class [MAX_CLASSES], int);
924 static int examine_argument (enum machine_mode, tree, int, int *, int *);
925 static rtx construct_container (enum machine_mode, tree, int, int, int,
927 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
928 enum x86_64_reg_class);
930 /* Table of constants used by fldpi, fldln2, etc.... */
931 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily initialized by init_ext_80387_constants.  */
932 static bool ext_80387_constants_init = 0;
933 static void init_ext_80387_constants (void);
935 /* Initialize the GCC target structure. */
/* Each hook is #undef'd first because default definitions may come from
   target-def.h; the #define then installs the i386-specific callback
   picked up by TARGET_INITIALIZER below.  */
936 #undef TARGET_ATTRIBUTE_TABLE
937 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
938 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
939 # undef TARGET_MERGE_DECL_ATTRIBUTES
940 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
943 #undef TARGET_COMP_TYPE_ATTRIBUTES
944 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
946 #undef TARGET_INIT_BUILTINS
947 #define TARGET_INIT_BUILTINS ix86_init_builtins
949 #undef TARGET_EXPAND_BUILTIN
950 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
952 #undef TARGET_ASM_FUNCTION_EPILOGUE
953 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
955 #undef TARGET_ASM_OPEN_PAREN
956 #define TARGET_ASM_OPEN_PAREN ""
957 #undef TARGET_ASM_CLOSE_PAREN
958 #define TARGET_ASM_CLOSE_PAREN ""
/* Assembler directives for emitting aligned integer data.  */
960 #undef TARGET_ASM_ALIGNED_HI_OP
961 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
962 #undef TARGET_ASM_ALIGNED_SI_OP
963 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
965 #undef TARGET_ASM_ALIGNED_DI_OP
966 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 tolerates unaligned data, so the unaligned ops reuse the aligned
   directives.  */
969 #undef TARGET_ASM_UNALIGNED_HI_OP
970 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
971 #undef TARGET_ASM_UNALIGNED_SI_OP
972 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
973 #undef TARGET_ASM_UNALIGNED_DI_OP
974 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction-scheduler hooks.  */
976 #undef TARGET_SCHED_ADJUST_COST
977 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
978 #undef TARGET_SCHED_ISSUE_RATE
979 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
980 #undef TARGET_SCHED_VARIABLE_ISSUE
981 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
982 #undef TARGET_SCHED_INIT
983 #define TARGET_SCHED_INIT ix86_sched_init
984 #undef TARGET_SCHED_REORDER
985 #define TARGET_SCHED_REORDER ix86_sched_reorder
986 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
987 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
988 ia32_use_dfa_pipeline_interface
989 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
990 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
991 ia32_multipass_dfa_lookahead
993 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
994 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
997 #undef TARGET_HAVE_TLS
998 #define TARGET_HAVE_TLS true
1000 #undef TARGET_CANNOT_FORCE_CONST_MEM
1001 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1003 #undef TARGET_DELEGITIMIZE_ADDRESS
1004 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1006 #undef TARGET_MS_BITFIELD_LAYOUT_P
1007 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1009 #undef TARGET_ASM_OUTPUT_MI_THUNK
1010 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1011 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1012 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1014 #undef TARGET_ASM_FILE_START
1015 #define TARGET_ASM_FILE_START x86_file_start
1017 #undef TARGET_RTX_COSTS
1018 #define TARGET_RTX_COSTS ix86_rtx_costs
1019 #undef TARGET_ADDRESS_COST
1020 #define TARGET_ADDRESS_COST ix86_address_cost
1022 #undef TARGET_FIXED_CONDITION_CODE_REGS
1023 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1024 #undef TARGET_CC_MODES_COMPATIBLE
1025 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1027 #undef TARGET_MACHINE_DEPENDENT_REORG
1028 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1030 #undef TARGET_BUILD_BUILTIN_VA_LIST
1031 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
/* The single definition of the target vector, built from the hooks
   installed above.  */
1033 struct gcc_target targetm = TARGET_INITIALIZER;
1035 /* The svr4 ABI for the i386 says that records and unions are returned
/* NOTE(review): the continuation of the comment above ("...in memory")
   is elided in this extract.  */
1037 #ifndef DEFAULT_PCC_STRUCT_RETURN
1038 #define DEFAULT_PCC_STRUCT_RETURN 1
1041 /* Sometimes certain combinations of command options do not make
1042 sense on a particular target machine. You can define a macro
1043 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1044 defined, is executed once just after all the command options have
1047 Don't use this macro to turn on various extra optimizations for
1048 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
/* NOTE(review): this extract elides many lines of override_options (the
   embedded original line numbers jump, e.g. braces and several statements
   are missing); treat the body below as a partial listing only.  */
1051 override_options (void)
1054 /* Comes from final.c -- no real reason to change it. */
1055 #define MAX_CODE_ALIGN 16
/* Per-processor tuning table: cost model, target-flag adjustments and
   default code alignments, indexed by processor_type.  */
1059 const struct processor_costs *cost; /* Processor costs */
1060 const int target_enable; /* Target flags to enable. */
1061 const int target_disable; /* Target flags to disable. */
1062 const int align_loop; /* Default alignments. */
1063 const int align_loop_max_skip;
1064 const int align_jump;
1065 const int align_jump_max_skip;
1066 const int align_func;
1068 const processor_target_table[PROCESSOR_max] =
1070 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1071 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1072 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1073 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1074 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1075 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1076 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1077 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1080 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Mapping from -march=/-mtune= names to processor type and ISA flags.  */
1083 const char *const name; /* processor name or nickname. */
1084 const enum processor_type processor;
1085 const enum pta_flags
1091 PTA_PREFETCH_SSE = 16,
1097 const processor_alias_table[] =
1099 {"i386", PROCESSOR_I386, 0},
1100 {"i486", PROCESSOR_I486, 0},
1101 {"i586", PROCESSOR_PENTIUM, 0},
1102 {"pentium", PROCESSOR_PENTIUM, 0},
1103 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1104 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1105 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1106 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1107 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1108 {"i686", PROCESSOR_PENTIUMPRO, 0},
1109 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1110 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1111 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1112 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1113 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1114 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1115 | PTA_MMX | PTA_PREFETCH_SSE},
1116 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1117 | PTA_MMX | PTA_PREFETCH_SSE},
1118 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1119 | PTA_MMX | PTA_PREFETCH_SSE},
1120 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1121 | PTA_MMX | PTA_PREFETCH_SSE},
1122 {"k6", PROCESSOR_K6, PTA_MMX},
1123 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1124 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1125 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1127 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1128 | PTA_3DNOW | PTA_3DNOW_A},
1129 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1130 | PTA_3DNOW_A | PTA_SSE},
1131 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1132 | PTA_3DNOW_A | PTA_SSE},
1133 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1134 | PTA_3DNOW_A | PTA_SSE},
1135 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1136 | PTA_SSE | PTA_SSE2 },
1137 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1138 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1139 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1140 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1141 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1142 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1143 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1144 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1147 int const pta_size = ARRAY_SIZE (processor_alias_table);
1149 /* Set the default values for switches whose default depends on TARGET_64BIT
1150 in case they weren't overwritten by command line options. */
/* The value 2 marks "not set on the command line"; see
   optimization_options.  The first group is the 64-bit defaults.  */
1153 if (flag_omit_frame_pointer == 2)
1154 flag_omit_frame_pointer = 1;
1155 if (flag_asynchronous_unwind_tables == 2)
1156 flag_asynchronous_unwind_tables = 1;
1157 if (flag_pcc_struct_return == 2)
1158 flag_pcc_struct_return = 0;
/* ... and the 32-bit defaults.  */
1162 if (flag_omit_frame_pointer == 2)
1163 flag_omit_frame_pointer = 0;
1164 if (flag_asynchronous_unwind_tables == 2)
1165 flag_asynchronous_unwind_tables = 0;
1166 if (flag_pcc_struct_return == 2)
1167 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1170 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1171 SUBTARGET_OVERRIDE_OPTIONS;
/* -mtune defaults to -march, then to the configured default CPU.  */
1174 if (!ix86_tune_string && ix86_arch_string)
1175 ix86_tune_string = ix86_arch_string;
1176 if (!ix86_tune_string)
1177 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1178 if (!ix86_arch_string)
1179 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
/* Parse -mcmodel=.  */
1181 if (ix86_cmodel_string != 0)
1183 if (!strcmp (ix86_cmodel_string, "small"))
1184 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1186 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1187 else if (!strcmp (ix86_cmodel_string, "32"))
1188 ix86_cmodel = CM_32;
1189 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1190 ix86_cmodel = CM_KERNEL;
1191 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1192 ix86_cmodel = CM_MEDIUM;
1193 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1194 ix86_cmodel = CM_LARGE;
1196 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1200 ix86_cmodel = CM_32;
1202 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm=.  */
1204 if (ix86_asm_string != 0)
1206 if (!strcmp (ix86_asm_string, "intel"))
1207 ix86_asm_dialect = ASM_INTEL;
1208 else if (!strcmp (ix86_asm_string, "att"))
1209 ix86_asm_dialect = ASM_ATT;
1211 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Sanity-check the code model against the selected bitness.  */
1213 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1214 error ("code model `%s' not supported in the %s bit mode",
1215 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1216 if (ix86_cmodel == CM_LARGE)
1217 sorry ("code model `large' not supported yet");
1218 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1219 sorry ("%i-bit mode not compiled in",
1220 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march=: set ix86_arch and turn on the ISA extensions the
   chosen CPU implies, unless the user set them explicitly.  */
1222 for (i = 0; i < pta_size; i++)
1223 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1225 ix86_arch = processor_alias_table[i].processor;
1226 /* Default cpu tuning to the architecture. */
1227 ix86_tune = ix86_arch;
1228 if (processor_alias_table[i].flags & PTA_MMX
1229 && !(target_flags_explicit & MASK_MMX))
1230 target_flags |= MASK_MMX;
1231 if (processor_alias_table[i].flags & PTA_3DNOW
1232 && !(target_flags_explicit & MASK_3DNOW))
1233 target_flags |= MASK_3DNOW;
1234 if (processor_alias_table[i].flags & PTA_3DNOW_A
1235 && !(target_flags_explicit & MASK_3DNOW_A))
1236 target_flags |= MASK_3DNOW_A;
1237 if (processor_alias_table[i].flags & PTA_SSE
1238 && !(target_flags_explicit & MASK_SSE))
1239 target_flags |= MASK_SSE;
1240 if (processor_alias_table[i].flags & PTA_SSE2
1241 && !(target_flags_explicit & MASK_SSE2))
1242 target_flags |= MASK_SSE2;
1243 if (processor_alias_table[i].flags & PTA_SSE3
1244 && !(target_flags_explicit & MASK_SSE3))
1245 target_flags |= MASK_SSE3;
1246 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1247 x86_prefetch_sse = true;
1248 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1249 error ("CPU you selected does not support x86-64 instruction set");
1254 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mtune= the same way.  */
1256 for (i = 0; i < pta_size; i++)
1257 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1259 ix86_tune = processor_alias_table[i].processor;
1260 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1261 error ("CPU you selected does not support x86-64 instruction set");
1263 /* Intel CPUs have always interpreted SSE prefetch instructions as
1264 NOPs; so, we can enable SSE prefetch instructions even when
1265 -mtune (rather than -march) points us to a processor that has them.
1266 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1267 higher processors. */
1268 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1269 x86_prefetch_sse = true;
1273 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
/* -Os uses the size cost table; otherwise costs come from the tuning
   table, which may also force-enable/disable target flags.  */
1276 ix86_cost = &size_cost;
1278 ix86_cost = processor_target_table[ix86_tune].cost;
1279 target_flags |= processor_target_table[ix86_tune].target_enable;
1280 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1282 /* Arrange to set up i386_stack_locals for all functions. */
1283 init_machine_status = ix86_init_machine_status;
1285 /* Validate -mregparm= value. */
1286 if (ix86_regparm_string)
1288 i = atoi (ix86_regparm_string);
1289 if (i < 0 || i > REGPARM_MAX)
1290 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1296 ix86_regparm = REGPARM_MAX;
1298 /* If the user has provided any of the -malign-* options,
1299 warn and use that value only if -falign-* is not set.
1300 Remove this code in GCC 3.2 or later. */
1301 if (ix86_align_loops_string)
1303 warning ("-malign-loops is obsolete, use -falign-loops");
1304 if (align_loops == 0)
1306 i = atoi (ix86_align_loops_string);
1307 if (i < 0 || i > MAX_CODE_ALIGN)
1308 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1310 align_loops = 1 << i;
1314 if (ix86_align_jumps_string)
1316 warning ("-malign-jumps is obsolete, use -falign-jumps");
1317 if (align_jumps == 0)
1319 i = atoi (ix86_align_jumps_string);
1320 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): copy/paste bug -- this is the -malign-jumps path but the
   diagnostic text says "-malign-loops".  */
1321 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1323 align_jumps = 1 << i;
1327 if (ix86_align_funcs_string)
1329 warning ("-malign-functions is obsolete, use -falign-functions");
1330 if (align_functions == 0)
1332 i = atoi (ix86_align_funcs_string);
1333 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): same copy/paste bug -- this is the -malign-functions
   path but the diagnostic text says "-malign-loops".  */
1334 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1336 align_functions = 1 << i;
1340 /* Default align_* from the processor table. */
1341 if (align_loops == 0)
1343 align_loops = processor_target_table[ix86_tune].align_loop;
1344 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1346 if (align_jumps == 0)
1348 align_jumps = processor_target_table[ix86_tune].align_jump;
1349 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1351 if (align_functions == 0)
1353 align_functions = processor_target_table[ix86_tune].align_func;
1356 /* Validate -mpreferred-stack-boundary= value, or provide default.
1357 The default of 128 bits is for Pentium III's SSE __m128, but we
1358 don't want additional code to keep the stack aligned when
1359 optimizing for code size. */
1360 ix86_preferred_stack_boundary = (optimize_size
1361 ? TARGET_64BIT ? 128 : 32
1363 if (ix86_preferred_stack_boundary_string)
1365 i = atoi (ix86_preferred_stack_boundary_string)
1366 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1367 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1368 TARGET_64BIT ? 4 : 2);
1370 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1373 /* Validate -mbranch-cost= value, or provide default. */
1374 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1375 if (ix86_branch_cost_string)
1377 i = atoi (ix86_branch_cost_string);
1379 error ("-mbranch-cost=%d is not between 0 and 5", i);
1381 ix86_branch_cost = i;
/* Parse -mtls-dialect=.  */
1384 if (ix86_tls_dialect_string)
1386 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1387 ix86_tls_dialect = TLS_DIALECT_GNU;
1388 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1389 ix86_tls_dialect = TLS_DIALECT_SUN;
1391 error ("bad value (%s) for -mtls-dialect= switch",
1392 ix86_tls_dialect_string);
1395 /* Keep nonleaf frame pointers. */
1396 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1397 flag_omit_frame_pointer = 1;
1399 /* If we're doing fast math, we don't care about comparison order
1400 wrt NaNs. This lets us use a shorter comparison sequence. */
1401 if (flag_unsafe_math_optimizations)
1402 target_flags &= ~MASK_IEEE_FP;
1404 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1405 since the insns won't need emulation. */
1406 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1407 target_flags &= ~MASK_NO_FANCY_MATH_387;
1409 /* Turn on SSE2 builtins for -msse3. */
1411 target_flags |= MASK_SSE2;
1413 /* Turn on SSE builtins for -msse2. */
1415 target_flags |= MASK_SSE;
/* 64-bit-only checks and defaults.  */
1419 if (TARGET_ALIGN_DOUBLE)
1420 error ("-malign-double makes no sense in the 64bit mode");
1422 error ("-mrtd calling convention not supported in the 64bit mode");
1423 /* Enable by default the SSE and MMX builtins. */
1424 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1425 ix86_fpmath = FPMATH_SSE;
1429 ix86_fpmath = FPMATH_387;
1430 /* i386 ABI does not specify red zone. It still makes sense to use it
1431 when programmer takes care to keep the stack from being destroyed. */
1432 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1433 target_flags |= MASK_NO_RED_ZONE;
/* Parse -mfpmath=, falling back to 387 when SSE is unavailable.  */
1436 if (ix86_fpmath_string != 0)
1438 if (! strcmp (ix86_fpmath_string, "387"))
1439 ix86_fpmath = FPMATH_387;
1440 else if (! strcmp (ix86_fpmath_string, "sse"))
1444 warning ("SSE instruction set disabled, using 387 arithmetics");
1445 ix86_fpmath = FPMATH_387;
1448 ix86_fpmath = FPMATH_SSE;
1450 else if (! strcmp (ix86_fpmath_string, "387,sse")
1451 || ! strcmp (ix86_fpmath_string, "sse,387"))
1455 warning ("SSE instruction set disabled, using 387 arithmetics");
1456 ix86_fpmath = FPMATH_387;
1458 else if (!TARGET_80387)
1460 warning ("387 instruction set disabled, using SSE arithmetics");
1461 ix86_fpmath = FPMATH_SSE;
1464 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1467 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1470 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1474 target_flags |= MASK_MMX;
1475 x86_prefetch_sse = true;
1478 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1481 target_flags |= MASK_MMX;
1482 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1483 extensions it adds. */
1484 if (x86_3dnow_a & (1 << ix86_arch))
1485 target_flags |= MASK_3DNOW_A;
1487 if ((x86_accumulate_outgoing_args & TUNEMASK)
1488 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1490 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1492 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1495 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1496 p = strchr (internal_label_prefix, 'X');
1497 internal_label_prefix_len = p - internal_label_prefix;
/* Per-optimization-level defaults; runs before the target is fully known.  */
1503 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1505 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1506 make the problem with not enough registers even worse. */
1507 #ifdef INSN_SCHEDULING
1509 flag_schedule_insns = 0;
1512 /* The default values of these switches depend on the TARGET_64BIT
1513 that is not known at this moment. Mark these values with 2 and
1514 let the user override these. In case there is no command line option
1515 specifying them, we will set the defaults in override_options. */
1517 flag_omit_frame_pointer = 2;
1518 flag_pcc_struct_return = 2;
1519 flag_asynchronous_unwind_tables = 2;
1522 /* Table of valid machine attributes. */
1523 const struct attribute_spec ix86_attribute_table[] =
1525 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1526 /* Stdcall attribute says callee is responsible for popping arguments
1527 if they are not variable. */
1528 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1529 /* Fastcall attribute says callee is responsible for popping arguments
1530 if they are not variable. */
1531 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1532 /* Cdecl attribute says the callee is a normal C declaration */
1533 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1534 /* Regparm attribute specifies how many integer arguments are to be
1535 passed in registers. */
1536 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1537 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1538 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1539 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1540 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* NOTE(review): the matching #endif for the conditional above is elided
   in this extract.  */
1542 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1543 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Terminator entry.  */
1544 { NULL, 0, 0, false, false, false, NULL }
1547 /* Decide whether we can make a sibling call to a function. DECL is the
1548 declaration of the function being targeted by the call and EXP is the
1549 CALL_EXPR representing the call. */
/* NOTE(review): several lines of this function (the `return false;`
   statements and braces) are elided in this extract.  */
1552 ix86_function_ok_for_sibcall (tree decl, tree exp)
1554 /* If we are generating position-independent code, we cannot sibcall
1555 optimize any indirect call, or a direct call to a global function,
1556 as the PLT requires %ebx be live. */
1557 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1560 /* If we are returning floats on the 80387 register stack, we cannot
1561 make a sibcall from a function that doesn't return a float to a
1562 function that does or, conversely, from a function that does return
1563 a float to a function that doesn't; the necessary stack adjustment
1564 would not be executed. */
1565 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1566 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1569 /* If this call is indirect, we'll need to be able to use a call-clobbered
1570 register for the address of the target function. Make sure that all
1571 such registers are not used for passing parameters. */
1572 if (!decl && !TARGET_64BIT)
1576 /* We're looking at the CALL_EXPR, we need the type of the function. */
1577 type = TREE_OPERAND (exp, 0); /* pointer expression */
1578 type = TREE_TYPE (type); /* pointer type */
1579 type = TREE_TYPE (type); /* function type */
1581 if (ix86_function_regparm (type, NULL) >= 3)
1583 /* ??? Need to count the actual number of registers to be used,
1584 not the possible number of registers. Fix later. */
1589 /* Otherwise okay. That also includes certain types of indirect calls. */
1593 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1594 arguments as in struct attribute_spec.handler. */
/* Warns and sets *NO_ADD_ATTRS when the attribute is applied to a
   non-function, and rejects incompatible attribute combinations.  */
1596 ix86_handle_cdecl_attribute (tree *node, tree name,
1597 tree args ATTRIBUTE_UNUSED,
1598 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1600 if (TREE_CODE (*node) != FUNCTION_TYPE
1601 && TREE_CODE (*node) != METHOD_TYPE
1602 && TREE_CODE (*node) != FIELD_DECL
1603 && TREE_CODE (*node) != TYPE_DECL)
1605 warning ("`%s' attribute only applies to functions",
1606 IDENTIFIER_POINTER (name));
1607 *no_add_attrs = true;
/* fastcall conflicts with both stdcall and regparm.  */
1611 if (is_attribute_p ("fastcall", name))
1613 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1615 error ("fastcall and stdcall attributes are not compatible");
1617 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1619 error ("fastcall and regparm attributes are not compatible");
1622 else if (is_attribute_p ("stdcall", name))
1624 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1626 error ("fastcall and stdcall attributes are not compatible");
/* On 64-bit targets these attributes are meaningless and ignored.  */
1633 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1634 *no_add_attrs = true;
1640 /* Handle a "regparm" attribute;
1641 arguments as in struct attribute_spec.handler. */
1643 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1644 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1646 if (TREE_CODE (*node) != FUNCTION_TYPE
1647 && TREE_CODE (*node) != METHOD_TYPE
1648 && TREE_CODE (*node) != FIELD_DECL
1649 && TREE_CODE (*node) != TYPE_DECL)
1651 warning ("`%s' attribute only applies to functions",
1652 IDENTIFIER_POINTER (name));
1653 *no_add_attrs = true;
/* Validate the attribute argument: an integer constant in [0, REGPARM_MAX].  */
1659 cst = TREE_VALUE (args);
1660 if (TREE_CODE (cst) != INTEGER_CST)
1662 warning ("`%s' attribute requires an integer constant argument",
1663 IDENTIFIER_POINTER (name));
1664 *no_add_attrs = true;
1666 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1668 warning ("argument to `%s' attribute larger than %d",
1669 IDENTIFIER_POINTER (name), REGPARM_MAX);
1670 *no_add_attrs = true;
1673 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1675 error ("fastcall and regparm attributes are not compatible");
1682 /* Return 0 if the attributes for two types are incompatible, 1 if they
1683 are compatible, and 2 if they are nearly compatible (which causes a
1684 warning to be generated). */
1687 ix86_comp_type_attributes (tree type1, tree type2)
1689 /* Check for mismatch of non-default calling convention. */
1690 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1692 if (TREE_CODE (type1) != FUNCTION_TYPE)
1695 /* Check for mismatched fastcall types */
1696 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1697 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1700 /* Check for mismatched return types (cdecl vs stdcall). */
1701 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1702 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
/* Also incompatible if the regparm counts differ.  */
1704 if (ix86_function_regparm (type1, NULL)
1705 != ix86_function_regparm (type2, NULL))
1710 /* Return the regparm value for a function with the indicated TYPE and DECL.
1711 DECL may be NULL when calling function indirectly
1712 or considering a libcall. */
1715 ix86_function_regparm (tree type, tree decl)
1718 int regparm = ix86_regparm;
/* Tracks whether the user forced a convention via attributes; if so we
   must not override it with the local-function optimization below.  */
1719 bool user_convention = false;
1723 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1726 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1727 user_convention = true;
1730 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1733 user_convention = true;
1736 /* Use register calling convention for local functions when possible. */
1737 if (!TARGET_64BIT && !user_convention && decl
1738 && flag_unit_at_a_time && !profile_flag)
1740 struct cgraph_local_info *i = cgraph_local_info (decl);
1743 /* We can't use regparm(3) for nested functions as these use
1744 static chain pointer in third argument. */
1745 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1755 /* Return true if EAX is live at the start of the function. Used by
1756 ix86_expand_prologue to determine if we need special help before
1757 calling allocate_stack_worker. */
1760 ix86_eax_live_at_start_p (void)
1762 /* Cheat. Don't bother working forward from ix86_function_regparm
1763 to the function type to whether an actual argument is located in
1764 eax. Instead just look at cfg info, which is still close enough
1765 to correct at this point. This gives false positives for broken
1766 functions that might use uninitialized data that happens to be
1767 allocated in eax, but who cares? */
/* Register 0 is EAX in the i386 register numbering.  */
1768 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1771 /* Value is the number of bytes of arguments automatically
1772 popped when returning from a subroutine call.
1773 FUNDECL is the declaration node of the function (as a tree),
1774 FUNTYPE is the data type of the function (as a tree),
1775 or for a library call it is an identifier node for the subroutine name.
1776 SIZE is the number of bytes of arguments passed on the stack.
1778 On the 80386, the RTD insn may be used to pop them if the number
1779 of args is fixed, but if the number is variable then the caller
1780 must pop them all. RTD can't be used for library calls now
1781 because the library is compiled with the Unix compiler.
1782 Use of RTD is a selectable option, since it is incompatible with
1783 standard Unix calling sequences. If the option is not selected,
1784 the caller must always pop the args.
1786 The attribute stdcall is equivalent to RTD on a per module basis. */
1789 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1791 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1793 /* Cdecl functions override -mrtd, and never pop the stack. */
1794 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1796 /* Stdcall and fastcall functions will pop the stack if not
/* NOTE(review): continuation of the comment above ("...variable args")
   is elided in this extract.  */
1798 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1799 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Pop only when the argument list is fixed (ends in void).  */
1803 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1804 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1805 == void_type_node)))
1809 /* Lose any fake structure return argument if it is passed on the stack. */
1810 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1813 int nregs = ix86_function_regparm (funtype, fundecl);
1816 return GET_MODE_SIZE (Pmode);
1822 /* Argument support functions. */
1824 /* Return true when register may be used to pass function parameters. */
1826 ix86_function_arg_regno_p (int regno)
/* 32-bit case: the first REGPARM_MAX integer registers, plus usable SSE
   registers when SSE is enabled.  */
1830 return (regno < REGPARM_MAX
1831 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
/* 64-bit case below.  */
1832 if (SSE_REGNO_P (regno) && TARGET_SSE)
1834 /* RAX is used as hidden argument to va_arg functions. */
1837 for (i = 0; i < REGPARM_MAX; i++)
1838 if (regno == x86_64_int_parameter_registers[i])
1843 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1844 for a call to a function whose data type is FNTYPE.
1845 For a library call, FNTYPE is 0. */
/* init_cumulative_args: initialize CUM for scanning the parameter list of
   FNTYPE (0 for a library call named by LIBNAME).  Sets the integer/SSE/MMX
   register budgets and the maybe_vaarg flag.  NOTE(review): lines are
   elided in this listing; body incomplete.  */
1848 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1849 tree fntype, /* tree ptr for function decl */
1850 rtx libname, /* SYMBOL_REF of library name or 0 */
1853 static CUMULATIVE_ARGS zero_cum;
1854 tree param, next_param;
/* Optional debug trace of the incoming function type.  */
1856 if (TARGET_DEBUG_ARG)
1858 fprintf (stderr, "\ninit_cumulative_args (");
1860 fprintf (stderr, "fntype code = %s, ret code = %s",
1861 tree_code_name[(int) TREE_CODE (fntype)],
1862 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1864 fprintf (stderr, "no fntype");
1867 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1872 /* Set up the number of registers to use for passing arguments. */
1874 cum->nregs = ix86_function_regparm (fntype, fndecl);
1876 cum->nregs = ix86_regparm;
1877 cum->sse_nregs = SSE_REGPARM_MAX;
1878 cum->mmx_nregs = MMX_REGPARM_MAX;
1879 cum->warn_sse = true;
1880 cum->warn_mmx = true;
1881 cum->maybe_vaarg = false;
1883 /* Use ecx and edx registers if function has fastcall attribute */
1884 if (fntype && !TARGET_64BIT)
1886 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1894 /* Determine if this function has variable arguments. This is
1895 indicated by the last argument being 'void_type_mode' if there
1896 are no variable arguments. If there are variable arguments, then
1897 we won't pass anything in registers */
1899 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
/* Walk the declared argument types looking for a trailing ellipsis.  */
1901 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1902 param != 0; param = next_param)
1904 next_param = TREE_CHAIN (param);
1905 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1916 cum->maybe_vaarg = true;
/* No prototype at all also means "maybe varargs".  */
1920 if ((!fntype && !libname)
1921 || (fntype && !TYPE_ARG_TYPES (fntype)))
1922 cum->maybe_vaarg = 1;
1924 if (TARGET_DEBUG_ARG)
1925 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1930 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1931 of this code is to classify each 8bytes of incoming argument by the register
1932 class and assign registers accordingly. */
1934 /* Return the union class of CLASS1 and CLASS2.
1935 See the x86-64 PS ABI for details. */
/* merge_classes: combine two x86-64 psABI register classes for the same
   eightbyte, following the merge rules of the System V x86-64 ABI
   (rules #1..#6 below).  */
1937 static enum x86_64_reg_class
1938 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1940 /* Rule #1: If both classes are equal, this is the resulting class. */
1941 if (class1 == class2)
1944 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1946 if (class1 == X86_64_NO_CLASS)
1948 if (class2 == X86_64_NO_CLASS)
1951 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1952 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1953 return X86_64_MEMORY_CLASS;
1955 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both fit in the low 32 bits).  */
1956 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1957 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1958 return X86_64_INTEGERSI_CLASS;
1959 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1960 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1961 return X86_64_INTEGER_CLASS;
1963 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1964 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1965 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1966 return X86_64_MEMORY_CLASS;
1968 /* Rule #6: Otherwise class SSE is used. */
1969 return X86_64_SSE_CLASS;
1972 /* Classify the argument of type TYPE and mode MODE.
1973 CLASSES will be filled by the register class used to pass each word
1974 of the operand. The number of words is returned. In case the parameter
1975 should be passed in memory, 0 is returned. As a special case for zero
1976 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1978 BIT_OFFSET is used internally for handling records and specifies offset
1979 of the offset in bits modulo 256 to avoid overflow cases.
1981 See the x86-64 PS ABI for details.
/* classify_argument: classify each eightbyte of an argument of MODE/TYPE
   into x86-64 psABI register classes, filling CLASSES.  Returns the number
   of eightbytes, or 0 when the argument must be passed in memory.
   BIT_OFFSET is the field offset (mod 256) while recursing into records.
   NOTE(review): this listing has many lines elided (switch heads, braces,
   returns) — the body below is incomplete; consult the full file.  */
1985 classify_argument (enum machine_mode mode, tree type,
1986 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1988 HOST_WIDE_INT bytes =
1989 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of eightbytes, accounting for a partial leading word.  */
1990 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1992 /* Variable sized entities are always passed/returned in memory. */
1996 if (mode != VOIDmode
1997 && MUST_PASS_IN_STACK (mode, type))
2000 if (type && AGGREGATE_TYPE_P (type))
2004 enum x86_64_reg_class subclasses[MAX_CLASSES];
2006 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2010 for (i = 0; i < words; i++)
2011 classes[i] = X86_64_NO_CLASS;
2013 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2014 signalize memory class, so handle it as special case. */
2017 classes[0] = X86_64_NO_CLASS;
2021 /* Classify each field of record and merge classes. */
2022 if (TREE_CODE (type) == RECORD_TYPE)
2024 /* For classes first merge in the field of the subclasses. */
2025 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2027 tree bases = TYPE_BINFO_BASETYPES (type);
2028 int n_bases = TREE_VEC_LENGTH (bases);
2031 for (i = 0; i < n_bases; ++i)
2033 tree binfo = TREE_VEC_ELT (bases, i);
2035 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2036 tree type = BINFO_TYPE (binfo);
/* Recurse into the base class at its bit offset.  */
2038 num = classify_argument (TYPE_MODE (type),
2040 (offset + bit_offset) % 256);
2043 for (i = 0; i < num; i++)
2045 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2047 merge_classes (subclasses[i], classes[i + pos]);
2051 /* And now merge the fields of structure. */
2052 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2054 if (TREE_CODE (field) == FIELD_DECL)
2058 /* Bitfields are always classified as integer. Handle them
2059 early, since later code would consider them to be
2060 misaligned integers. */
2061 if (DECL_BIT_FIELD (field))
2063 for (i = int_bit_position (field) / 8 / 8;
2064 i < (int_bit_position (field)
2065 + tree_low_cst (DECL_SIZE (field), 0)
2068 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: classify the field's own type recursively.  */
2073 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2074 TREE_TYPE (field), subclasses,
2075 (int_bit_position (field)
2076 + bit_offset) % 256);
2079 for (i = 0; i < num; i++)
2082 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2084 merge_classes (subclasses[i], classes[i + pos]);
2090 /* Arrays are handled as small records. */
2091 else if (TREE_CODE (type) == ARRAY_TYPE)
2094 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2095 TREE_TYPE (type), subclasses, bit_offset);
2099 /* The partial classes are now full classes. */
2100 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2101 subclasses[0] = X86_64_SSE_CLASS;
2102 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2103 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all array words.  */
2105 for (i = 0; i < words; i++)
2106 classes[i] = subclasses[i % num];
2108 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2109 else if (TREE_CODE (type) == UNION_TYPE
2110 || TREE_CODE (type) == QUAL_UNION_TYPE)
2112 /* For classes first merge in the field of the subclasses. */
2113 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2115 tree bases = TYPE_BINFO_BASETYPES (type);
2116 int n_bases = TREE_VEC_LENGTH (bases);
2119 for (i = 0; i < n_bases; ++i)
2121 tree binfo = TREE_VEC_ELT (bases, i);
2123 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2124 tree type = BINFO_TYPE (binfo);
2126 num = classify_argument (TYPE_MODE (type),
2128 (offset + (bit_offset % 64)) % 256);
2131 for (i = 0; i < num; i++)
2133 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2135 merge_classes (subclasses[i], classes[i + pos]);
/* Union members all start at offset 0; merge them directly.  */
2139 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2141 if (TREE_CODE (field) == FIELD_DECL)
2144 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2145 TREE_TYPE (field), subclasses,
2149 for (i = 0; i < num; i++)
2150 classes[i] = merge_classes (subclasses[i], classes[i]);
2154 else if (TREE_CODE (type) == SET_TYPE)
2158 classes[0] = X86_64_INTEGERSI_CLASS;
2161 else if (bytes <= 8)
2163 classes[0] = X86_64_INTEGER_CLASS;
2166 else if (bytes <= 12)
2168 classes[0] = X86_64_INTEGER_CLASS;
2169 classes[1] = X86_64_INTEGERSI_CLASS;
2174 classes[0] = X86_64_INTEGER_CLASS;
2175 classes[1] = X86_64_INTEGER_CLASS;
2182 /* Final merger cleanup. */
2183 for (i = 0; i < words; i++)
2185 /* If one class is MEMORY, everything should be passed in
2187 if (classes[i] == X86_64_MEMORY_CLASS)
2190 /* The X86_64_SSEUP_CLASS should be always preceded by
2191 X86_64_SSE_CLASS. */
2192 if (classes[i] == X86_64_SSEUP_CLASS
2193 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2194 classes[i] = X86_64_SSE_CLASS;
2196 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2197 if (classes[i] == X86_64_X87UP_CLASS
2198 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2199 classes[i] = X86_64_SSE_CLASS;
2204 /* Compute alignment needed. We align all types to natural boundaries with
2205 exception of XFmode that is aligned to 64bits. */
2206 if (mode != VOIDmode && mode != BLKmode)
2208 int mode_alignment = GET_MODE_BITSIZE (mode);
2211 mode_alignment = 128;
2212 else if (mode == XCmode)
2213 mode_alignment = 256;
2214 if (COMPLEX_MODE_P (mode))
2215 mode_alignment /= 2;
2216 /* Misaligned fields are always returned in memory. */
2217 if (bit_offset % mode_alignment)
2221 /* Classification of atomic types. */
/* NOTE(review): the switch over MODE here has its case labels elided in
   this listing; the assignments below belong to per-mode cases.  */
2231 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2232 classes[0] = X86_64_INTEGERSI_CLASS;
2234 classes[0] = X86_64_INTEGER_CLASS;
2238 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2241 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2242 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2245 if (!(bit_offset % 64))
2246 classes[0] = X86_64_SSESF_CLASS;
2248 classes[0] = X86_64_SSE_CLASS;
2251 classes[0] = X86_64_SSEDF_CLASS;
2254 classes[0] = X86_64_X87_CLASS;
2255 classes[1] = X86_64_X87UP_CLASS;
2261 classes[0] = X86_64_X87_CLASS;
2262 classes[1] = X86_64_X87UP_CLASS;
2263 classes[2] = X86_64_X87_CLASS;
2264 classes[3] = X86_64_X87UP_CLASS;
2267 classes[0] = X86_64_SSEDF_CLASS;
2268 classes[1] = X86_64_SSEDF_CLASS;
2271 classes[0] = X86_64_SSE_CLASS;
2279 classes[0] = X86_64_SSE_CLASS;
2280 classes[1] = X86_64_SSEUP_CLASS;
2295 /* Examine the argument and return set number of register required in each
2296 class. Return 0 iff parameter should be passed in memory. */
/* examine_argument: count the integer and SSE registers needed to pass an
   argument of MODE/TYPE (IN_RETURN selects return-value rules), storing the
   counts through INT_NREGS/SSE_NREGS.  Returns 0 iff the argument goes in
   memory.  NOTE(review): the counting statements inside the switch are
   elided in this listing.  */
2298 examine_argument (enum machine_mode mode, tree type, int in_return,
2299 int *int_nregs, int *sse_nregs)
2301 enum x86_64_reg_class class[MAX_CLASSES];
2302 int n = classify_argument (mode, type, class, 0);
/* Walk the per-eightbyte classes, tallying register requirements.  */
2308 for (n--; n >= 0; n--)
2311 case X86_64_INTEGER_CLASS:
2312 case X86_64_INTEGERSI_CLASS:
2315 case X86_64_SSE_CLASS:
2316 case X86_64_SSESF_CLASS:
2317 case X86_64_SSEDF_CLASS:
2320 case X86_64_NO_CLASS:
2321 case X86_64_SSEUP_CLASS:
2323 case X86_64_X87_CLASS:
2324 case X86_64_X87UP_CLASS:
2328 case X86_64_MEMORY_CLASS:
2333 /* Construct container for the argument used by GCC interface. See
2334 FUNCTION_ARG for the detailed description. */
/* construct_container: build the RTL (REG or PARALLEL of EXPR_LISTs)
   describing where an argument/return value of MODE/TYPE lives, given the
   available integer registers INTREG (NINTREGS of them) and NSSEREGS SSE
   registers.  Returns NULL for memory.  NOTE(review): elided lines in this
   listing (switch heads, early returns, brace structure) — incomplete.  */
2336 construct_container (enum machine_mode mode, tree type, int in_return,
2337 int nintregs, int nsseregs, const int * intreg,
2340 enum machine_mode tmpmode;
2342 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2343 enum x86_64_reg_class class[MAX_CLASSES];
2347 int needed_sseregs, needed_intregs;
2348 rtx exp[MAX_CLASSES];
2351 n = classify_argument (mode, type, class, 0);
/* Debug dump of the computed classification.  */
2352 if (TARGET_DEBUG_ARG)
2355 fprintf (stderr, "Memory class\n");
2358 fprintf (stderr, "Classes:");
2359 for (i = 0; i < n; i++)
2361 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2363 fprintf (stderr, "\n");
/* Fall back to memory when not enough registers remain.  */
2368 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2370 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2373 /* First construct simple cases. Avoid SCmode, since we want to use
2374 single register to pass this type. */
2375 if (n == 1 && mode != SCmode)
2378 case X86_64_INTEGER_CLASS:
2379 case X86_64_INTEGERSI_CLASS:
2380 return gen_rtx_REG (mode, intreg[0]);
2381 case X86_64_SSE_CLASS:
2382 case X86_64_SSESF_CLASS:
2383 case X86_64_SSEDF_CLASS:
2384 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2385 case X86_64_X87_CLASS:
2386 return gen_rtx_REG (mode, FIRST_STACK_REG);
2387 case X86_64_NO_CLASS:
2388 /* Zero sized array, struct or class. */
/* Two-eightbyte fast paths: whole value in one register pair.  */
2393 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2395 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2397 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2398 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2399 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2400 && class[1] == X86_64_INTEGER_CLASS
2401 && (mode == CDImode || mode == TImode || mode == TFmode)
2402 && intreg[0] + 1 == intreg[1])
2403 return gen_rtx_REG (mode, intreg[0]);
2405 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2406 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2408 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2410 /* Otherwise figure out the entries of the PARALLEL. */
2411 for (i = 0; i < n; i++)
2415 case X86_64_NO_CLASS:
2417 case X86_64_INTEGER_CLASS:
2418 case X86_64_INTEGERSI_CLASS:
2419 /* Merge TImodes on aligned occasions here too. */
2420 if (i * 8 + 8 > bytes)
2421 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2422 else if (class[i] == X86_64_INTEGERSI_CLASS)
2426 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2427 if (tmpmode == BLKmode)
2429 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2430 gen_rtx_REG (tmpmode, *intreg),
2434 case X86_64_SSESF_CLASS:
2435 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2436 gen_rtx_REG (SFmode,
2437 SSE_REGNO (sse_regno)),
2441 case X86_64_SSEDF_CLASS:
2442 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2443 gen_rtx_REG (DFmode,
2444 SSE_REGNO (sse_regno)),
2448 case X86_64_SSE_CLASS:
/* An SSE eightbyte followed by SSEUP consumes a whole TImode reg.  */
2449 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2453 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2454 gen_rtx_REG (tmpmode,
2455 SSE_REGNO (sse_regno)),
2457 if (tmpmode == TImode)
/* Package all collected pieces into one PARALLEL.  */
2465 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2466 for (i = 0; i < nexps; i++)
2467 XVECEXP (ret, 0, i) = exp [i];
2471 /* Update the data in CUM to advance over an argument
2472 of mode MODE and data type TYPE.
2473 (TYPE is null for libcalls where that information may not be available.) */
/* function_arg_advance: advance CUM past an argument of MODE/TYPE so the
   next call to function_arg sees updated register/word counters.
   NOTE(review): elided lines (the TARGET_64BIT test and closing braces)
   make this body incomplete in the listing.  */
2476 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2477 enum machine_mode mode, /* current arg mode */
2478 tree type, /* type of the argument or 0 if lib support */
2479 int named) /* whether or not the argument was named */
2482 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2483 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2485 if (TARGET_DEBUG_ARG)
2487 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2488 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
/* x86-64 path: consume int/SSE registers per psABI classification.  */
2491 int int_nregs, sse_nregs;
2492 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2493 cum->words += words;
2494 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2496 cum->nregs -= int_nregs;
2497 cum->sse_nregs -= sse_nregs;
2498 cum->regno += int_nregs;
2499 cum->sse_regno += sse_nregs;
2502 cum->words += words;
/* 32-bit path: SSE vector args consume an SSE register.  */
2506 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2507 && (!type || !AGGREGATE_TYPE_P (type)))
2509 cum->sse_words += words;
2510 cum->sse_nregs -= 1;
2511 cum->sse_regno += 1;
2512 if (cum->sse_nregs <= 0)
2518 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2519 && (!type || !AGGREGATE_TYPE_P (type)))
2521 cum->mmx_words += words;
2522 cum->mmx_nregs -= 1;
2523 cum->mmx_regno += 1;
2524 if (cum->mmx_nregs <= 0)
/* Plain integer args consume general registers word by word.  */
2532 cum->words += words;
2533 cum->nregs -= words;
2534 cum->regno += words;
2536 if (cum->nregs <= 0)
2546 /* A subroutine of function_arg. We want to pass a parameter whose nominal
2547 type is MODE in REGNO. We try to minimize ABI variation, so MODE may not
2548 actually be valid for REGNO with the current ISA. In this case, ALT_MODE
2549 is used instead. It must be the same size as MODE, and must be known to
2550 be valid for REGNO. Finally, ORIG_MODE is the original mode of the
2551 parameter, as seen by the type system. This may be different from MODE
2552 when we're mucking with things minimizing ABI variations.
2554 Returns a REG or a PARALLEL as appropriate. */
/* gen_reg_or_parallel: produce a REG in MODE for REGNO when the hard
   register supports it; otherwise wrap a REG in ALT_MODE (same size,
   known-valid for REGNO) in a one-element PARALLEL of ORIG_MODE.  */
2557 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode alt_mode,
2558 enum machine_mode orig_mode, unsigned int regno)
2562 if (HARD_REGNO_MODE_OK (regno, mode))
2563 tmp = gen_rtx_REG (mode, regno);
/* Fallback: ALT_MODE register at offset 0 inside a PARALLEL.  */
2566 tmp = gen_rtx_REG (alt_mode, regno);
2567 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2568 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2574 /* Define where to put the arguments to a function.
2575 Value is zero to push the argument on the stack,
2576 or a hard register in which to store the argument.
2578 MODE is the argument's machine mode.
2579 TYPE is the data type of the argument (as a tree).
2580 This is null for libcalls where that information may
2582 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2583 the preceding args and about the function being called.
2584 NAMED is nonzero if this argument is a named parameter
2585 (otherwise it is an extra parameter matching an ellipsis). */
/* function_arg: return the REG/PARALLEL where the next argument of
   ORIG_MODE/TYPE is passed, or 0 to push it on the stack.  Handles the
   hidden AL varargs count on x86-64, fastcall ECX/EDX allocation, and
   SSE/MMX vector argument registers on 32-bit targets.
   Fix: the SSE-vector warning guard tested the MMX "already warned" flag
   (warnedmmx) instead of warnedsse, so the once-only SSE warning was
   keyed to the wrong latch; the MMX branch below correctly tests
   warnedmmx, and this branch now tests warnedsse to match.
   NOTE(review): this listing elides lines (TARGET_64BIT tests, brace
   structure); all other tokens are kept exactly as in the original.  */
2588 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2589 tree type, int named)
2591 enum machine_mode mode = orig_mode;
2594 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2595 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Latches so each "vector arg without ISA" warning is emitted once.  */
2596 static bool warnedsse, warnedmmx;
2598 /* Handle a hidden AL argument containing number of registers for varargs
2599 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2601 if (mode == VOIDmode)
2604 return GEN_INT (cum->maybe_vaarg
2605 ? (cum->sse_nregs < 0
/* x86-64: delegate to the psABI container builder.  */
2613 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2614 &x86_64_int_parameter_registers [cum->regno],
2619 /* For now, pass fp/complex values on the stack. */
2631 if (words <= cum->nregs)
2633 int regno = cum->regno;
2635 /* Fastcall allocates the first two DWORD (SImode) or
2636 smaller arguments to ECX and EDX. */
2639 if (mode == BLKmode || mode == DImode)
2642 /* ECX not EAX is the first allocated register. */
2646 ret = gen_rtx_REG (mode, regno);
/* 32-bit SSE vector argument: warn once if SSE is disabled.  */
2656 if (!type || !AGGREGATE_TYPE_P (type))
2658 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2661 warning ("SSE vector argument without SSE enabled "
2665 ret = gen_reg_or_parallel (mode, TImode, orig_mode,
2666 cum->sse_regno + FIRST_SSE_REG);
/* 32-bit MMX vector argument: warn once if MMX is disabled.  */
2673 if (!type || !AGGREGATE_TYPE_P (type))
2675 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2678 warning ("MMX vector argument without MMX enabled "
2682 ret = gen_reg_or_parallel (mode, DImode, orig_mode,
2683 cum->mmx_regno + FIRST_MMX_REG);
2688 if (TARGET_DEBUG_ARG)
2691 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2692 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2695 print_simple_rtl (stderr, ret);
2697 fprintf (stderr, ", stack");
2699 fprintf (stderr, " )\n");
2705 /* A C expression that indicates when an argument must be passed by
2706 reference. If nonzero for an argument, a copy of that argument is
2707 made in memory and a pointer to the argument is passed instead of
2708 the argument itself. The pointer is passed in whatever way is
2709 appropriate for passing a pointer to that type. */
/* function_arg_pass_by_reference: nonzero when the argument must be
   passed as a pointer to a memory copy — here, variable-sized types
   (int_size_in_bytes == -1).  */
2712 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2713 enum machine_mode mode ATTRIBUTE_UNUSED,
2714 tree type, int named ATTRIBUTE_UNUSED)
2719 if (type && int_size_in_bytes (type) == -1)
2721 if (TARGET_DEBUG_ARG)
2722 fprintf (stderr, "function_arg_pass_by_reference\n");
2729 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* contains_128bit_aligned_vector_p: true when TYPE is, or (recursively)
   contains, a 128-bit-aligned SSE vector — used to decide 16-byte argument
   alignment for 32-bit argument passing.  NOTE(review): early-return lines
   are elided in this listing.  */
2732 contains_128bit_aligned_vector_p (tree type)
2734 enum machine_mode mode = TYPE_MODE (type);
/* A bare SSE vector type (unless the user lowered its alignment).  */
2735 if (SSE_REG_MODE_P (mode)
2736 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2738 if (TYPE_ALIGN (type) < 128)
2741 if (AGGREGATE_TYPE_P (type))
2743 /* Walk the aggregates recursively. */
2744 if (TREE_CODE (type) == RECORD_TYPE
2745 || TREE_CODE (type) == UNION_TYPE
2746 || TREE_CODE (type) == QUAL_UNION_TYPE)
/* C++ base classes first.  */
2750 if (TYPE_BINFO (type) != NULL
2751 && TYPE_BINFO_BASETYPES (type) != NULL)
2753 tree bases = TYPE_BINFO_BASETYPES (type);
2754 int n_bases = TREE_VEC_LENGTH (bases);
2757 for (i = 0; i < n_bases; ++i)
2759 tree binfo = TREE_VEC_ELT (bases, i);
2760 tree type = BINFO_TYPE (binfo);
2762 if (contains_128bit_aligned_vector_p (type))
2766 /* And now merge the fields of structure. */
2767 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2769 if (TREE_CODE (field) == FIELD_DECL
2770 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2774 /* Just for use if some languages passes arrays by value. */
2775 else if (TREE_CODE (type) == ARRAY_TYPE)
2777 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2786 /* Gives the alignment boundary, in bits, of an argument with the
2787 specified mode and type. */
/* ix86_function_arg_boundary: alignment (bits) for an argument of
   MODE/TYPE on the stack; at least PARM_BOUNDARY, 128 only for SSE
   vectors (or aggregates containing them) per the notes below.  */
2790 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2794 align = TYPE_ALIGN (type);
2796 align = GET_MODE_ALIGNMENT (mode);
2797 if (align < PARM_BOUNDARY)
2798 align = PARM_BOUNDARY;
2801 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2802 make an exception for SSE modes since these require 128bit
2805 The handling here differs from field_alignment. ICC aligns MMX
2806 arguments to 4 byte boundaries, while structure fields are aligned
2807 to 8 byte boundaries. */
2810 if (!SSE_REG_MODE_P (mode))
2811 align = PARM_BOUNDARY;
2815 if (!contains_128bit_aligned_vector_p (type))
2816 align = PARM_BOUNDARY;
2824 /* Return true if N is a possible register number of function value. */
/* ix86_function_value_regno_p: true if REGNO can hold a function return
   value (EAX/RAX, st(0) when x87 returns are enabled, xmm0 with SSE).
   NOTE(review): the branch selecting between the two returns is elided
   in this listing — presumably a TARGET_64BIT test; confirm.  */
2826 ix86_function_value_regno_p (int regno)
2830 return ((regno) == 0
2831 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2832 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2834 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2835 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2836 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2839 /* Define how to find the value returned by a function.
2840 VALTYPE is the data type of the value (as a tree).
2841 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2842 otherwise, FUNC is 0. */
/* ix86_function_value: RTL for the location in which a value of type
   VALTYPE is returned — psABI container on x86-64, otherwise a single
   register chosen by ix86_value_regno.  */
2844 ix86_function_value (tree valtype)
2848 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2849 REGPARM_MAX, SSE_REGPARM_MAX,
2850 x86_64_int_return_registers, 0);
2851 /* For zero sized structures, construct_container return NULL, but we need
2852 to keep rest of compiler happy by returning meaningful value. */
2854 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2858 return gen_rtx_REG (TYPE_MODE (valtype),
2859 ix86_value_regno (TYPE_MODE (valtype)));
2862 /* Return false iff type is returned in memory. */
/* ix86_return_in_memory: nonzero when a value of TYPE must be returned in
   memory rather than in registers.  NOTE(review): several returns and the
   size thresholds between the comments are elided in this listing.  */
2864 ix86_return_in_memory (tree type)
2866 int needed_intregs, needed_sseregs, size;
2867 enum machine_mode mode = TYPE_MODE (type);
/* x86-64: in memory exactly when the psABI classification says so.  */
2870 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2872 if (mode == BLKmode)
2875 size = int_size_in_bytes (type);
2877 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2880 if (VECTOR_MODE_P (mode) || mode == TImode)
2882 /* User-created vectors small enough to fit in EAX. */
2886 /* MMX/3dNow values are returned on the stack, since we've
2887 got to EMMS/FEMMS before returning. */
2891 /* SSE values are returned in XMM0. */
2892 /* ??? Except when it doesn't exist? We have a choice of
2893 either (1) being abi incompatible with a -march switch,
2894 or (2) generating an error here. Given no good solution,
2895 I think the safest thing is one warning. The user won't
2896 be able to use -Werror, but.... */
2907 warning ("SSE vector return without SSE enabled "
2922 /* Define how to find the value returned by a library function
2923 assuming the value has mode MODE. */
/* ix86_libcall_value: register holding the result of a library call of
   MODE.  NOTE(review): the mode tests selecting between these returns
   (and the 64-bit/32-bit split) are elided in this listing.  */
2925 ix86_libcall_value (enum machine_mode mode)
2935 return gen_rtx_REG (mode, FIRST_SSE_REG);
2938 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2943 return gen_rtx_REG (mode, 0);
2947 return gen_rtx_REG (mode, ix86_value_regno (mode));
2950 /* Given a mode, return the register to use for a return value. */
/* ix86_value_regno: pick the return register for MODE on 32-bit targets:
   st(0) for floats (when x87 returns enabled), xmm0 for 16-byte vectors,
   otherwise eax.  */
2953 ix86_value_regno (enum machine_mode mode)
2955 /* Floating point return values in %st(0). */
2956 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2957 return FIRST_FLOAT_REG;
2958 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2959 we prevent this case when sse is not available. */
2960 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2961 return FIRST_SSE_REG;
2962 /* Everything else in %eax. */
2966 /* Create the va_list data type. */
/* ix86_build_builtin_va_list: construct the va_list type.  i386 uses a
   plain char*; x86-64 builds the 4-field __va_list_tag record
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) wrapped in a
   one-element array, per the psABI.  */
2969 ix86_build_builtin_va_list (void)
2971 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2973 /* For i386 we use plain pointer to argument area. */
2975 return build_pointer_type (char_type_node);
2977 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2978 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2980 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2981 unsigned_type_node);
2982 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2983 unsigned_type_node);
2984 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2986 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Attach fields to the record and chain them in declaration order.  */
2989 DECL_FIELD_CONTEXT (f_gpr) = record;
2990 DECL_FIELD_CONTEXT (f_fpr) = record;
2991 DECL_FIELD_CONTEXT (f_ovf) = record;
2992 DECL_FIELD_CONTEXT (f_sav) = record;
2994 TREE_CHAIN (record) = type_decl;
2995 TYPE_NAME (record) = type_decl;
2996 TYPE_FIELDS (record) = f_gpr;
2997 TREE_CHAIN (f_gpr) = f_fpr;
2998 TREE_CHAIN (f_fpr) = f_ovf;
2999 TREE_CHAIN (f_ovf) = f_sav;
3001 layout_type (record);
3003 /* The correct type is an array type of one element. */
3004 return build_array_type (record, build_index_type (size_zero_node));
3007 /* Perform any needed actions needed for a function that is receiving a
3008 variable number of arguments.
3012 MODE and TYPE are the mode and type of the current parameter.
3014 PRETEND_SIZE is a variable that should be set to the amount of stack
3015 that must be pushed by the prolog to pretend that our caller pushed
3018 Normally, this macro will push all remaining incoming registers on the
3019 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* ix86_setup_incoming_varargs: emit prologue code for a varargs function
   that spills the unnamed integer parameter registers into the register
   save area and, via the sse_prologue_save template, conditionally saves
   the SSE parameter registers (AL holds the count at the call site).
   NOTE(review): elided lines in this listing (early exits, declarations
   of fntype/stdarg_p/label/tmp_reg/nsse_reg/set/i) — body incomplete.  */
3022 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3023 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3026 CUMULATIVE_ARGS next_cum;
3027 rtx save_area = NULL_RTX, mem;
3040 /* Indicate to allocate space on the stack for varargs save area. */
3041 ix86_save_varrargs_registers = 1;
/* Save area holds SSE regs, which need 16-byte stack alignment.  */
3043 cfun->stack_alignment_needed = 128;
3045 fntype = TREE_TYPE (current_function_decl);
3046 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3047 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3048 != void_type_node));
3050 /* For varargs, we do not want to skip the dummy va_dcl argument.
3051 For stdargs, we do want to skip the last named argument. */
3054 function_arg_advance (&next_cum, mode, type, 1);
3057 save_area = frame_pointer_rtx;
3059 set = get_varargs_alias_set ();
/* Spill remaining (unnamed) integer parameter registers.  */
3061 for (i = next_cum.regno; i < ix86_regparm; i++)
3063 mem = gen_rtx_MEM (Pmode,
3064 plus_constant (save_area, i * UNITS_PER_WORD));
3065 set_mem_alias_set (mem, set);
3066 emit_move_insn (mem, gen_rtx_REG (Pmode,
3067 x86_64_int_parameter_registers[i]));
3070 if (next_cum.sse_nregs)
3072 /* Now emit code to save SSE registers. The AX parameter contains number
3073 of SSE parameter registers used to call this function. We use
3074 sse_prologue_save insn template that produces computed jump across
3075 SSE saves. We need some preparation work to get this working. */
3077 label = gen_label_rtx ();
3078 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3080 /* Compute address to jump to :
3081 label - 5*eax + nnamed_sse_arguments*5 */
3082 tmp_reg = gen_reg_rtx (Pmode);
3083 nsse_reg = gen_reg_rtx (Pmode);
/* AL (reg 0, QImode) holds the caller-supplied SSE register count.  */
3084 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3085 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3086 gen_rtx_MULT (Pmode, nsse_reg,
3088 if (next_cum.sse_regno)
3091 gen_rtx_CONST (DImode,
3092 gen_rtx_PLUS (DImode,
3094 GEN_INT (next_cum.sse_regno * 4))));
3096 emit_move_insn (nsse_reg, label_ref);
3097 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3099 /* Compute address of memory block we save into. We always use pointer
3100 pointing 127 bytes after first byte to store - this is needed to keep
3101 instruction size limited by 4 bytes. */
3102 tmp_reg = gen_reg_rtx (Pmode);
3103 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3104 plus_constant (save_area,
3105 8 * REGPARM_MAX + 127)));
3106 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3107 set_mem_alias_set (mem, set);
3108 set_mem_align (mem, BITS_PER_WORD);
3110 /* And finally do the dirty job! */
3111 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3112 GEN_INT (next_cum.sse_regno), label));
3117 /* Implement va_start. */
/* ix86_va_start: expand va_start.  i386 delegates to the standard
   implementation; x86-64 fills the four __va_list_tag fields:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
3120 ix86_va_start (tree valist, rtx nextarg)
3122 HOST_WIDE_INT words, n_gpr, n_fpr;
3123 tree f_gpr, f_fpr, f_ovf, f_sav;
3124 tree gpr, fpr, ovf, sav, t;
3126 /* Only 64bit target needs something special. */
3129 std_expand_builtin_va_start (valist, nextarg);
/* Locate the four fields of __va_list_tag in declaration order.  */
3133 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3134 f_fpr = TREE_CHAIN (f_gpr);
3135 f_ovf = TREE_CHAIN (f_fpr);
3136 f_sav = TREE_CHAIN (f_ovf);
3138 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3139 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3140 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3141 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3142 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3144 /* Count number of gp and fp argument registers used. */
3145 words = current_function_args_info.words;
3146 n_gpr = current_function_args_info.regno;
3147 n_fpr = current_function_args_info.sse_regno;
3149 if (TARGET_DEBUG_ARG)
3150 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3151 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = 8 bytes per named GP register already consumed.  */
3153 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3154 build_int_2 (n_gpr * 8, 0));
3155 TREE_SIDE_EFFECTS (t) = 1;
3156 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = past all GP slots (8*REGPARM_MAX) + 16 per SSE reg used.  */
3158 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3159 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3160 TREE_SIDE_EFFECTS (t) = 1;
3161 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3163 /* Find the overflow area. */
3164 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3166 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3167 build_int_2 (words * UNITS_PER_WORD, 0));
3168 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3169 TREE_SIDE_EFFECTS (t) = 1;
3170 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3172 /* Find the register save area.
3173 Prologue of the function save it right above stack frame. */
3174 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3175 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3176 TREE_SIDE_EFFECTS (t) = 1;
3177 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3180 /* Implement va_arg. */
3182 ix86_va_arg (tree valist, tree type)
3184 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3185 tree f_gpr, f_fpr, f_ovf, f_sav;
3186 tree gpr, fpr, ovf, sav, t;
3188 rtx lab_false, lab_over = NULL_RTX;
3193 /* Only 64bit target needs something special. */
3196 return std_expand_builtin_va_arg (valist, type);
3199 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3200 f_fpr = TREE_CHAIN (f_gpr);
3201 f_ovf = TREE_CHAIN (f_fpr);
3202 f_sav = TREE_CHAIN (f_ovf);
3204 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3205 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3206 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3207 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3208 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3210 size = int_size_in_bytes (type);
3213 /* Passed by reference. */
3215 type = build_pointer_type (type);
3216 size = int_size_in_bytes (type);
3218 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3220 container = construct_container (TYPE_MODE (type), type, 0,
3221 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3223 * Pull the value out of the saved registers ...
3226 addr_rtx = gen_reg_rtx (Pmode);
3230 rtx int_addr_rtx, sse_addr_rtx;
3231 int needed_intregs, needed_sseregs;
3234 lab_over = gen_label_rtx ();
3235 lab_false = gen_label_rtx ();
3237 examine_argument (TYPE_MODE (type), type, 0,
3238 &needed_intregs, &needed_sseregs);
3241 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3242 || TYPE_ALIGN (type) > 128);
3244 /* In case we are passing structure, verify that it is consecutive block
3245 on the register save area. If not we need to do moves. */
3246 if (!need_temp && !REG_P (container))
3248 /* Verify that all registers are strictly consecutive */
3249 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3253 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3255 rtx slot = XVECEXP (container, 0, i);
3256 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3257 || INTVAL (XEXP (slot, 1)) != i * 16)
3265 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3267 rtx slot = XVECEXP (container, 0, i);
3268 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3269 || INTVAL (XEXP (slot, 1)) != i * 8)
3276 int_addr_rtx = addr_rtx;
3277 sse_addr_rtx = addr_rtx;
3281 int_addr_rtx = gen_reg_rtx (Pmode);
3282 sse_addr_rtx = gen_reg_rtx (Pmode);
3284 /* First ensure that we fit completely in registers. */
3287 emit_cmp_and_jump_insns (expand_expr
3288 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3289 GEN_INT ((REGPARM_MAX - needed_intregs +
3290 1) * 8), GE, const1_rtx, SImode,
3295 emit_cmp_and_jump_insns (expand_expr
3296 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3297 GEN_INT ((SSE_REGPARM_MAX -
3298 needed_sseregs + 1) * 16 +
3299 REGPARM_MAX * 8), GE, const1_rtx,
3300 SImode, 1, lab_false);
3303 /* Compute index to start of area used for integer regs. */
3306 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3307 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3308 if (r != int_addr_rtx)
3309 emit_move_insn (int_addr_rtx, r);
3313 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3314 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3315 if (r != sse_addr_rtx)
3316 emit_move_insn (sse_addr_rtx, r);
3324 /* Never use the memory itself, as it has the alias set. */
3325 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3326 mem = gen_rtx_MEM (BLKmode, x);
3327 force_operand (x, addr_rtx);
3328 set_mem_alias_set (mem, get_varargs_alias_set ());
3329 set_mem_align (mem, BITS_PER_UNIT);
3331 for (i = 0; i < XVECLEN (container, 0); i++)
3333 rtx slot = XVECEXP (container, 0, i);
3334 rtx reg = XEXP (slot, 0);
3335 enum machine_mode mode = GET_MODE (reg);
3341 if (SSE_REGNO_P (REGNO (reg)))
3343 src_addr = sse_addr_rtx;
3344 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3348 src_addr = int_addr_rtx;
3349 src_offset = REGNO (reg) * 8;
3351 src_mem = gen_rtx_MEM (mode, src_addr);
3352 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3353 src_mem = adjust_address (src_mem, mode, src_offset);
3354 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3355 emit_move_insn (dest_mem, src_mem);
3362 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3363 build_int_2 (needed_intregs * 8, 0));
3364 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3365 TREE_SIDE_EFFECTS (t) = 1;
3366 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3371 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3372 build_int_2 (needed_sseregs * 16, 0));
3373 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3374 TREE_SIDE_EFFECTS (t) = 1;
3375 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3378 emit_jump_insn (gen_jump (lab_over));
3380 emit_label (lab_false);
3383 /* ... otherwise out of the overflow area. */
3385 /* Care for on-stack alignment if needed. */
3386 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3390 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3391 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3392 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3396 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3398 emit_move_insn (addr_rtx, r);
3401 build (PLUS_EXPR, TREE_TYPE (t), t,
3402 build_int_2 (rsize * UNITS_PER_WORD, 0));
3403 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3404 TREE_SIDE_EFFECTS (t) = 1;
3405 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3408 emit_label (lab_over);
3412 r = gen_rtx_MEM (Pmode, addr_rtx);
3413 set_mem_alias_set (r, get_varargs_alias_set ());
3414 emit_move_insn (addr_rtx, r);
3420 /* Return nonzero if OP is either a i387 or SSE fp register. */
3422 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3424 return ANY_FP_REG_P (op);
3427 /* Return nonzero if OP is an i387 fp register. */
3429 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3431 return FP_REG_P (op);
3434 /* Return nonzero if OP is a non-fp register_operand. */
3436 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3438 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3441 /* Return nonzero if OP is a register operand other than an
3442 i387 fp register. */
3444 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3446 return register_operand (op, mode) && !FP_REG_P (op);
3449 /* Return nonzero if OP is general operand representable on x86_64. */
3452 x86_64_general_operand (rtx op, enum machine_mode mode)
3455 return general_operand (op, mode);
3456 if (nonimmediate_operand (op, mode))
3458 return x86_64_sign_extended_value (op);
3461 /* Return nonzero if OP is general operand representable on x86_64
3462 as either sign extended or zero extended constant. */
3465 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3468 return general_operand (op, mode);
3469 if (nonimmediate_operand (op, mode))
3471 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3474 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3477 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3480 return nonmemory_operand (op, mode);
3481 if (register_operand (op, mode))
3483 return x86_64_sign_extended_value (op);
3486 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3489 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3491 if (!TARGET_64BIT || !flag_pic)
3492 return nonmemory_operand (op, mode);
3493 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3495 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3500 /* Return nonzero if OPNUM's MEM should be matched
3501 in movabs* patterns. */
3504 ix86_check_movabs (rtx insn, int opnum)
3508 set = PATTERN (insn);
3509 if (GET_CODE (set) == PARALLEL)
3510 set = XVECEXP (set, 0, 0);
3511 if (GET_CODE (set) != SET)
3513 mem = XEXP (set, opnum);
3514 while (GET_CODE (mem) == SUBREG)
3515 mem = SUBREG_REG (mem);
3516 if (GET_CODE (mem) != MEM)
3518 return (volatile_ok || !MEM_VOLATILE_P (mem));
3521 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3524 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3527 return nonmemory_operand (op, mode);
3528 if (register_operand (op, mode))
3530 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3533 /* Return nonzero if OP is immediate operand representable on x86_64. */
3536 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3539 return immediate_operand (op, mode);
3540 return x86_64_sign_extended_value (op);
3543 /* Return nonzero if OP is immediate operand representable on x86_64. */
3546 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3548 return x86_64_zero_extended_value (op);
3551 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3552 for shift & compare patterns, as shifting by 0 does not change flags),
3553 else return zero. */
3556 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3558 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3561 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3562 reference and a constant. */
3565 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3567 switch (GET_CODE (op))
3575 if (GET_CODE (op) == SYMBOL_REF
3576 || GET_CODE (op) == LABEL_REF
3577 || (GET_CODE (op) == UNSPEC
3578 && (XINT (op, 1) == UNSPEC_GOT
3579 || XINT (op, 1) == UNSPEC_GOTOFF
3580 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3582 if (GET_CODE (op) != PLUS
3583 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3587 if (GET_CODE (op) == SYMBOL_REF
3588 || GET_CODE (op) == LABEL_REF)
3590 /* Only @GOTOFF gets offsets. */
3591 if (GET_CODE (op) != UNSPEC
3592 || XINT (op, 1) != UNSPEC_GOTOFF)
3595 op = XVECEXP (op, 0, 0);
3596 if (GET_CODE (op) == SYMBOL_REF
3597 || GET_CODE (op) == LABEL_REF)
3606 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3609 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3611 if (GET_CODE (op) != CONST)
3616 if (GET_CODE (op) == UNSPEC
3617 && XINT (op, 1) == UNSPEC_GOTPCREL)
3619 if (GET_CODE (op) == PLUS
3620 && GET_CODE (XEXP (op, 0)) == UNSPEC
3621 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3626 if (GET_CODE (op) == UNSPEC)
3628 if (GET_CODE (op) != PLUS
3629 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3632 if (GET_CODE (op) == UNSPEC)
3638 /* Return true if OP is a symbolic operand that resolves locally. */
3641 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3643 if (GET_CODE (op) == CONST
3644 && GET_CODE (XEXP (op, 0)) == PLUS
3645 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3646 op = XEXP (XEXP (op, 0), 0);
3648 if (GET_CODE (op) == LABEL_REF)
3651 if (GET_CODE (op) != SYMBOL_REF)
3654 if (SYMBOL_REF_LOCAL_P (op))
3657 /* There is, however, a not insubstantial body of code in the rest of
3658 the compiler that assumes it can just stick the results of
3659 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3660 /* ??? This is a hack. Should update the body of the compiler to
3661 always create a DECL an invoke targetm.encode_section_info. */
3662 if (strncmp (XSTR (op, 0), internal_label_prefix,
3663 internal_label_prefix_len) == 0)
3669 /* Test for various thread-local symbols. */
3672 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3674 if (GET_CODE (op) != SYMBOL_REF)
3676 return SYMBOL_REF_TLS_MODEL (op);
3680 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3682 if (GET_CODE (op) != SYMBOL_REF)
3684 return SYMBOL_REF_TLS_MODEL (op) == kind;
3688 global_dynamic_symbolic_operand (rtx op,
3689 enum machine_mode mode ATTRIBUTE_UNUSED)
3691 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3695 local_dynamic_symbolic_operand (rtx op,
3696 enum machine_mode mode ATTRIBUTE_UNUSED)
3698 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3702 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3704 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3708 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3710 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3713 /* Test for a valid operand for a call instruction. Don't allow the
3714 arg pointer register or virtual regs since they may decay into
3715 reg + const, which the patterns can't handle. */
3718 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3720 /* Disallow indirect through a virtual register. This leads to
3721 compiler aborts when trying to eliminate them. */
3722 if (GET_CODE (op) == REG
3723 && (op == arg_pointer_rtx
3724 || op == frame_pointer_rtx
3725 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3726 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3729 /* Disallow `call 1234'. Due to varying assembler lameness this
3730 gets either rejected or translated to `call .+1234'. */
3731 if (GET_CODE (op) == CONST_INT)
3734 /* Explicitly allow SYMBOL_REF even if pic. */
3735 if (GET_CODE (op) == SYMBOL_REF)
3738 /* Otherwise we can allow any general_operand in the address. */
3739 return general_operand (op, Pmode);
3742 /* Test for a valid operand for a call instruction. Don't allow the
3743 arg pointer register or virtual regs since they may decay into
3744 reg + const, which the patterns can't handle. */
3747 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3749 /* Disallow indirect through a virtual register. This leads to
3750 compiler aborts when trying to eliminate them. */
3751 if (GET_CODE (op) == REG
3752 && (op == arg_pointer_rtx
3753 || op == frame_pointer_rtx
3754 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3755 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3758 /* Explicitly allow SYMBOL_REF even if pic. */
3759 if (GET_CODE (op) == SYMBOL_REF)
3762 /* Otherwise we can only allow register operands. */
3763 return register_operand (op, Pmode);
3767 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3769 if (GET_CODE (op) == CONST
3770 && GET_CODE (XEXP (op, 0)) == PLUS
3771 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3772 op = XEXP (XEXP (op, 0), 0);
3773 return GET_CODE (op) == SYMBOL_REF;
3776 /* Match exactly zero and one. */
3779 const0_operand (rtx op, enum machine_mode mode)
3781 return op == CONST0_RTX (mode);
3785 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3787 return op == const1_rtx;
3790 /* Match 2, 4, or 8. Used for leal multiplicands. */
3793 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3795 return (GET_CODE (op) == CONST_INT
3796 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3800 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3802 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3806 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3808 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3812 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3814 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3818 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3820 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3824 /* True if this is a constant appropriate for an increment or decrement. */
3827 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3829 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3830 registers, since carry flag is not set. */
3831 if (TARGET_PENTIUM4 && !optimize_size)
3833 return op == const1_rtx || op == constm1_rtx;
3836 /* Return nonzero if OP is acceptable as operand of DImode shift
3840 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3843 return nonimmediate_operand (op, mode);
3845 return register_operand (op, mode);
3848 /* Return false if this is the stack pointer, or any other fake
3849 register eliminable to the stack pointer. Otherwise, this is
3852 This is used to prevent esp from being used as an index reg.
3853 Which would only happen in pathological cases. */
3856 reg_no_sp_operand (rtx op, enum machine_mode mode)
3859 if (GET_CODE (t) == SUBREG)
3861 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3864 return register_operand (op, mode);
3868 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3870 return MMX_REG_P (op);
3873 /* Return false if this is any eliminable register. Otherwise
3877 general_no_elim_operand (rtx op, enum machine_mode mode)
3880 if (GET_CODE (t) == SUBREG)
3882 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3883 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3884 || t == virtual_stack_dynamic_rtx)
3887 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3888 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3891 return general_operand (op, mode);
3894 /* Return false if this is any eliminable register. Otherwise
3895 register_operand or const_int. */
3898 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3901 if (GET_CODE (t) == SUBREG)
3903 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3904 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3905 || t == virtual_stack_dynamic_rtx)
3908 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3911 /* Return false if this is any eliminable register or stack register,
3912 otherwise work like register_operand. */
3915 index_register_operand (rtx op, enum machine_mode mode)
3918 if (GET_CODE (t) == SUBREG)
3922 if (t == arg_pointer_rtx
3923 || t == frame_pointer_rtx
3924 || t == virtual_incoming_args_rtx
3925 || t == virtual_stack_vars_rtx
3926 || t == virtual_stack_dynamic_rtx
3927 || REGNO (t) == STACK_POINTER_REGNUM)
3930 return general_operand (op, mode);
3933 /* Return true if op is a Q_REGS class register. */
3936 q_regs_operand (rtx op, enum machine_mode mode)
3938 if (mode != VOIDmode && GET_MODE (op) != mode)
3940 if (GET_CODE (op) == SUBREG)
3941 op = SUBREG_REG (op);
3942 return ANY_QI_REG_P (op);
3945 /* Return true if op is an flags register. */
3948 flags_reg_operand (rtx op, enum machine_mode mode)
3950 if (mode != VOIDmode && GET_MODE (op) != mode)
3952 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3955 /* Return true if op is a NON_Q_REGS class register. */
3958 non_q_regs_operand (rtx op, enum machine_mode mode)
3960 if (mode != VOIDmode && GET_MODE (op) != mode)
3962 if (GET_CODE (op) == SUBREG)
3963 op = SUBREG_REG (op);
3964 return NON_QI_REG_P (op);
3968 zero_extended_scalar_load_operand (rtx op,
3969 enum machine_mode mode ATTRIBUTE_UNUSED)
3972 if (GET_CODE (op) != MEM)
3974 op = maybe_get_pool_constant (op);
3977 if (GET_CODE (op) != CONST_VECTOR)
3980 (GET_MODE_SIZE (GET_MODE (op)) /
3981 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3982 for (n_elts--; n_elts > 0; n_elts--)
3984 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3985 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3991 /* Return 1 when OP is operand acceptable for standard SSE move. */
3993 vector_move_operand (rtx op, enum machine_mode mode)
3995 if (nonimmediate_operand (op, mode))
3997 if (GET_MODE (op) != mode && mode != VOIDmode)
3999 return (op == CONST0_RTX (GET_MODE (op)));
4002 /* Return true if op if a valid address, and does not contain
4003 a segment override. */
4006 no_seg_address_operand (rtx op, enum machine_mode mode)
4008 struct ix86_address parts;
4010 if (! address_operand (op, mode))
4013 if (! ix86_decompose_address (op, &parts))
4016 return parts.seg == SEG_DEFAULT;
4019 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4022 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4024 enum rtx_code code = GET_CODE (op);
4027 /* Operations supported directly. */
4037 /* These are equivalent to ones above in non-IEEE comparisons. */
4044 return !TARGET_IEEE_FP;
4049 /* Return 1 if OP is a valid comparison operator in valid mode. */
4051 ix86_comparison_operator (rtx op, enum machine_mode mode)
4053 enum machine_mode inmode;
4054 enum rtx_code code = GET_CODE (op);
4055 if (mode != VOIDmode && GET_MODE (op) != mode)
4057 if (GET_RTX_CLASS (code) != '<')
4059 inmode = GET_MODE (XEXP (op, 0));
4061 if (inmode == CCFPmode || inmode == CCFPUmode)
4063 enum rtx_code second_code, bypass_code;
4064 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4065 return (bypass_code == NIL && second_code == NIL);
4072 if (inmode == CCmode || inmode == CCGCmode
4073 || inmode == CCGOCmode || inmode == CCNOmode)
4076 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4077 if (inmode == CCmode)
4081 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4089 /* Return 1 if OP is a valid comparison operator testing carry flag
4092 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4094 enum machine_mode inmode;
4095 enum rtx_code code = GET_CODE (op);
4097 if (mode != VOIDmode && GET_MODE (op) != mode)
4099 if (GET_RTX_CLASS (code) != '<')
4101 inmode = GET_MODE (XEXP (op, 0));
4102 if (GET_CODE (XEXP (op, 0)) != REG
4103 || REGNO (XEXP (op, 0)) != 17
4104 || XEXP (op, 1) != const0_rtx)
4107 if (inmode == CCFPmode || inmode == CCFPUmode)
4109 enum rtx_code second_code, bypass_code;
4111 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4112 if (bypass_code != NIL || second_code != NIL)
4114 code = ix86_fp_compare_code_to_integer (code);
4116 else if (inmode != CCmode)
4121 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4124 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4126 enum machine_mode inmode;
4127 enum rtx_code code = GET_CODE (op);
4129 if (mode != VOIDmode && GET_MODE (op) != mode)
4131 if (GET_RTX_CLASS (code) != '<')
4133 inmode = GET_MODE (XEXP (op, 0));
4134 if (inmode == CCFPmode || inmode == CCFPUmode)
4136 enum rtx_code second_code, bypass_code;
4138 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4139 if (bypass_code != NIL || second_code != NIL)
4141 code = ix86_fp_compare_code_to_integer (code);
4143 /* i387 supports just limited amount of conditional codes. */
4146 case LTU: case GTU: case LEU: case GEU:
4147 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4150 case ORDERED: case UNORDERED:
4158 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4161 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4163 switch (GET_CODE (op))
4166 /* Modern CPUs have same latency for HImode and SImode multiply,
4167 but 386 and 486 do HImode multiply faster. */
4168 return ix86_tune > PROCESSOR_I486;
4180 /* Nearly general operand, but accept any const_double, since we wish
4181 to be able to drop them into memory rather than have them get pulled
4185 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4187 if (mode != VOIDmode && mode != GET_MODE (op))
4189 if (GET_CODE (op) == CONST_DOUBLE)
4191 return general_operand (op, mode);
4194 /* Match an SI or HImode register for a zero_extract. */
4197 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4200 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4201 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4204 if (!register_operand (op, VOIDmode))
4207 /* Be careful to accept only registers having upper parts. */
4208 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4209 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4212 /* Return 1 if this is a valid binary floating-point operation.
4213 OP is the expression matched, and MODE is its mode. */
4216 binary_fp_operator (rtx op, enum machine_mode mode)
4218 if (mode != VOIDmode && mode != GET_MODE (op))
4221 switch (GET_CODE (op))
4227 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4235 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4237 return GET_CODE (op) == MULT;
4241 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4243 return GET_CODE (op) == DIV;
4247 arith_or_logical_operator (rtx op, enum machine_mode mode)
4249 return ((mode == VOIDmode || GET_MODE (op) == mode)
4250 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4251 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4254 /* Returns 1 if OP is memory operand with a displacement. */
4257 memory_displacement_operand (rtx op, enum machine_mode mode)
4259 struct ix86_address parts;
4261 if (! memory_operand (op, mode))
4264 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4267 return parts.disp != NULL_RTX;
4270 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4271 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4273 ??? It seems likely that this will only work because cmpsi is an
4274 expander, and no actual insns use this. */
4277 cmpsi_operand (rtx op, enum machine_mode mode)
4279 if (nonimmediate_operand (op, mode))
4282 if (GET_CODE (op) == AND
4283 && GET_MODE (op) == SImode
4284 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4285 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4286 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4287 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4288 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4289 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4295 /* Returns 1 if OP is memory operand that can not be represented by the
4299 long_memory_operand (rtx op, enum machine_mode mode)
4301 if (! memory_operand (op, mode))
4304 return memory_address_length (op) != 0;
4307 /* Return nonzero if the rtx is known aligned. */
4310 aligned_operand (rtx op, enum machine_mode mode)
4312 struct ix86_address parts;
4314 if (!general_operand (op, mode))
4317 /* Registers and immediate operands are always "aligned". */
4318 if (GET_CODE (op) != MEM)
4321 /* Don't even try to do any aligned optimizations with volatiles. */
4322 if (MEM_VOLATILE_P (op))
4327 /* Pushes and pops are only valid on the stack pointer. */
4328 if (GET_CODE (op) == PRE_DEC
4329 || GET_CODE (op) == POST_INC)
4332 /* Decode the address. */
4333 if (! ix86_decompose_address (op, &parts))
4336 /* Look for some component that isn't known to be aligned. */
4340 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4345 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4350 if (GET_CODE (parts.disp) != CONST_INT
4351 || (INTVAL (parts.disp) & 3) != 0)
4355 /* Didn't find one -- this must be an aligned address. */
4360 compare_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4362 return GET_CODE (op) == COMPARE;
4365 /* Initialize the table of extra 80387 mathematical constants. */
4368 init_ext_80387_constants (void)
4370 static const char * cst[5] =
4372 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4373 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4374 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4375 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4376 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4380 for (i = 0; i < 5; i++)
4382 real_from_string (&ext_80387_constants_table[i], cst[i]);
4383 /* Ensure each constant is rounded to XFmode precision. */
4384 real_convert (&ext_80387_constants_table[i],
4385 XFmode, &ext_80387_constants_table[i]);
4388 ext_80387_constants_init = 1;
4391 /* Return true if the constant is something that can be loaded with
4392 a special instruction. */
4395 standard_80387_constant_p (rtx x)
4397 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4400 if (x == CONST0_RTX (GET_MODE (x)))
4402 if (x == CONST1_RTX (GET_MODE (x)))
4405 /* For XFmode constants, try to find a special 80387 instruction on
4406 those CPUs that benefit from them. */
4407 if (GET_MODE (x) == XFmode
4408 && x86_ext_80387_constants & TUNEMASK)
4413 if (! ext_80387_constants_init)
4414 init_ext_80387_constants ();
4416 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4417 for (i = 0; i < 5; i++)
4418 if (real_identical (&r, &ext_80387_constants_table[i]))
4425 /* Return the opcode of the special instruction to be used to load
4429 standard_80387_constant_opcode (rtx x)
4431 switch (standard_80387_constant_p (x))
4451 /* Return the CONST_DOUBLE representing the 80387 constant that is
4452 loaded by the specified special instruction. The argument IDX
4453 matches the return value from standard_80387_constant_p. */
4456 standard_80387_constant_rtx (int idx)
4460 if (! ext_80387_constants_init)
4461 init_ext_80387_constants ();
4477 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4481 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4484 standard_sse_constant_p (rtx x)
4486 if (x == const0_rtx)
4488 return (x == CONST0_RTX (GET_MODE (x)));
4491 /* Returns 1 if OP contains a symbol reference */
4494 symbolic_reference_mentioned_p (rtx op)
4499 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4502 fmt = GET_RTX_FORMAT (GET_CODE (op));
4503 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4509 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4510 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4514 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4521 /* Return 1 if it is appropriate to emit `ret' instructions in the
4522 body of a function. Do this only if the epilogue is simple, needing a
4523 couple of insns. Prior to reloading, we can't tell how many registers
4524 must be saved, so return 0 then. Return 0 if there is no frame
4525 marker to de-allocate.
4527 If NON_SAVING_SETJMP is defined and true, then it is not possible
4528 for the epilogue to be simple, so return 0. This is a special case
4529 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4530 until final, but jump_optimize may need to know sooner if a
4534 ix86_can_use_return_insn_p (void)
4536 struct ix86_frame frame;
4538 #ifdef NON_SAVING_SETJMP
4539 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4543 if (! reload_completed || frame_pointer_needed)
4546 /* Don't allow more than 32 pop, since that's all we can do
4547 with one instruction. */
4548 if (current_function_pops_args
4549 && current_function_args_size >= 32768)
4552 ix86_compute_frame_layout (&frame);
4553 return frame.to_allocate == 0 && frame.nregs == 0;
4556 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
/* Nonzero if VALUE fits in a sign-extended 32-bit immediate (x86-64 'e'
   constraint class).  Handles CONST_INT directly, SYMBOL_REF/LABEL_REF
   per code model, and CONST PLUS/UNSPEC offsetted forms.
   NOTE(review): this extraction is missing interior lines (case labels,
   braces, some returns); comments describe only the visible code.  */
4558 x86_64_sign_extended_value (rtx value)
4560 switch (GET_CODE (value))
4562 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4563 to be at least 32 and this all acceptable constants are
4564 represented as CONST_INT. */
4566 if (HOST_BITS_PER_WIDE_INT == 32)
/* 64-bit host: the value fits iff truncating to SImode and sign
   extending back reproduces the original DImode value.  */
4570 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4571 return trunc_int_for_mode (val, SImode) == val;
4575 /* For certain code models, the symbolic references are known to fit.
4576 in CM_SMALL_PIC model we know it fits if it is local to the shared
4577 library. Don't count TLS SYMBOL_REFs here, since they should fit
4578 only if inside of UNSPEC handled below. */
4580 /* TLS symbols are not constant. */
4581 if (tls_symbolic_operand (value, Pmode))
4583 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4585 /* For certain code models, the code is near as well. */
4587 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4588 || ix86_cmodel == CM_KERNEL);
4590 /* We also may accept the offsetted memory references in certain special
4593 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4594 switch (XINT (XEXP (value, 0), 1))
4596 case UNSPEC_GOTPCREL:
4598 case UNSPEC_GOTNTPOFF:
/* CONST of (PLUS base offset): the base (op1) and the constant
   offset (op2) are validated separately below.  */
4604 if (GET_CODE (XEXP (value, 0)) == PLUS)
4606 rtx op1 = XEXP (XEXP (value, 0), 0);
4607 rtx op2 = XEXP (XEXP (value, 0), 1);
4608 HOST_WIDE_INT offset;
4610 if (ix86_cmodel == CM_LARGE)
4612 if (GET_CODE (op2) != CONST_INT)
4614 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4615 switch (GET_CODE (op1))
4618 /* For CM_SMALL assume that latest object is 16MB before
4619 end of 31bits boundary. We may also accept pretty
4620 large negative constants knowing that all objects are
4621 in the positive half of address space. */
4622 if (ix86_cmodel == CM_SMALL
4623 && offset < 16*1024*1024
4624 && trunc_int_for_mode (offset, SImode) == offset)
4626 /* For CM_KERNEL we know that all object resist in the
4627 negative half of 32bits address space. We may not
4628 accept negative offsets, since they may be just off
4629 and we may accept pretty large positive ones. */
4630 if (ix86_cmodel == CM_KERNEL
4632 && trunc_int_for_mode (offset, SImode) == offset)
4636 /* These conditions are similar to SYMBOL_REF ones, just the
4637 constraints for code models differ. */
4638 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4639 && offset < 16*1024*1024
4640 && trunc_int_for_mode (offset, SImode) == offset)
4642 if (ix86_cmodel == CM_KERNEL
4644 && trunc_int_for_mode (offset, SImode) == offset)
4648 switch (XINT (op1, 1))
4653 && trunc_int_for_mode (offset, SImode) == offset)
4667 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
/* Nonzero if VALUE fits in a zero-extended 32-bit immediate (x86-64 'Z'
   constraint class).  Parallel in structure to
   x86_64_sign_extended_value but with unsigned range checks.
   NOTE(review): interior lines are missing from this extraction.  */
4669 x86_64_zero_extended_value (rtx value)
4671 switch (GET_CODE (value))
/* 32-bit host: a CONST_DOUBLE fits iff its high word is zero.  */
4674 if (HOST_BITS_PER_WIDE_INT == 32)
4675 return (GET_MODE (value) == VOIDmode
4676 && !CONST_DOUBLE_HIGH (value));
4680 if (HOST_BITS_PER_WIDE_INT == 32)
4681 return INTVAL (value) >= 0;
/* 64-bit host: fits iff no bits above bit 31 are set.  */
4683 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4686 /* For certain code models, the symbolic references are known to fit. */
4688 /* TLS symbols are not constant. */
4689 if (tls_symbolic_operand (value, Pmode))
4691 return ix86_cmodel == CM_SMALL;
4693 /* For certain code models, the code is near as well. */
4695 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4697 /* We also may accept the offsetted memory references in certain special
4700 if (GET_CODE (XEXP (value, 0)) == PLUS)
4702 rtx op1 = XEXP (XEXP (value, 0), 0);
4703 rtx op2 = XEXP (XEXP (value, 0), 1);
4705 if (ix86_cmodel == CM_LARGE)
4707 switch (GET_CODE (op1))
4711 /* For small code model we may accept pretty large positive
4712 offsets, since one bit is available for free. Negative
4713 offsets are limited by the size of NULL pointer area
4714 specified by the ABI. */
4715 if (ix86_cmodel == CM_SMALL
4716 && GET_CODE (op2) == CONST_INT
4717 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4718 && (trunc_int_for_mode (INTVAL (op2), SImode)
4721 /* ??? For the kernel, we may accept adjustment of
4722 -0x10000000, since we know that it will just convert
4723 negative address space to positive, but perhaps this
4724 is not worthwhile. */
4727 /* These conditions are similar to SYMBOL_REF ones, just the
4728 constraints for code models differ. */
4729 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4730 && GET_CODE (op2) == CONST_INT
4731 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4732 && (trunc_int_for_mode (INTVAL (op2), SImode)
4746 /* Value should be nonzero if functions must have frame pointers.
4747 Zero means the frame pointer need not be set up (and parms may
4748 be accessed via the stack pointer) in functions that seem suitable. */
/* Nonzero if the current function must set up a frame pointer:
   previous-frame access, subtarget requirements, non-leaf with
   -momit-leaf-frame-pointer, or profiling.
   NOTE(review): interior lines (returns, braces) are missing from
   this extraction.  */
4751 ix86_frame_pointer_required (void)
4753 /* If we accessed previous frames, then the generated code expects
4754 to be able to access the saved ebp value in our frame. */
4755 if (cfun->machine->accesses_prev_frame)
4758 /* Several x86 os'es need a frame pointer for other reasons,
4759 usually pertaining to setjmp. */
4760 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4763 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4764 the frame pointer by default. Turn it back on now if we've not
4765 got a leaf function. */
4766 if (TARGET_OMIT_LEAF_FRAME_POINTER
4767 && (!current_function_is_leaf))
4770 if (current_function_profile)
4776 /* Record that the current function accesses previous call frames. */
/* Record that the current function accesses previous call frames
   (e.g. via __builtin_frame_address); checked by
   ix86_frame_pointer_required to force a frame pointer.  */
4779 ix86_setup_frame_addresses (void)
4781 cfun->machine->accesses_prev_frame = 1;
4784 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4785 # define USE_HIDDEN_LINKONCE 1
4787 # define USE_HIDDEN_LINKONCE 0
4790 static int pic_labels_used;
4792 /* Fills in the label name that should be used for a pc thunk for
4793 the given register. */
/* Fill NAME with the label used for the PC-load thunk of register
   REGNO: the well-known "__i686.get_pc_thunk.<reg>" when hidden
   linkonce sections are available, else an internal "LPR" label.  */
4796 get_pc_thunk_name (char name[32], unsigned int regno)
4798 if (USE_HIDDEN_LINKONCE)
4799 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4801 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4805 /* This function generates code for -fpic that loads %ebx with
4806 the return address of the caller and then returns. */
/* TARGET_ASM_FILE_END hook: emit one PC-load thunk ("mov (%esp),%reg;
   ret") for every register recorded in pic_labels_used, then mark the
   stack non-executable if required.
   NOTE(review): interior lines are missing from this extraction.  */
4809 ix86_file_end (void)
4814 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested.  */
4818 if (! ((pic_labels_used >> regno) & 1))
4821 get_pc_thunk_name (name, regno);
4823 if (USE_HIDDEN_LINKONCE)
/* Emit the thunk as a public, hidden, one-only function so that
   multiple translation units share a single copy.  */
4827 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4829 TREE_PUBLIC (decl) = 1;
4830 TREE_STATIC (decl) = 1;
4831 DECL_ONE_ONLY (decl) = 1;
4833 (*targetm.asm_out.unique_section) (decl, 0);
4834 named_section (decl, NULL, 0);
4836 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4837 fputs ("\t.hidden\t", asm_out_file);
4838 assemble_name (asm_out_file, name);
4839 fputc ('\n', asm_out_file);
4840 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4845 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack)
   into the target register and return.  */
4848 xops[0] = gen_rtx_REG (SImode, regno);
4849 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4850 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4851 output_asm_insn ("ret", xops);
4854 if (NEED_INDICATE_EXEC_STACK)
4855 file_end_indicate_exec_stack ();
4858 /* Emit code for the SET_GOT patterns. */
/* Emit assembly for the set_got pattern: load the GOT base address
   into DEST, either via an inline call/pop sequence or via a shared
   PC thunk (deep-branch-prediction targets), then add
   _GLOBAL_OFFSET_TABLE_.
   NOTE(review): interior lines are missing from this extraction.  */
4861 output_set_got (rtx dest)
4866 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4868 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* Inline form: call the next instruction and pop the pushed
   return address into DEST.  */
4870 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4873 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4875 output_asm_insn ("call\t%a2", xops);
4878 /* Output the "canonical" label name ("Lxx$pb") here too. This
4879 is what will be referred to by the Mach-O PIC subsystem. */
4880 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4882 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4883 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4886 output_asm_insn ("pop{l}\t%0", xops);
/* Thunk form: call the per-register get_pc thunk and record that
   the thunk must be emitted by ix86_file_end.  */
4891 get_pc_thunk_name (name, REGNO (dest));
4892 pic_labels_used |= 1 << REGNO (dest);
4894 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4895 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4896 output_asm_insn ("call\t%X2", xops);
4899 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4900 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4901 else if (!TARGET_MACHO)
4902 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4907 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) — the RTL for a push.
   NOTE(review): the function header line is missing from this
   extraction; only the return expression is visible.  */
4912 return gen_rtx_SET (VOIDmode,
4914 gen_rtx_PRE_DEC (Pmode,
4915 stack_pointer_rtx)),
4919 /* Return >= 0 if there is an unused call-clobbered register available
4920 for the entire function. */
/* Return the number of an unused call-clobbered register (eax/edx/ecx,
   scanned from 2 down to 0) usable as the PIC register for the whole
   function, or INVALID_REGNUM.  Only leaf, non-profiled functions
   qualify.  NOTE(review): interior lines are missing here.  */
4923 ix86_select_alt_pic_regnum (void)
4925 if (current_function_is_leaf && !current_function_profile)
4928 for (i = 2; i >= 0; --i)
4929 if (!regs_ever_live[i])
4933 return INVALID_REGNUM;
4936 /* Return 1 if we need to save REGNO. */
/* Nonzero if REGNO must be saved in the prologue.  Handles the PIC
   register, the eh_return data registers (when MAYBE_EH_RETURN), and
   the general rule: live, not call-clobbered, not fixed, and not the
   frame pointer when one is in use.
   NOTE(review): interior lines are missing from this extraction.  */
4938 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4940 if (pic_offset_table_rtx
4941 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4942 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4943 || current_function_profile
4944 || current_function_calls_eh_return
4945 || current_function_uses_const_pool))
/* If an alternate PIC register was found, ebx need not be saved.  */
4947 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4952 if (current_function_calls_eh_return && maybe_eh_return)
4957 unsigned test = EH_RETURN_DATA_REGNO (i);
4958 if (test == INVALID_REGNUM)
4965 return (regs_ever_live[regno]
4966 && !call_used_regs[regno]
4967 && !fixed_regs[regno]
4968 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4971 /* Return number of registers to be saved on the stack. */
/* Count the registers that ix86_save_reg says must be saved on the
   stack (eh_return registers included).  */
4974 ix86_nsaved_regs (void)
4979 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4980 if (ix86_save_reg (regno, true))
4985 /* Return the offset between two registers, one to be eliminated, and the other
4986 its replacement, at the start of a routine. */
/* INITIAL_ELIMINATION_OFFSET: distance between eliminable register
   FROM (arg or soft frame pointer) and its replacement TO (hard frame
   pointer or stack pointer), derived from the computed frame layout.
   NOTE(review): interior lines are missing from this extraction.  */
4989 ix86_initial_elimination_offset (int from, int to)
4991 struct ix86_frame frame;
4992 ix86_compute_frame_layout (&frame);
4994 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4995 return frame.hard_frame_pointer_offset;
4996 else if (from == FRAME_POINTER_REGNUM
4997 && to == HARD_FRAME_POINTER_REGNUM)
4998 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5001 if (to != STACK_POINTER_REGNUM)
5003 else if (from == ARG_POINTER_REGNUM)
5004 return frame.stack_pointer_offset;
5005 else if (from != FRAME_POINTER_REGNUM)
5008 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5012 /* Fill structure ix86_frame about frame of currently computed function. */
/* Compute the stack-frame layout of the current function into *FRAME:
   saved-register count, paddings, varargs save area, outgoing args,
   red-zone size, and the three key offsets (frame pointer, hard frame
   pointer, stack pointer).  Also decides between push-based and
   move-based register saving (save_regs_using_mov).
   NOTE(review): interior lines (braces, aborts, some conditions) are
   missing from this extraction.  */
5015 ix86_compute_frame_layout (struct ix86_frame *frame)
5017 HOST_WIDE_INT total_size;
5018 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5019 HOST_WIDE_INT offset;
5020 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5021 HOST_WIDE_INT size = get_frame_size ();
5023 frame->nregs = ix86_nsaved_regs ();
5026 /* During reload iteration the amount of registers saved can change.
5027 Recompute the value as needed. Do not recompute when amount of registers
5028 didn't change as reload does mutiple calls to the function and does not
5029 expect the decision to change within single iteration. */
5031 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5033 int count = frame->nregs;
5035 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5036 /* The fast prologue uses move instead of push to save registers. This
5037 is significantly longer, but also executes faster as modern hardware
5038 can execute the moves in parallel, but can't do that for push/pop.
5040 Be careful about choosing what prologue to emit: When function takes
5041 many instructions to execute we may use slow version as well as in
5042 case function is known to be outside hot spot (this is known with
5043 feedback only). Weight the size of function by number of registers
5044 to save as it is cheap to use one or two push instructions but very
5045 slow to use many of them. */
5047 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5048 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5049 || (flag_branch_probabilities
5050 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5051 cfun->machine->use_fast_prologue_epilogue = false;
5053 cfun->machine->use_fast_prologue_epilogue
5054 = !expensive_function_p (count);
5056 if (TARGET_PROLOGUE_USING_MOVE
5057 && cfun->machine->use_fast_prologue_epilogue)
5058 frame->save_regs_using_mov = true;
5060 frame->save_regs_using_mov = false;
5063 /* Skip return address and saved base pointer. */
5064 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5066 frame->hard_frame_pointer_offset = offset;
5068 /* Do some sanity checking of stack_alignment_needed and
5069 preferred_alignment, since i386 port is the only using those features
5070 that may break easily. */
5072 if (size && !stack_alignment_needed)
5074 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5076 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5078 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5081 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5082 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5084 /* Register save area */
5085 offset += frame->nregs * UNITS_PER_WORD;
5088 if (ix86_save_varrargs_registers)
5090 offset += X86_64_VARARGS_SIZE;
5091 frame->va_arg_size = X86_64_VARARGS_SIZE;
5094 frame->va_arg_size = 0;
5096 /* Align start of frame for local function. */
5097 frame->padding1 = ((offset + stack_alignment_needed - 1)
5098 & -stack_alignment_needed) - offset;
5100 offset += frame->padding1;
5102 /* Frame pointer points here. */
5103 frame->frame_pointer_offset = offset;
5107 /* Add outgoing arguments area. Can be skipped if we eliminated
5108 all the function calls as dead code.
5109 Skipping is however impossible when function calls alloca. Alloca
5110 expander assumes that last current_function_outgoing_args_size
5111 of stack frame are unused. */
5112 if (ACCUMULATE_OUTGOING_ARGS
5113 && (!current_function_is_leaf || current_function_calls_alloca))
5115 offset += current_function_outgoing_args_size;
5116 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5119 frame->outgoing_arguments_size = 0;
5121 /* Align stack boundary. Only needed if we're calling another function
5123 if (!current_function_is_leaf || current_function_calls_alloca)
5124 frame->padding2 = ((offset + preferred_alignment - 1)
5125 & -preferred_alignment) - offset;
5127 frame->padding2 = 0;
5129 offset += frame->padding2;
5131 /* We've reached end of stack frame. */
5132 frame->stack_pointer_offset = offset;
5134 /* Size prologue needs to allocate. */
5135 frame->to_allocate =
5136 (size + frame->padding1 + frame->padding2
5137 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Fall back to pushes for tiny frames, and on 64-bit when the
   allocation would not fit a 32-bit displacement.  */
5139 if ((!frame->to_allocate && frame->nregs <= 1)
5140 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5141 frame->save_regs_using_mov = false;
5143 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5144 && current_function_is_leaf)
5146 frame->red_zone_size = frame->to_allocate;
5147 if (frame->save_regs_using_mov)
5148 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5149 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5150 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5153 frame->red_zone_size = 0;
5154 frame->to_allocate -= frame->red_zone_size;
5155 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under a debug guard
   on a missing line — TODO confirm).  */
5157 fprintf (stderr, "nregs: %i\n", frame->nregs);
5158 fprintf (stderr, "size: %i\n", size);
5159 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5160 fprintf (stderr, "padding1: %i\n", frame->padding1);
5161 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5162 fprintf (stderr, "padding2: %i\n", frame->padding2);
5163 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5164 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5165 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5166 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5167 frame->hard_frame_pointer_offset);
5168 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5172 /* Emit code to save registers in the prologue. */
/* Prologue helper: emit a push insn, marked frame-related for DWARF
   unwind info, for every register that must be saved.  Scans from the
   highest register number down so pops in the epilogue can mirror it.  */
5175 ix86_emit_save_regs (void)
5180 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5181 if (ix86_save_reg (regno, true))
5183 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5184 RTX_FRAME_RELATED_P (insn) = 1;
5188 /* Emit code to save registers using MOV insns. First register
5189 is restored from POINTER + OFFSET. */
/* Prologue helper: save the to-be-saved registers with MOV stores at
   successive word offsets from POINTER + OFFSET, marking each insn
   frame-related for unwind info.  */
5191 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5196 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5197 if (ix86_save_reg (regno, true))
5199 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5201 gen_rtx_REG (Pmode, regno))
5202 RTX_FRAME_RELATED_P (insn) = 1;
5203 offset += UNITS_PER_WORD;
5207 /* Expand prologue or epilogue stack adjustment.
5208 The pattern exist to put a dependency on all ebp-based memory accesses.
5209 STYLE should be negative if instructions should be marked as frame related,
5210 zero if %r11 register is live and cannot be freely used and positive
/* Emit a prologue/epilogue stack adjustment DEST = SRC + OFFSET using
   the dedicated adjust_stack patterns (which carry a dependency on all
   ebp-based memory accesses).  On 64-bit, an offset too large for an
   immediate is first loaded into r11.
   NOTE(review): interior lines are missing from this extraction.  */
5214 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5219 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5220 else if (x86_64_immediate_operand (offset, DImode))
5221 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5225 /* r11 is used by indirect sibcall return as well, set before the
5226 epilogue and used after the epilogue. ATM indirect sibcall
5227 shouldn't be used together with huge frame sizes in one
5228 function because of the frame_size check in sibcall.c. */
5231 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5232 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5234 RTX_FRAME_RELATED_P (insn) = 1;
5235 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5239 RTX_FRAME_RELATED_P (insn) = 1;
5242 /* Expand the prologue into a bunch of separate insns. */
/* Expand the function prologue as separate insns: optional frame
   pointer setup, register saves (push- or move-based), stack
   allocation (with a probing worker via eax for large Win32 frames),
   and PIC register setup (set_got) when needed.
   NOTE(review): interior lines are missing from this extraction.  */
5245 ix86_expand_prologue (void)
5249 struct ix86_frame frame;
5250 HOST_WIDE_INT allocate;
5252 ix86_compute_frame_layout (&frame);
5254 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5255 slower on all targets. Also sdb doesn't like it. */
5257 if (frame_pointer_needed)
5259 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5260 RTX_FRAME_RELATED_P (insn) = 1;
5262 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5263 RTX_FRAME_RELATED_P (insn) = 1;
5266 allocate = frame.to_allocate;
5268 if (!frame.save_regs_using_mov)
5269 ix86_emit_save_regs ();
/* Move-based saves land inside the allocation, so grow it.  */
5271 allocate += frame.nregs * UNITS_PER_WORD;
5273 /* When using red zone we may start register saving before allocating
5274 the stack frame saving one cycle of the prologue. */
5275 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5276 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5277 : stack_pointer_rtx,
5278 -frame.nregs * UNITS_PER_WORD)
5282 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5283 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5284 GEN_INT (-allocate), -1);
5287 /* Only valid for Win32. */
5288 rtx eax = gen_rtx_REG (SImode, 0);
5289 bool eax_live = ix86_eax_live_at_start_p ();
/* Preserve a live incoming eax around the stack-probe call.  */
5296 emit_insn (gen_push (eax));
5300 insn = emit_move_insn (eax, GEN_INT (allocate));
5301 RTX_FRAME_RELATED_P (insn) = 1;
5303 insn = emit_insn (gen_allocate_stack_worker (eax));
5304 RTX_FRAME_RELATED_P (insn) = 1;
5308 rtx t = plus_constant (stack_pointer_rtx, allocate);
5309 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5313 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5315 if (!frame_pointer_needed || !frame.to_allocate)
5316 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5318 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5319 -frame.nregs * UNITS_PER_WORD);
5322 pic_reg_used = false;
5323 if (pic_offset_table_rtx
5324 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5325 || current_function_profile))
5327 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5329 if (alt_pic_reg_used != INVALID_REGNUM)
5330 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5332 pic_reg_used = true;
5337 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5339 /* Even with accurate pre-reload life analysis, we can wind up
5340 deleting all references to the pic register after reload.
5341 Consider if cross-jumping unifies two sides of a branch
5342 controlled by a comparison vs the only read from a global.
5343 In which case, allow the set_got to be deleted, though we're
5344 too late to do anything about the ebx save in the prologue. */
5345 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5348 /* Prevent function calls from be scheduled before the call to mcount.
5349 In the pic_reg_used case, make sure that the got load isn't deleted. */
5350 if (current_function_profile)
5351 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5354 /* Emit code to restore saved registers using MOV insns. First register
5355 is restored from POINTER + OFFSET. */
/* Epilogue helper: restore saved registers with MOV loads from
   POINTER + OFFSET upward.  On x86-64, offsets that do not fit a
   32-bit displacement are materialized through r11 first.
   NOTE(review): interior lines are missing from this extraction.  */
5357 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5358 int maybe_eh_return)
5361 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5363 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5364 if (ix86_save_reg (regno, maybe_eh_return))
5366 /* Ensure that adjust_address won't be forced to produce pointer
5367 out of range allowed by x86-64 instruction set. */
5368 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5372 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5373 emit_move_insn (r11, GEN_INT (offset));
5374 emit_insn (gen_adddi3 (r11, r11, pointer));
5375 base_address = gen_rtx_MEM (Pmode, r11);
5378 emit_move_insn (gen_rtx_REG (Pmode, regno),
5379 adjust_address (base_address, Pmode, offset));
5380 offset += UNITS_PER_WORD;
5384 /* Restore function stack, frame, and registers. */
/* Expand the function epilogue: restore registers (by MOV or POP),
   deallocate the frame, handle the eh_return stack adjustment, emit
   LEAVE or discrete pops for the frame pointer, and emit the return
   (plain, popping, or indirect for >64K argument pops).  STYLE
   distinguishes normal returns, sibcalls, and eh_return (style == 2).
   NOTE(review): interior lines are missing from this extraction.  */
5387 ix86_expand_epilogue (int style)
5390 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5391 struct ix86_frame frame;
5392 HOST_WIDE_INT offset;
5394 ix86_compute_frame_layout (&frame);
5396 /* Calculate start of saved registers relative to ebp. Special care
5397 must be taken for the normal return case of a function using
5398 eh_return: the eax and edx registers are marked as saved, but not
5399 restored along this path. */
5400 offset = frame.nregs;
5401 if (current_function_calls_eh_return && style != 2)
5403 offset *= -UNITS_PER_WORD;
5405 /* If we're only restoring one register and sp is not valid then
5406 using a move instruction to restore the register since it's
5407 less work than reloading sp and popping the register.
5409 The default code result in stack adjustment using add/lea instruction,
5410 while this code results in LEAVE instruction (or discrete equivalent),
5411 so it is profitable in some other cases as well. Especially when there
5412 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5413 and there is exactly one register to pop. This heuristic may need some
5414 tuning in future. */
5415 if ((!sp_valid && frame.nregs <= 1)
5416 || (TARGET_EPILOGUE_USING_MOVE
5417 && cfun->machine->use_fast_prologue_epilogue
5418 && (frame.nregs > 1 || frame.to_allocate))
5419 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5420 || (frame_pointer_needed && TARGET_USE_LEAVE
5421 && cfun->machine->use_fast_prologue_epilogue
5422 && frame.nregs == 1)
5423 || current_function_calls_eh_return)
5425 /* Restore registers. We can use ebp or esp to address the memory
5426 locations. If both are available, default to ebp, since offsets
5427 are known to be small. Only exception is esp pointing directly to the
5428 end of block of saved registers, where we may simplify addressing
5431 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5432 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5433 frame.to_allocate, style == 2);
5435 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5436 offset, style == 2);
5438 /* eh_return epilogues need %ecx added to the stack pointer. */
5441 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5443 if (frame_pointer_needed)
5445 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5446 tmp = plus_constant (tmp, UNITS_PER_WORD);
5447 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5449 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5450 emit_move_insn (hard_frame_pointer_rtx, tmp);
5452 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5457 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5458 tmp = plus_constant (tmp, (frame.to_allocate
5459 + frame.nregs * UNITS_PER_WORD));
5460 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5463 else if (!frame_pointer_needed)
5464 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5465 GEN_INT (frame.to_allocate
5466 + frame.nregs * UNITS_PER_WORD),
5468 /* If not an i386, mov & pop is faster than "leave". */
5469 else if (TARGET_USE_LEAVE || optimize_size
5470 || !cfun->machine->use_fast_prologue_epilogue)
5471 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5474 pro_epilogue_adjust_stack (stack_pointer_rtx,
5475 hard_frame_pointer_rtx,
5478 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5480 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5485 /* First step is to deallocate the stack frame so that we can
5486 pop the registers. */
5489 if (!frame_pointer_needed)
5491 pro_epilogue_adjust_stack (stack_pointer_rtx,
5492 hard_frame_pointer_rtx,
5493 GEN_INT (offset), style);
5495 else if (frame.to_allocate)
5496 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5497 GEN_INT (frame.to_allocate), style);
5499 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5500 if (ix86_save_reg (regno, false))
5503 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5505 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5507 if (frame_pointer_needed)
5509 /* Leave results in shorter dependency chains on CPUs that are
5510 able to grok it fast. */
5511 if (TARGET_USE_LEAVE)
5512 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5513 else if (TARGET_64BIT)
5514 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5516 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5520 /* Sibcall epilogues don't want a return instruction. */
5524 if (current_function_pops_args && current_function_args_size)
5526 rtx popc = GEN_INT (current_function_pops_args);
5528 /* i386 can only pop 64K bytes. If asked to pop more, pop
5529 return address, do explicit add, and jump indirectly to the
5532 if (current_function_pops_args >= 65536)
5534 rtx ecx = gen_rtx_REG (SImode, 2);
5536 /* There is no "pascal" calling convention in 64bit ABI. */
5540 emit_insn (gen_popsi1 (ecx));
5541 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5542 emit_jump_insn (gen_return_indirect_internal (ecx));
5545 emit_jump_insn (gen_return_pop_internal (popc));
5548 emit_jump_insn (gen_return_internal ());
5551 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: undo per-function state changes —
   restore the PIC register rtx to its real hard register number in
   case the prologue retargeted it to an alternate register.  */
5554 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5555 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5557 if (pic_offset_table_rtx)
5558 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5561 /* Extract the parts of an RTL expression that is a valid memory address
5562 for an instruction. Return 0 if the structure of the address is
5563 grossly off. Return -1 if the address contains ASHIFT, so it is not
5564 strictly valid, but still used for computing length of lea instruction. */
/* Split ADDR into the i386 addressing-mode parts (base, index, scale,
   displacement, segment) stored in *OUT.  Returns 0 for a grossly
   malformed address, -1 for ASHIFT forms only valid in lea.
   NOTE(review): interior lines (returns, braces, some cases) are
   missing from this extraction.  */
5567 ix86_decompose_address (rtx addr, struct ix86_address *out)
5569 rtx base = NULL_RTX;
5570 rtx index = NULL_RTX;
5571 rtx disp = NULL_RTX;
5572 HOST_WIDE_INT scale = 1;
5573 rtx scale_rtx = NULL_RTX;
5575 enum ix86_address_seg seg = SEG_DEFAULT;
5577 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5579 else if (GET_CODE (addr) == PLUS)
/* Flatten the nested PLUS chain into an addend array, then classify
   each addend below.  */
5589 addends[n++] = XEXP (op, 1);
5592 while (GET_CODE (op) == PLUS);
5597 for (i = n; i >= 0; --i)
5600 switch (GET_CODE (op))
5605 index = XEXP (op, 0);
5606 scale_rtx = XEXP (op, 1);
/* An UNSPEC_TP addend selects the thread-pointer segment
   register (fs on 64-bit, gs on 32-bit).  */
5610 if (XINT (op, 1) == UNSPEC_TP
5611 && TARGET_TLS_DIRECT_SEG_REFS
5612 && seg == SEG_DEFAULT)
5613 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5642 else if (GET_CODE (addr) == MULT)
5644 index = XEXP (addr, 0); /* index*scale */
5645 scale_rtx = XEXP (addr, 1);
5647 else if (GET_CODE (addr) == ASHIFT)
5651 /* We're called for lea too, which implements ashift on occasion. */
5652 index = XEXP (addr, 0);
5653 tmp = XEXP (addr, 1);
5654 if (GET_CODE (tmp) != CONST_INT)
5656 scale = INTVAL (tmp);
5657 if ((unsigned HOST_WIDE_INT) scale > 3)
5663 disp = addr; /* displacement */
5665 /* Extract the integral value of scale. */
5668 if (GET_CODE (scale_rtx) != CONST_INT)
5670 scale = INTVAL (scale_rtx);
5673 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5674 if (base && index && scale == 1
5675 && (index == arg_pointer_rtx
5676 || index == frame_pointer_rtx
5677 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5684 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5685 if ((base == hard_frame_pointer_rtx
5686 || base == frame_pointer_rtx
5687 || base == arg_pointer_rtx) && !disp)
5690 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5691 Avoid this by transforming to [%esi+0]. */
5692 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5693 && base && !index && !disp
5695 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5698 /* Special case: encode reg+reg instead of reg*2. */
5699 if (!base && index && scale && scale == 2)
5700 base = index, scale = 1;
5702 /* Special case: scaling cannot be encoded without base or displacement. */
5703 if (!base && !disp && index && scale != 1)
5715 /* Return cost of the memory address x.
5716 For i386, it is better to use a complex address than let gcc copy
5717 the address into a reg and make a new pseudo. But not if the address
5718 requires to two regs - that would mean more pseudos with longer
/* TARGET_ADDRESS_COST hook: rate memory address X.  Complex addresses
   are cheap on i386; the cost rises with the number of (hard or
   allocated) registers used, and on K6 for modes that decode slowly.
   NOTE(review): interior lines are missing from this extraction.  */
5721 ix86_address_cost (rtx x)
5723 struct ix86_address parts;
5726 if (!ix86_decompose_address (x, &parts))
5729 /* More complex memory references are better. */
5730 if (parts.disp && parts.disp != const0_rtx)
5732 if (parts.seg != SEG_DEFAULT)
5735 /* Attempt to minimize number of registers in the address. */
5737 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5739 && (!REG_P (parts.index)
5740 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5744 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5746 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5747 && parts.base != parts.index)
5750 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5751 since it's predecode logic can't detect the length of instructions
5752 and it degenerates to vector decoded. Increase cost of such
5753 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5754 to split such addresses or even refuse such addresses at all.
5756 Following addressing modes are affected:
5761 The first and last case may be avoidable by explicitly coding the zero in
5762 memory address, but I don't have AMD-K6 machine handy to check this
5766 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5767 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5768 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5774 /* If X is a machine specific address (i.e. a symbol or label being
5775 referenced as a displacement from the GOT implemented using an
5776 UNSPEC), then return the base term. Otherwise return X. */
/* FIND_BASE_TERM hook: if X is a GOT-relative displacement (a CONST
   wrapping UNSPEC_GOTPCREL, possibly plus a constant), return the
   underlying SYMBOL_REF/LABEL_REF; otherwise delegate to
   ix86_delegitimize_address.
   NOTE(review): interior lines are missing from this extraction.  */
5779 ix86_find_base_term (rtx x)
5785 if (GET_CODE (x) != CONST)
/* Strip an outer constant offset before looking for the UNSPEC.  */
5788 if (GET_CODE (term) == PLUS
5789 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5790 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5791 term = XEXP (term, 0);
5792 if (GET_CODE (term) != UNSPEC
5793 || XINT (term, 1) != UNSPEC_GOTPCREL)
5796 term = XVECEXP (term, 0, 0);
5798 if (GET_CODE (term) != SYMBOL_REF
5799 && GET_CODE (term) != LABEL_REF)
5805 term = ix86_delegitimize_address (x);
5807 if (GET_CODE (term) != SYMBOL_REF
5808 && GET_CODE (term) != LABEL_REF)
5814 /* Determine if a given RTX is a valid constant. We already know this
5815 satisfies CONSTANT_P. */
/* LEGITIMATE_CONSTANT_P: nonzero if X (already known CONSTANT_P) is a
   valid constant operand — rejects TLS symbols and unknown UNSPECs.
   NOTE(review): interior lines (case labels, returns) are missing
   from this extraction.  */
5818 legitimate_constant_p (rtx x)
5820 switch (GET_CODE (x))
5825 if (GET_CODE (x) == PLUS)
5827 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5832 /* Only some unspecs are valid as "constants". */
5833 if (GET_CODE (x) == UNSPEC)
5834 switch (XINT (x, 1))
5838 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5840 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5845 /* We must have drilled down to a symbol. */
5846 if (!symbolic_operand (x, Pmode))
5851 /* TLS symbols are never valid. */
5852 if (tls_symbolic_operand (x, Pmode))
5860 /* Otherwise we handle everything else in the move patterns. */
5864 /* Determine if it's legal to put X into the constant pool. This
5865 is not possible for the address of thread-local symbols, which
5866 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: X may not be placed in the
   constant pool exactly when it is not a legitimate constant
   (notably TLS symbol addresses).  */
5869 ix86_cannot_force_const_mem (rtx x)
5871 return !legitimate_constant_p (x);
5874 /* Determine if a given RTX is a valid constant address. */
/* Nonzero if X is a constant that is also a valid (strict) memory
   address in Pmode.  */
5877 constant_address_p (rtx x)
5879 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5882 /* Nonzero if the constant value X is a legitimate general operand
5883 when generating PIC code. It is given that flag_pic is on and
5884 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* Nonzero if constant X is a legitimate general operand under
   -fpic: CONST expressions are checked for allowed UNSPECs, symbols
   through legitimate_pic_address_disp_p.
   NOTE(review): interior lines (case labels, returns) are missing
   from this extraction.  */
5887 legitimate_pic_operand_p (rtx x)
5891 switch (GET_CODE (x))
5894 inner = XEXP (x, 0);
5896 /* Only some unspecs are valid as "constants". */
5897 if (GET_CODE (inner) == UNSPEC)
5898 switch (XINT (inner, 1))
5901 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5909 return legitimate_pic_address_disp_p (x);
5916 /* Determine if a given CONST RTX is a valid memory displacement
5920 legitimate_pic_address_disp_p (rtx disp)
5924 /* In 64bit mode we can allow direct addresses of symbols and labels
5925 when they are not dynamic symbols. */
5928 /* TLS references should always be enclosed in UNSPEC. */
5929 if (tls_symbolic_operand (disp, GET_MODE (disp)))
/* A local symbol under the small PIC model may be addressed directly.  */
5931 if (GET_CODE (disp) == SYMBOL_REF
5932 && ix86_cmodel == CM_SMALL_PIC
5933 && SYMBOL_REF_LOCAL_P (disp))
5935 if (GET_CODE (disp) == LABEL_REF)
/* Likewise symbol/label plus a constant offset within +-16MB.  */
5937 if (GET_CODE (disp) == CONST
5938 && GET_CODE (XEXP (disp, 0)) == PLUS)
5940 rtx op0 = XEXP (XEXP (disp, 0), 0);
5941 rtx op1 = XEXP (XEXP (disp, 0), 1);
5943 /* TLS references should always be enclosed in UNSPEC. */
5944 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5946 if (((GET_CODE (op0) == SYMBOL_REF
5947 && ix86_cmodel == CM_SMALL_PIC
5948 && SYMBOL_REF_LOCAL_P (op0))
5949 || GET_CODE (op0) == LABEL_REF)
5950 && GET_CODE (op1) == CONST_INT
5951 && INTVAL (op1) < 16*1024*1024
5952 && INTVAL (op1) >= -16*1024*1024)
5956 if (GET_CODE (disp) != CONST)
5958 disp = XEXP (disp, 0);
5962 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5963 of GOT tables. We should not need these anyway. */
5964 if (GET_CODE (disp) != UNSPEC
5965 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5968 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5969 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* Peel off an added integer offset before classifying the unspec.  */
5975 if (GET_CODE (disp) == PLUS)
5977 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5979 disp = XEXP (disp, 0);
5983 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5984 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5986 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5987 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5988 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5990 const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* Mach-O marks the pic base with the literal name "<pic base>".  */
5991 if (! strcmp (sym_name, "<pic base>"))
5996 if (GET_CODE (disp) != UNSPEC)
/* Classify the remaining UNSPEC-wrapped displacements.  */
5999 switch (XINT (disp, 1))
6004 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6006 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6007 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6008 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6010 case UNSPEC_GOTTPOFF:
6011 case UNSPEC_GOTNTPOFF:
6012 case UNSPEC_INDNTPOFF:
/* Each TLS unspec is valid only for its matching symbol class.  */
6015 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6017 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6019 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6025 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6026 memory address for an instruction. The MODE argument is the machine mode
6027 for the MEM expression that wants to use this address.
6029 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6030 convert common non-canonical forms to canonical form so that they will
6034 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6036 struct ix86_address parts;
6037 rtx base, index, disp;
6038 HOST_WIDE_INT scale;
/* On rejection, REASON/REASON_RTX are reported by the debug path below.  */
6039 const char *reason = NULL;
6040 rtx reason_rtx = NULL_RTX;
6042 if (TARGET_DEBUG_ADDR)
6045 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6046 GET_MODE_NAME (mode), strict);
/* Split ADDR into base + index*scale + disp; failure rejects outright.  */
6050 if (ix86_decompose_address (addr, &parts) <= 0)
6052 reason = "decomposition failed";
6057 index = parts.index;
6059 scale = parts.scale;
6061 /* Validate base register.
6063 Don't allow SUBREG's here, it can lead to spill failures when the base
6064 is one word out of a two word structure, which is represented internally
6071 if (GET_CODE (base) != REG)
6073 reason = "base is not a register";
6077 if (GET_MODE (base) != Pmode)
6079 reason = "base is not in Pmode";
/* STRICT distinguishes post-reload (hard-reg) from pre-reload checks.  */
6083 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6084 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6086 reason = "base is not valid";
6091 /* Validate index register.
6093 Don't allow SUBREG's here, it can lead to spill failures when the index
6094 is one word out of a two word structure, which is represented internally
6101 if (GET_CODE (index) != REG)
6103 reason = "index is not a register";
6107 if (GET_MODE (index) != Pmode)
6109 reason = "index is not in Pmode";
6113 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6114 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6116 reason = "index is not valid";
6121 /* Validate scale factor. */
6124 reason_rtx = GEN_INT (scale);
6127 reason = "scale without index";
/* Only 2, 4 and 8 are hardware-encodable (besides the implicit 1).  */
6131 if (scale != 2 && scale != 4 && scale != 8)
6133 reason = "scale is not a valid multiplier";
6138 /* Validate displacement. */
6143 if (GET_CODE (disp) == CONST
6144 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6145 switch (XINT (XEXP (disp, 0), 1))
6149 case UNSPEC_GOTPCREL:
6152 goto is_legitimate_pic;
6154 case UNSPEC_GOTTPOFF:
6155 case UNSPEC_GOTNTPOFF:
6156 case UNSPEC_INDNTPOFF:
6162 reason = "invalid address unspec";
6166 else if (flag_pic && (SYMBOLIC_CONST (disp)
6168 && !machopic_operand_p (disp)
6173 if (TARGET_64BIT && (index || base))
6175 /* foo@dtpoff(%rX) is ok. */
6176 if (GET_CODE (disp) != CONST
6177 || GET_CODE (XEXP (disp, 0)) != PLUS
6178 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6179 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6180 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6181 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6183 reason = "non-constant pic memory reference";
6187 else if (! legitimate_pic_address_disp_p (disp))
6189 reason = "displacement is an invalid pic construct";
6193 /* This code used to verify that a symbolic pic displacement
6194 includes the pic_offset_table_rtx register.
6196 While this is good idea, unfortunately these constructs may
6197 be created by "adds using lea" optimization for incorrect
6206 This code is nonsensical, but results in addressing
6207 GOT table with pic_offset_table_rtx base. We can't
6208 just refuse it easily, since it gets matched by
6209 "addsi3" pattern, that later gets split to lea in the
6210 case output register differs from input. While this
6211 can be handled by separate addsi pattern for this case
6212 that never results in lea, this seems to be easier and
6213 correct fix for crash to disable this test. */
6215 else if (GET_CODE (disp) != LABEL_REF
6216 && GET_CODE (disp) != CONST_INT
6217 && (GET_CODE (disp) != CONST
6218 || !legitimate_constant_p (disp))
6219 && (GET_CODE (disp) != SYMBOL_REF
6220 || !legitimate_constant_p (disp)))
6222 reason = "displacement is not constant";
/* 64-bit displacements must fit in a sign-extended 32-bit field.  */
6225 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6227 reason = "displacement is out of range";
6232 /* Everything looks valid. */
6233 if (TARGET_DEBUG_ADDR)
6234 fprintf (stderr, "Success.\n");
/* Shared rejection path: dump the reason under -d debugging.  */
6238 if (TARGET_DEBUG_ADDR)
6240 fprintf (stderr, "Error: %s\n", reason);
6241 debug_rtx (reason_rtx);
6246 /* Return an unique alias set for the GOT. */
6248 static HOST_WIDE_INT
6249 ix86_GOT_alias_set (void)
/* Lazily allocated on first call; -1 marks "not yet created".  */
6251 static HOST_WIDE_INT set = -1;
6253 set = new_alias_set ();
6257 /* Return a legitimate reference for ORIG (an address) using the
6258 register REG. If REG is 0, a new pseudo is generated.
6260 There are two types of references that must be handled:
6262 1. Global data references must load the address from the GOT, via
6263 the PIC reg. An insn is emitted to do this load, and the reg is
6266 2. Static data references, constant pool addresses, and code labels
6267 compute the address as an offset from the GOT, whose base is in
6268 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6269 differentiate them from global data objects. The returned
6270 address is the PIC reg + an unspec constant.
6272 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6273 reg also appears in the address. */
6276 legitimize_pic_address (rtx orig, rtx reg)
6284 reg = gen_reg_rtx (Pmode);
6285 /* Use the generic Mach-O PIC machinery. */
6286 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6289 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6291 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6293 /* This symbol may be referenced via a displacement from the PIC
6294 base address (@GOTOFF). */
/* During reload no new pseudos may be made; mark the PIC reg live.  */
6296 if (reload_in_progress)
6297 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6298 if (GET_CODE (addr) == CONST)
6299 addr = XEXP (addr, 0);
6300 if (GET_CODE (addr) == PLUS)
6302 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6303 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6306 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6307 new = gen_rtx_CONST (Pmode, new);
6308 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6312 emit_move_insn (reg, new);
6316 else if (GET_CODE (addr) == SYMBOL_REF)
/* 64-bit global symbol: RIP-relative GOT load via @GOTPCREL.  */
6320 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6321 new = gen_rtx_CONST (Pmode, new);
6322 new = gen_rtx_MEM (Pmode, new);
6323 RTX_UNCHANGING_P (new) = 1;
6324 set_mem_alias_set (new, ix86_GOT_alias_set ());
6327 reg = gen_reg_rtx (Pmode);
6328 /* Use directly gen_movsi, otherwise the address is loaded
6329 into register for CSE. We don't want to CSE this addresses,
6330 instead we CSE addresses from the GOT table, so skip this. */
6331 emit_insn (gen_movsi (reg, new));
6336 /* This symbol must be referenced via a load from the
6337 Global Offset Table (@GOT). */
6339 if (reload_in_progress)
6340 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6341 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6342 new = gen_rtx_CONST (Pmode, new);
6343 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6344 new = gen_rtx_MEM (Pmode, new);
6345 RTX_UNCHANGING_P (new) = 1;
6346 set_mem_alias_set (new, ix86_GOT_alias_set ());
6349 reg = gen_reg_rtx (Pmode);
6350 emit_move_insn (reg, new);
6356 if (GET_CODE (addr) == CONST)
6358 addr = XEXP (addr, 0);
6360 /* We must match stuff we generate before. Assume the only
6361 unspecs that can get here are ours. Not that we could do
6362 anything with them anyway.... */
6363 if (GET_CODE (addr) == UNSPEC
6364 || (GET_CODE (addr) == PLUS
6365 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6367 if (GET_CODE (addr) != PLUS)
6370 if (GET_CODE (addr) == PLUS)
6372 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6374 /* Check first to see if this is a constant offset from a @GOTOFF
6375 symbol reference. */
6376 if (local_symbolic_operand (op0, Pmode)
6377 && GET_CODE (op1) == CONST_INT)
6381 if (reload_in_progress)
6382 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6383 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6385 new = gen_rtx_PLUS (Pmode, new, op1);
6386 new = gen_rtx_CONST (Pmode, new);
6387 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6391 emit_move_insn (reg, new);
/* Offsets outside +-16MB don't fit the small-model range; force the
   offset into a register instead.  */
6397 if (INTVAL (op1) < -16*1024*1024
6398 || INTVAL (op1) >= 16*1024*1024)
6399 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
/* General PLUS: legitimize both halves recursively and recombine.  */
6404 base = legitimize_pic_address (XEXP (addr, 0), reg);
6405 new = legitimize_pic_address (XEXP (addr, 1),
6406 base == reg ? NULL_RTX : reg);
6408 if (GET_CODE (new) == CONST_INT)
6409 new = plus_constant (base, INTVAL (new));
/* Re-associate so the constant part ends up outermost.  */
6412 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6414 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6415 new = XEXP (new, 1);
6417 new = gen_rtx_PLUS (Pmode, base, new);
6425 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6428 get_thread_pointer (int to_reg)
/* The thread pointer is represented as (unspec [const0] UNSPEC_TP).  */
6432 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6436 reg = gen_reg_rtx (Pmode);
6437 insn = gen_rtx_SET (VOIDmode, reg, tp);
6438 insn = emit_insn (insn);
6443 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6444 false if we expect this to be used for a memory address and true if
6445 we expect to load the address into a register. */
6448 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6450 rtx dest, base, off, pic;
/* Dispatch on the TLS access model chosen for symbol X.  */
6455 case TLS_MODEL_GLOBAL_DYNAMIC:
6456 dest = gen_reg_rtx (Pmode);
/* 64-bit GD: call __tls_get_addr; result arrives in hard reg 0 (rax).  */
6459 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6462 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6463 insns = get_insns ();
6466 emit_libcall_block (insns, dest, rax, x);
6469 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6472 case TLS_MODEL_LOCAL_DYNAMIC:
6473 base = gen_reg_rtx (Pmode);
6476 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6479 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6480 insns = get_insns ();
6483 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6484 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6485 emit_libcall_block (insns, base, rax, note);
6488 emit_insn (gen_tls_local_dynamic_base_32 (base));
/* LD: address = module base + @DTPOFF displacement.  */
6490 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6491 off = gen_rtx_CONST (Pmode, off);
6493 return gen_rtx_PLUS (Pmode, base, off);
6495 case TLS_MODEL_INITIAL_EXEC:
/* Pick the GOT-relative unspec flavor for this target/assembler.  */
6499 type = UNSPEC_GOTNTPOFF;
6503 if (reload_in_progress)
6504 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6505 pic = pic_offset_table_rtx;
6506 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6508 else if (!TARGET_GNU_TLS)
6510 pic = gen_reg_rtx (Pmode);
6511 emit_insn (gen_set_got (pic));
6512 type = UNSPEC_GOTTPOFF;
6517 type = UNSPEC_INDNTPOFF;
/* IE: load the TP offset from the GOT entry for X.  */
6520 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6521 off = gen_rtx_CONST (Pmode, off);
6523 off = gen_rtx_PLUS (Pmode, pic, off);
6524 off = gen_rtx_MEM (Pmode, off);
6525 RTX_UNCHANGING_P (off) = 1;
6526 set_mem_alias_set (off, ix86_GOT_alias_set ());
6528 if (TARGET_64BIT || TARGET_GNU_TLS)
6530 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6531 off = force_reg (Pmode, off);
6532 return gen_rtx_PLUS (Pmode, base, off);
/* Non-GNU TLS: subtract the offset from the thread pointer instead.  */
6536 base = get_thread_pointer (true);
6537 dest = gen_reg_rtx (Pmode);
6538 emit_insn (gen_subsi3 (dest, base, off));
6542 case TLS_MODEL_LOCAL_EXEC:
/* LE: offset is a link-time constant (@NTPOFF or @TPOFF).  */
6543 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6544 (TARGET_64BIT || TARGET_GNU_TLS)
6545 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6546 off = gen_rtx_CONST (Pmode, off);
6548 if (TARGET_64BIT || TARGET_GNU_TLS)
6550 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6551 return gen_rtx_PLUS (Pmode, base, off);
6555 base = get_thread_pointer (true);
6556 dest = gen_reg_rtx (Pmode);
6557 emit_insn (gen_subsi3 (dest, base, off));
6568 /* Try machine-dependent ways of modifying an illegitimate address
6569 to be legitimate. If we find one, return the new, valid address.
6570 This macro is used in only one place: `memory_address' in explow.c.
6572 OLDX is the address as it was before break_out_memory_refs was called.
6573 In some cases it is useful to look at this to decide what needs to be done.
6575 MODE and WIN are passed so that this macro can use
6576 GO_IF_LEGITIMATE_ADDRESS.
6578 It is always safe for this macro to do nothing. It exists to recognize
6579 opportunities to optimize the output.
6581 For the 80386, we handle X+REG by loading X into a register R and
6582 using R+REG. R will go in a general reg and indexing will be used.
6583 However, if REG is a broken-out memory address or multiplication,
6584 nothing needs to be done because REG can certainly go in a general reg.
6586 When -fpic is used, special handling is needed for symbolic references.
6587 See comments by legitimize_pic_address in i386.c for details. */
6590 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6595 if (TARGET_DEBUG_ADDR)
6597 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6598 GET_MODE_NAME (mode));
/* TLS symbols are handled by their own legitimizer.  */
6602 log = tls_symbolic_operand (x, mode);
6604 return legitimize_tls_address (x, log, false);
6606 if (flag_pic && SYMBOLIC_CONST (x))
6607 return legitimize_pic_address (x, 0);
6609 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6610 if (GET_CODE (x) == ASHIFT
6611 && GET_CODE (XEXP (x, 1)) == CONST_INT
6612 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6615 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6616 GEN_INT (1 << log));
6619 if (GET_CODE (x) == PLUS)
6621 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6623 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6624 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6625 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6628 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6629 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6630 GEN_INT (1 << log));
6633 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6634 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6635 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6638 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6639 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6640 GEN_INT (1 << log));
6643 /* Put multiply first if it isn't already. */
6644 if (GET_CODE (XEXP (x, 1)) == MULT)
6646 rtx tmp = XEXP (x, 0);
6647 XEXP (x, 0) = XEXP (x, 1);
6652 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6653 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6654 created by virtual register instantiation, register elimination, and
6655 similar optimizations. */
6656 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6659 x = gen_rtx_PLUS (Pmode,
6660 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6661 XEXP (XEXP (x, 1), 0)),
6662 XEXP (XEXP (x, 1), 1));
6666 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6667 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6668 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6669 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6670 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6671 && CONSTANT_P (XEXP (x, 1)))
6674 rtx other = NULL_RTX;
/* Exactly one of the two candidate positions holds the CONST_INT.  */
6676 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6678 constant = XEXP (x, 1);
6679 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6681 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6683 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6684 other = XEXP (x, 1);
6692 x = gen_rtx_PLUS (Pmode,
6693 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6694 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6695 plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, stop as soon as the (non-strict)
   address check accepts the result.  */
6699 if (changed && legitimate_address_p (mode, x, FALSE))
6702 if (GET_CODE (XEXP (x, 0)) == MULT)
6705 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6708 if (GET_CODE (XEXP (x, 1)) == MULT)
6711 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6715 && GET_CODE (XEXP (x, 1)) == REG
6716 && GET_CODE (XEXP (x, 0)) == REG)
6719 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6722 x = legitimize_pic_address (x, 0);
6725 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a fresh register.  */
6728 if (GET_CODE (XEXP (x, 0)) == REG)
6730 rtx temp = gen_reg_rtx (Pmode);
6731 rtx val = force_operand (XEXP (x, 1), temp);
6733 emit_move_insn (temp, val);
6739 else if (GET_CODE (XEXP (x, 1)) == REG)
6741 rtx temp = gen_reg_rtx (Pmode);
6742 rtx val = force_operand (XEXP (x, 0), temp);
6744 emit_move_insn (temp, val);
6754 /* Print an integer constant expression in assembler syntax. Addition
6755 and subtraction are the only arithmetic that may appear in these
6756 expressions. FILE is the stdio stream to write to, X is the rtx, and
6757 CODE is the operand print code from the output string. */
6760 output_pic_addr_const (FILE *file, rtx x, int code)
6764 switch (GET_CODE (x))
6774 assemble_name (file, XSTR (x, 0));
/* Non-local symbols with the 'P' code get a @PLT relocation suffix.  */
6775 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6776 fputs ("@PLT", file);
6783 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6784 assemble_name (asm_out_file, buf);
6788 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6792 /* This used to output parentheses around the expression,
6793 but that does not work on the 386 (either ATT or BSD assembler). */
6794 output_pic_addr_const (file, XEXP (x, 0), code);
6798 if (GET_MODE (x) == VOIDmode)
6800 /* We can use %d if the number is <32 bits and positive. */
6801 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6802 fprintf (file, "0x%lx%08lx",
6803 (unsigned long) CONST_DOUBLE_HIGH (x),
6804 (unsigned long) CONST_DOUBLE_LOW (x));
6806 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6809 /* We can't handle floating point constants;
6810 PRINT_OPERAND must handle them. */
6811 output_operand_lossage ("floating constant misused");
6815 /* Some assemblers need integer constants to appear first. */
6816 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6818 output_pic_addr_const (file, XEXP (x, 0), code);
6820 output_pic_addr_const (file, XEXP (x, 1), code);
6822 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6824 output_pic_addr_const (file, XEXP (x, 1), code);
6826 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracketing differs between Intel and AT&T dialects.  */
6834 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6835 output_pic_addr_const (file, XEXP (x, 0), code);
6837 output_pic_addr_const (file, XEXP (x, 1), code);
6839 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the operand, then the matching relocation suffix.  */
6843 if (XVECLEN (x, 0) != 1)
6845 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6846 switch (XINT (x, 1))
6849 fputs ("@GOT", file);
6852 fputs ("@GOTOFF", file);
6854 case UNSPEC_GOTPCREL:
6855 fputs ("@GOTPCREL(%rip)", file);
6857 case UNSPEC_GOTTPOFF:
6858 /* FIXME: This might be @TPOFF in Sun ld too. */
6859 fputs ("@GOTTPOFF", file);
6862 fputs ("@TPOFF", file);
6866 fputs ("@TPOFF", file);
6868 fputs ("@NTPOFF", file);
6871 fputs ("@DTPOFF", file);
6873 case UNSPEC_GOTNTPOFF:
6875 fputs ("@GOTTPOFF(%rip)", file);
6877 fputs ("@GOTNTPOFF", file);
6879 case UNSPEC_INDNTPOFF:
6880 fputs ("@INDNTPOFF", file);
6883 output_operand_lossage ("invalid UNSPEC as operand");
6889 output_operand_lossage ("invalid expression as operand");
6893 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6894 We need to handle our special PIC relocations. */
6897 i386_dwarf_output_addr_const (FILE *file, rtx x)
/* Pointer-sized directive: .quad on 64-bit, .long otherwise.  */
6900 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6904 fprintf (file, "%s", ASM_LONG);
/* PIC constants need our relocation-suffix printer.  */
6907 output_pic_addr_const (file, x, '\0');
6909 output_addr_const (file, x);
6913 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6914 We need to emit DTP-relative relocations. */
6917 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6919 fputs (ASM_LONG, file);
6920 output_addr_const (file, x);
6921 fputs ("@DTPOFF", file);
/* Pad the upper half with zero (larger SIZE case).  */
6927 fputs (", 0", file);
6934 /* In the name of slightly smaller debug output, and to cater to
6935 general assembler losage, recognize PIC+GOTOFF and turn it back
6936 into a direct symbol reference. */
6939 ix86_delegitimize_address (rtx orig_x)
6943 if (GET_CODE (x) == MEM)
/* 64-bit case: recognize a @GOTPCREL GOT load and return its symbol.  */
6948 if (GET_CODE (x) != CONST
6949 || GET_CODE (XEXP (x, 0)) != UNSPEC
6950 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6951 || GET_CODE (orig_x) != MEM)
6953 return XVECEXP (XEXP (x, 0), 0, 0);
6956 if (GET_CODE (x) != PLUS
6957 || GET_CODE (XEXP (x, 1)) != CONST)
/* Identify the PIC register part of the address.  */
6960 if (GET_CODE (XEXP (x, 0)) == REG
6961 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6962 /* %ebx + GOT/GOTOFF */
6964 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6966 /* %ebx + %reg * scale + GOT/GOTOFF */
6968 if (GET_CODE (XEXP (y, 0)) == REG
6969 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6971 else if (GET_CODE (XEXP (y, 1)) == REG
6972 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6976 if (GET_CODE (y) != REG
6977 && GET_CODE (y) != MULT
6978 && GET_CODE (y) != ASHIFT)
/* @GOT references must come from a MEM; @GOTOFF must not.  */
6984 x = XEXP (XEXP (x, 1), 0);
6985 if (GET_CODE (x) == UNSPEC
6986 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6987 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6990 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6991 return XVECEXP (x, 0, 0);
/* Same as above but with an extra integer offset in the CONST.  */
6994 if (GET_CODE (x) == PLUS
6995 && GET_CODE (XEXP (x, 0)) == UNSPEC
6996 && GET_CODE (XEXP (x, 1)) == CONST_INT
6997 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6998 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6999 && GET_CODE (orig_x) != MEM)))
7001 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7003 return gen_rtx_PLUS (Pmode, y, x);
/* Emit the condition-code suffix (e.g. "e", "a", "nb") for CODE in CC
   mode MODE to FILE; REVERSE inverts the condition.  NOTE(review):
   interior lines are elided in this extract.  */
7011 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares may need splitting; only simple ones are handled here.  */
7016 if (mode == CCFPmode || mode == CCFPUmode)
7018 enum rtx_code second_code, bypass_code;
7019 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7020 if (bypass_code != NIL || second_code != NIL)
7022 code = ix86_fp_compare_code_to_integer (code);
7026 code = reverse_condition (code);
7037 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7042 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7043 Those same assemblers have the same but opposite losage on cmov. */
7046 suffix = fp ? "nbe" : "a";
7049 if (mode == CCNOmode || mode == CCGOCmode)
7051 else if (mode == CCmode || mode == CCGCmode)
7062 if (mode == CCNOmode || mode == CCGOCmode)
7064 else if (mode == CCmode || mode == CCGCmode)
7073 suffix = fp ? "nb" : "ae";
7076 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
/* Parity suffixes encode (un)ordered FP results.  */
7086 suffix = fp ? "u" : "p";
7089 suffix = fp ? "nu" : "np";
7094 fputs (suffix, file);
7097 /* Print the name of register X to FILE based on its machine mode and number.
7098 If CODE is 'w', pretend the mode is HImode.
7099 If CODE is 'b', pretend the mode is QImode.
7100 If CODE is 'k', pretend the mode is SImode.
7101 If CODE is 'q', pretend the mode is DImode.
7102 If CODE is 'h', pretend the reg is the `high' byte register.
7103 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7106 print_reg (rtx x, int code, FILE *file)
/* Internal-only registers must never reach assembly output.  */
7108 if (REGNO (x) == ARG_POINTER_REGNUM
7109 || REGNO (x) == FRAME_POINTER_REGNUM
7110 || REGNO (x) == FLAGS_REG
7111 || REGNO (x) == FPSR_REG)
7114 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the letter codes into a byte width (CODE below).  */
7117 if (code == 'w' || MMX_REG_P (x))
7119 else if (code == 'b')
7121 else if (code == 'k')
7123 else if (code == 'q')
7125 else if (code == 'y')
7127 else if (code == 'h')
7130 code = GET_MODE_SIZE (GET_MODE (x));
7132 /* Irritatingly, AMD extended registers use different naming convention
7133 from the normal registers. */
7134 if (REX_INT_REG_P (x))
7141 error ("extended registers have no high halves");
7144 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7147 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7150 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7153 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7156 error ("unsupported operand size for extended register");
7164 if (STACK_TOP_P (x))
7166 fputs ("st(0)", file);
/* Integer registers get an 'e' (32-bit) or 'r' (64-bit) prefix.  */
7173 if (! ANY_FP_REG_P (x))
7174 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7179 fputs (hi_reg_name[REGNO (x)], file);
7182 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7184 fputs (qi_reg_name[REGNO (x)], file);
7187 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7189 fputs (qi_high_reg_name[REGNO (x)], file);
7196 /* Locate some local-dynamic symbol still in use by this function
7197 so that we can print its name in some tls_local_dynamic_base
7201 get_some_local_dynamic_name (void)
/* The result is cached in cfun->machine->some_ld_name.  */
7205 if (cfun->machine->some_ld_name)
7206 return cfun->machine->some_ld_name;
/* Walk every insn; the _1 callback records the first match found.  */
7208 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7210 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7211 return cfun->machine->some_ld_name;
/* for_each_rtx callback: record the name of the first local-dynamic
   TLS SYMBOL_REF encountered.  */
7217 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7221 if (GET_CODE (x) == SYMBOL_REF
7222 && local_dynamic_symbolic_operand (x, Pmode))
7224 cfun->machine->some_ld_name = XSTR (x, 0);
7232 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7233 C -- print opcode suffix for set/cmov insn.
7234 c -- like C, but print reversed condition
7235 F,f -- likewise, but for floating-point.
7236 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7238 R -- print the prefix for register names.
7239 z -- print the opcode suffix for the size of the current operand.
7240 * -- print a star (in certain assembler syntax)
7241 A -- print an absolute memory reference.
7242 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7243 s -- print a shift double count, followed by the assemblers argument
7245 b -- print the QImode name of the register for the indicated operand.
7246 %b0 would print %al if operands[0] is reg 0.
7247 w -- likewise, print the HImode name of the register.
7248 k -- likewise, print the SImode name of the register.
7249 q -- likewise, print the DImode name of the register.
7250 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7251 y -- print "st(0)" instead of "st" as a register.
7252 D -- print condition for SSE cmp instruction.
7253 P -- if PIC, print an @PLT suffix.
7254 X -- don't print any sort of PIC '@' suffix for a symbol.
7255 & -- print some in-use local-dynamic symbol name.
7259 print_operand (FILE *file, rtx x, int code)
7266 if (ASSEMBLER_DIALECT == ASM_ATT)
7271 assemble_name (file, get_some_local_dynamic_name ());
7275 if (ASSEMBLER_DIALECT == ASM_ATT)
7277 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7279 /* Intel syntax. For absolute addresses, registers should not
7280 be surrounded by braces. */
7281 if (GET_CODE (x) != REG)
7284 PRINT_OPERAND (file, x, 0);
7292 PRINT_OPERAND (file, x, 0);
7297 if (ASSEMBLER_DIALECT == ASM_ATT)
7302 if (ASSEMBLER_DIALECT == ASM_ATT)
7307 if (ASSEMBLER_DIALECT == ASM_ATT)
7312 if (ASSEMBLER_DIALECT == ASM_ATT)
7317 if (ASSEMBLER_DIALECT == ASM_ATT)
7322 if (ASSEMBLER_DIALECT == ASM_ATT)
7327 /* 387 opcodes don't get size suffixes if the operands are
7329 if (STACK_REG_P (x))
7332 /* Likewise if using Intel opcodes. */
7333 if (ASSEMBLER_DIALECT == ASM_INTEL)
7336 /* This is the size of op from size of operand. */
7337 switch (GET_MODE_SIZE (GET_MODE (x)))
7340 #ifdef HAVE_GAS_FILDS_FISTS
7346 if (GET_MODE (x) == SFmode)
7361 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7363 #ifdef GAS_MNEMONICS
7389 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7391 PRINT_OPERAND (file, x, 0);
7397 /* Little bit of braindamage here. The SSE compare instructions
7398 does use completely different names for the comparisons that the
7399 fp conditional moves. */
7400 switch (GET_CODE (x))
7415 fputs ("unord", file);
7419 fputs ("neq", file);
7423 fputs ("nlt", file);
7427 fputs ("nle", file);
7430 fputs ("ord", file);
7438 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7439 if (ASSEMBLER_DIALECT == ASM_ATT)
7441 switch (GET_MODE (x))
7443 case HImode: putc ('w', file); break;
7445 case SFmode: putc ('l', file); break;
7447 case DFmode: putc ('q', file); break;
7455 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7458 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7459 if (ASSEMBLER_DIALECT == ASM_ATT)
7462 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7465 /* Like above, but reverse condition */
7467 /* Check to see if argument to %c is really a constant
7468 and not a condition code which needs to be reversed. */
7469 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7471 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7474 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7477 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7478 if (ASSEMBLER_DIALECT == ASM_ATT)
7481 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7487 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7490 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7493 int pred_val = INTVAL (XEXP (x, 0));
7495 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7496 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7498 int taken = pred_val > REG_BR_PROB_BASE / 2;
7499 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7501 /* Emit hints only in the case default branch prediction
7502 heuristics would fail. */
7503 if (taken != cputaken)
7505 /* We use 3e (DS) prefix for taken branches and
7506 2e (CS) prefix for not taken branches. */
7508 fputs ("ds ; ", file);
7510 fputs ("cs ; ", file);
7517 output_operand_lossage ("invalid operand code `%c'", code);
7521 if (GET_CODE (x) == REG)
7522 print_reg (x, code, file);
7524 else if (GET_CODE (x) == MEM)
7526 /* No `byte ptr' prefix for call instructions. */
7527 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7530 switch (GET_MODE_SIZE (GET_MODE (x)))
7532 case 1: size = "BYTE"; break;
7533 case 2: size = "WORD"; break;
7534 case 4: size = "DWORD"; break;
7535 case 8: size = "QWORD"; break;
7536 case 12: size = "XWORD"; break;
7537 case 16: size = "XMMWORD"; break;
7542 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7545 else if (code == 'w')
7547 else if (code == 'k')
7551 fputs (" PTR ", file);
7555 /* Avoid (%rip) for call operands. */
7556 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7557 && GET_CODE (x) != CONST_INT)
7558 output_addr_const (file, x);
7559 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7560 output_operand_lossage ("invalid constraints for operand");
7565 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7570 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7571 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7573 if (ASSEMBLER_DIALECT == ASM_ATT)
7575 fprintf (file, "0x%08lx", l);
7578 /* These float cases don't actually occur as immediate operands. */
7579 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7583 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7584 fprintf (file, "%s", dstr);
7587 else if (GET_CODE (x) == CONST_DOUBLE
7588 && GET_MODE (x) == XFmode)
7592 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7593 fprintf (file, "%s", dstr);
7600 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7602 if (ASSEMBLER_DIALECT == ASM_ATT)
7605 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7606 || GET_CODE (x) == LABEL_REF)
7608 if (ASSEMBLER_DIALECT == ASM_ATT)
7611 fputs ("OFFSET FLAT:", file);
7614 if (GET_CODE (x) == CONST_INT)
7615 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7617 output_pic_addr_const (file, x, code);
7619 output_addr_const (file, x);
7623 /* Print a memory operand whose address is ADDR. */
7626 print_operand_address (FILE *file, rtx addr)
7628 struct ix86_address parts;
7629 rtx base, index, disp;
7632 if (! ix86_decompose_address (addr, &parts))
7636 index = parts.index;
7638 scale = parts.scale;
7646 if (USER_LABEL_PREFIX[0] == 0)
7648 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7654 if (!base && !index)
7656 /* Displacement only requires special attention. */
7658 if (GET_CODE (disp) == CONST_INT)
7660 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7662 if (USER_LABEL_PREFIX[0] == 0)
7664 fputs ("ds:", file);
7666 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7669 output_pic_addr_const (file, disp, 0);
7671 output_addr_const (file, disp);
7673 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7675 && ((GET_CODE (disp) == SYMBOL_REF
7676 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7677 || GET_CODE (disp) == LABEL_REF
7678 || (GET_CODE (disp) == CONST
7679 && GET_CODE (XEXP (disp, 0)) == PLUS
7680 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7681 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7682 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7683 fputs ("(%rip)", file);
7687 if (ASSEMBLER_DIALECT == ASM_ATT)
7692 output_pic_addr_const (file, disp, 0);
7693 else if (GET_CODE (disp) == LABEL_REF)
7694 output_asm_label (disp);
7696 output_addr_const (file, disp);
7701 print_reg (base, 0, file);
7705 print_reg (index, 0, file);
7707 fprintf (file, ",%d", scale);
7713 rtx offset = NULL_RTX;
7717 /* Pull out the offset of a symbol; print any symbol itself. */
7718 if (GET_CODE (disp) == CONST
7719 && GET_CODE (XEXP (disp, 0)) == PLUS
7720 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7722 offset = XEXP (XEXP (disp, 0), 1);
7723 disp = gen_rtx_CONST (VOIDmode,
7724 XEXP (XEXP (disp, 0), 0));
7728 output_pic_addr_const (file, disp, 0);
7729 else if (GET_CODE (disp) == LABEL_REF)
7730 output_asm_label (disp);
7731 else if (GET_CODE (disp) == CONST_INT)
7734 output_addr_const (file, disp);
7740 print_reg (base, 0, file);
7743 if (INTVAL (offset) >= 0)
7745 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7749 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7756 print_reg (index, 0, file);
7758 fprintf (file, "*%d", scale);
7766 output_addr_const_extra (FILE *file, rtx x)
7770 if (GET_CODE (x) != UNSPEC)
7773 op = XVECEXP (x, 0, 0);
7774 switch (XINT (x, 1))
7776 case UNSPEC_GOTTPOFF:
7777 output_addr_const (file, op);
7778 /* FIXME: This might be @TPOFF in Sun ld. */
7779 fputs ("@GOTTPOFF", file);
7782 output_addr_const (file, op);
7783 fputs ("@TPOFF", file);
7786 output_addr_const (file, op);
7788 fputs ("@TPOFF", file);
7790 fputs ("@NTPOFF", file);
7793 output_addr_const (file, op);
7794 fputs ("@DTPOFF", file);
7796 case UNSPEC_GOTNTPOFF:
7797 output_addr_const (file, op);
7799 fputs ("@GOTTPOFF(%rip)", file);
7801 fputs ("@GOTNTPOFF", file);
7803 case UNSPEC_INDNTPOFF:
7804 output_addr_const (file, op);
7805 fputs ("@INDNTPOFF", file);
7815 /* Split one or more DImode RTL references into pairs of SImode
7816 references. The RTL can be REG, offsettable MEM, integer constant, or
7817 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7818 split and "num" is its length. lo_half and hi_half are output arrays
7819 that parallel "operands". */
7822 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7826 rtx op = operands[num];
7828 /* simplify_subreg refuse to split volatile memory addresses,
7829 but we still have to handle it. */
7830 if (GET_CODE (op) == MEM)
7832 lo_half[num] = adjust_address (op, SImode, 0);
7833 hi_half[num] = adjust_address (op, SImode, 4);
7837 lo_half[num] = simplify_gen_subreg (SImode, op,
7838 GET_MODE (op) == VOIDmode
7839 ? DImode : GET_MODE (op), 0);
7840 hi_half[num] = simplify_gen_subreg (SImode, op,
7841 GET_MODE (op) == VOIDmode
7842 ? DImode : GET_MODE (op), 4);
7846 /* Split one or more TImode RTL references into pairs of SImode
7847 references. The RTL can be REG, offsettable MEM, integer constant, or
7848 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7849 split and "num" is its length. lo_half and hi_half are output arrays
7850 that parallel "operands". */
7853 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7857 rtx op = operands[num];
7859 /* simplify_subreg refuse to split volatile memory addresses, but we
7860 still have to handle it. */
7861 if (GET_CODE (op) == MEM)
7863 lo_half[num] = adjust_address (op, DImode, 0);
7864 hi_half[num] = adjust_address (op, DImode, 8);
7868 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7869 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7874 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7875 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7876 is the expression of the binary operation. The output may either be
7877 emitted here, or returned to the caller, like all output_* functions.
7879 There is no guarantee that the operands are the same mode, as they
7880 might be within FLOAT or FLOAT_EXTEND expressions. */
7882 #ifndef SYSV386_COMPAT
7883 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7884 wants to fix the assemblers because that causes incompatibility
7885 with gcc. No-one wants to fix gcc because that causes
7886 incompatibility with assemblers... You can use the option of
7887 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7888 #define SYSV386_COMPAT 1
7892 output_387_binary_op (rtx insn, rtx *operands)
7894 static char buf[30];
7897 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7899 #ifdef ENABLE_CHECKING
7900 /* Even if we do not want to check the inputs, this documents input
7901 constraints. Which helps in understanding the following code. */
7902 if (STACK_REG_P (operands[0])
7903 && ((REG_P (operands[1])
7904 && REGNO (operands[0]) == REGNO (operands[1])
7905 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7906 || (REG_P (operands[2])
7907 && REGNO (operands[0]) == REGNO (operands[2])
7908 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7909 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7915 switch (GET_CODE (operands[3]))
7918 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7919 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7927 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7928 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7936 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7937 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7945 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7946 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7960 if (GET_MODE (operands[0]) == SFmode)
7961 strcat (buf, "ss\t{%2, %0|%0, %2}");
7963 strcat (buf, "sd\t{%2, %0|%0, %2}");
7968 switch (GET_CODE (operands[3]))
7972 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7974 rtx temp = operands[2];
7975 operands[2] = operands[1];
7979 /* know operands[0] == operands[1]. */
7981 if (GET_CODE (operands[2]) == MEM)
7987 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7989 if (STACK_TOP_P (operands[0]))
7990 /* How is it that we are storing to a dead operand[2]?
7991 Well, presumably operands[1] is dead too. We can't
7992 store the result to st(0) as st(0) gets popped on this
7993 instruction. Instead store to operands[2] (which I
7994 think has to be st(1)). st(1) will be popped later.
7995 gcc <= 2.8.1 didn't have this check and generated
7996 assembly code that the Unixware assembler rejected. */
7997 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7999 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8003 if (STACK_TOP_P (operands[0]))
8004 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8006 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8011 if (GET_CODE (operands[1]) == MEM)
8017 if (GET_CODE (operands[2]) == MEM)
8023 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8026 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8027 derived assemblers, confusingly reverse the direction of
8028 the operation for fsub{r} and fdiv{r} when the
8029 destination register is not st(0). The Intel assembler
8030 doesn't have this brain damage. Read !SYSV386_COMPAT to
8031 figure out what the hardware really does. */
8032 if (STACK_TOP_P (operands[0]))
8033 p = "{p\t%0, %2|rp\t%2, %0}";
8035 p = "{rp\t%2, %0|p\t%0, %2}";
8037 if (STACK_TOP_P (operands[0]))
8038 /* As above for fmul/fadd, we can't store to st(0). */
8039 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8041 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8046 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8049 if (STACK_TOP_P (operands[0]))
8050 p = "{rp\t%0, %1|p\t%1, %0}";
8052 p = "{p\t%1, %0|rp\t%0, %1}";
8054 if (STACK_TOP_P (operands[0]))
8055 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8057 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8062 if (STACK_TOP_P (operands[0]))
8064 if (STACK_TOP_P (operands[1]))
8065 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8067 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8070 else if (STACK_TOP_P (operands[1]))
8073 p = "{\t%1, %0|r\t%0, %1}";
8075 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8081 p = "{r\t%2, %0|\t%0, %2}";
8083 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8096 /* Output code to initialize control word copies used by
8097 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8098 is set to control word rounding downwards. */
8100 emit_i387_cw_initialization (rtx normal, rtx round_down)
8102 rtx reg = gen_reg_rtx (HImode);
8104 emit_insn (gen_x86_fnstcw_1 (normal));
8105 emit_move_insn (reg, normal);
8106 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8108 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8110 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8111 emit_move_insn (round_down, reg);
8114 /* Output code for INSN to convert a float to a signed int. OPERANDS
8115 are the insn operands. The output may be [HSD]Imode and the input
8116 operand may be [SDX]Fmode. */
8119 output_fix_trunc (rtx insn, rtx *operands)
8121 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8122 int dimode_p = GET_MODE (operands[0]) == DImode;
8124 /* Jump through a hoop or two for DImode, since the hardware has no
8125 non-popping instruction. We used to do this a different way, but
8126 that was somewhat fragile and broke with post-reload splitters. */
8127 if (dimode_p && !stack_top_dies)
8128 output_asm_insn ("fld\t%y1", operands);
8130 if (!STACK_TOP_P (operands[1]))
8133 if (GET_CODE (operands[0]) != MEM)
8136 output_asm_insn ("fldcw\t%3", operands);
8137 if (stack_top_dies || dimode_p)
8138 output_asm_insn ("fistp%z0\t%0", operands);
8140 output_asm_insn ("fist%z0\t%0", operands);
8141 output_asm_insn ("fldcw\t%2", operands);
8146 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8147 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8148 when fucom should be used. */
8151 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8154 rtx cmp_op0 = operands[0];
8155 rtx cmp_op1 = operands[1];
8156 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8161 cmp_op1 = operands[2];
8165 if (GET_MODE (operands[0]) == SFmode)
8167 return "ucomiss\t{%1, %0|%0, %1}";
8169 return "comiss\t{%1, %0|%0, %1}";
8172 return "ucomisd\t{%1, %0|%0, %1}";
8174 return "comisd\t{%1, %0|%0, %1}";
8177 if (! STACK_TOP_P (cmp_op0))
8180 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8182 if (STACK_REG_P (cmp_op1)
8184 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8185 && REGNO (cmp_op1) != FIRST_STACK_REG)
8187 /* If both the top of the 387 stack dies, and the other operand
8188 is also a stack register that dies, then this must be a
8189 `fcompp' float compare */
8193 /* There is no double popping fcomi variant. Fortunately,
8194 eflags is immune from the fstp's cc clobbering. */
8196 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8198 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8206 return "fucompp\n\tfnstsw\t%0";
8208 return "fcompp\n\tfnstsw\t%0";
8221 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8223 static const char * const alt[24] =
8235 "fcomi\t{%y1, %0|%0, %y1}",
8236 "fcomip\t{%y1, %0|%0, %y1}",
8237 "fucomi\t{%y1, %0|%0, %y1}",
8238 "fucomip\t{%y1, %0|%0, %y1}",
8245 "fcom%z2\t%y2\n\tfnstsw\t%0",
8246 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8247 "fucom%z2\t%y2\n\tfnstsw\t%0",
8248 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8250 "ficom%z2\t%y2\n\tfnstsw\t%0",
8251 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8259 mask = eflags_p << 3;
8260 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8261 mask |= unordered_p << 1;
8262 mask |= stack_top_dies;
8275 ix86_output_addr_vec_elt (FILE *file, int value)
8277 const char *directive = ASM_LONG;
8282 directive = ASM_QUAD;
8288 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8292 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8295 fprintf (file, "%s%s%d-%s%d\n",
8296 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8297 else if (HAVE_AS_GOTOFF_IN_DATA)
8298 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8300 else if (TARGET_MACHO)
8302 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8303 machopic_output_function_base_name (file);
8304 fprintf(file, "\n");
8308 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8309 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8312 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8316 ix86_expand_clear (rtx dest)
8320 /* We play register width games, which are only valid after reload. */
8321 if (!reload_completed)
8324 /* Avoid HImode and its attendant prefix byte. */
8325 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8326 dest = gen_rtx_REG (SImode, REGNO (dest));
8328 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8330 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8331 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8333 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8334 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8340 /* X is an unchanging MEM. If it is a constant pool reference, return
8341 the constant pool rtx, else NULL. */
8344 maybe_get_pool_constant (rtx x)
8346 x = ix86_delegitimize_address (XEXP (x, 0));
8348 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8349 return get_pool_constant (x);
8355 ix86_expand_move (enum machine_mode mode, rtx operands[])
8357 int strict = (reload_in_progress || reload_completed);
8359 enum tls_model model;
8364 model = tls_symbolic_operand (op1, Pmode);
8367 op1 = legitimize_tls_address (op1, model, true);
8368 op1 = force_operand (op1, op0);
8373 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8378 rtx temp = ((reload_in_progress
8379 || ((op0 && GET_CODE (op0) == REG)
8381 ? op0 : gen_reg_rtx (Pmode));
8382 op1 = machopic_indirect_data_reference (op1, temp);
8383 op1 = machopic_legitimize_pic_address (op1, mode,
8384 temp == op1 ? 0 : temp);
8386 else if (MACHOPIC_INDIRECT)
8387 op1 = machopic_indirect_data_reference (op1, 0);
8391 if (GET_CODE (op0) == MEM)
8392 op1 = force_reg (Pmode, op1);
8396 if (GET_CODE (temp) != REG)
8397 temp = gen_reg_rtx (Pmode);
8398 temp = legitimize_pic_address (op1, temp);
8403 #endif /* TARGET_MACHO */
8407 if (GET_CODE (op0) == MEM
8408 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8409 || !push_operand (op0, mode))
8410 && GET_CODE (op1) == MEM)
8411 op1 = force_reg (mode, op1);
8413 if (push_operand (op0, mode)
8414 && ! general_no_elim_operand (op1, mode))
8415 op1 = copy_to_mode_reg (mode, op1);
8417 /* Force large constants in 64bit compilation into register
8418 to get them CSEed. */
8419 if (TARGET_64BIT && mode == DImode
8420 && immediate_operand (op1, mode)
8421 && !x86_64_zero_extended_value (op1)
8422 && !register_operand (op0, mode)
8423 && optimize && !reload_completed && !reload_in_progress)
8424 op1 = copy_to_mode_reg (mode, op1);
8426 if (FLOAT_MODE_P (mode))
8428 /* If we are loading a floating point constant to a register,
8429 force the value to memory now, since we'll get better code
8430 out the back end. */
8434 else if (GET_CODE (op1) == CONST_DOUBLE)
8436 op1 = validize_mem (force_const_mem (mode, op1));
8437 if (!register_operand (op0, mode))
8439 rtx temp = gen_reg_rtx (mode);
8440 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8441 emit_move_insn (op0, temp);
8448 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8452 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8454 /* Force constants other than zero into memory. We do not know how
8455 the instructions used to build constants modify the upper 64 bits
8456 of the register, once we have that information we may be able
8457 to handle some of them more efficiently. */
8458 if ((reload_in_progress | reload_completed) == 0
8459 && register_operand (operands[0], mode)
8460 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8461 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8463 /* Make operand1 a register if it isn't already. */
8465 && !register_operand (operands[0], mode)
8466 && !register_operand (operands[1], mode))
8468 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8469 emit_move_insn (operands[0], temp);
8473 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8476 /* Attempt to expand a binary operator. Make the expansion closer to the
8477 actual machine, then just general_operand, which will allow 3 separate
8478 memory references (one output, two input) in a single insn. */
8481 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8484 int matching_memory;
8485 rtx src1, src2, dst, op, clob;
8491 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8492 if (GET_RTX_CLASS (code) == 'c'
8493 && (rtx_equal_p (dst, src2)
8494 || immediate_operand (src1, mode)))
8501 /* If the destination is memory, and we do not have matching source
8502 operands, do things in registers. */
8503 matching_memory = 0;
8504 if (GET_CODE (dst) == MEM)
8506 if (rtx_equal_p (dst, src1))
8507 matching_memory = 1;
8508 else if (GET_RTX_CLASS (code) == 'c'
8509 && rtx_equal_p (dst, src2))
8510 matching_memory = 2;
8512 dst = gen_reg_rtx (mode);
8515 /* Both source operands cannot be in memory. */
8516 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8518 if (matching_memory != 2)
8519 src2 = force_reg (mode, src2);
8521 src1 = force_reg (mode, src1);
8524 /* If the operation is not commutable, source 1 cannot be a constant
8525 or non-matching memory. */
8526 if ((CONSTANT_P (src1)
8527 || (!matching_memory && GET_CODE (src1) == MEM))
8528 && GET_RTX_CLASS (code) != 'c')
8529 src1 = force_reg (mode, src1);
8531 /* If optimizing, copy to regs to improve CSE */
8532 if (optimize && ! no_new_pseudos)
8534 if (GET_CODE (dst) == MEM)
8535 dst = gen_reg_rtx (mode);
8536 if (GET_CODE (src1) == MEM)
8537 src1 = force_reg (mode, src1);
8538 if (GET_CODE (src2) == MEM)
8539 src2 = force_reg (mode, src2);
8542 /* Emit the instruction. */
8544 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8545 if (reload_in_progress)
8547 /* Reload doesn't know about the flags register, and doesn't know that
8548 it doesn't want to clobber it. We can only do this with PLUS. */
8555 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8556 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8559 /* Fix up the destination if needed. */
8560 if (dst != operands[0])
8561 emit_move_insn (operands[0], dst);
8564 /* Return TRUE or FALSE depending on whether the binary operator meets the
8565 appropriate constraints. */
8568 ix86_binary_operator_ok (enum rtx_code code,
8569 enum machine_mode mode ATTRIBUTE_UNUSED,
8572 /* Both source operands cannot be in memory. */
8573 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8575 /* If the operation is not commutable, source 1 cannot be a constant. */
8576 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8578 /* If the destination is memory, we must have a matching source operand. */
8579 if (GET_CODE (operands[0]) == MEM
8580 && ! (rtx_equal_p (operands[0], operands[1])
8581 || (GET_RTX_CLASS (code) == 'c'
8582 && rtx_equal_p (operands[0], operands[2]))))
8584 /* If the operation is not commutable and the source 1 is memory, we must
8585 have a matching destination. */
8586 if (GET_CODE (operands[1]) == MEM
8587 && GET_RTX_CLASS (code) != 'c'
8588 && ! rtx_equal_p (operands[0], operands[1]))
8593 /* Attempt to expand a unary operator. Make the expansion closer to the
8594 actual machine, then just general_operand, which will allow 2 separate
8595 memory references (one output, one input) in a single insn. */
8598 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8601 int matching_memory;
8602 rtx src, dst, op, clob;
8607 /* If the destination is memory, and we do not have matching source
8608 operands, do things in registers. */
8609 matching_memory = 0;
8610 if (GET_CODE (dst) == MEM)
8612 if (rtx_equal_p (dst, src))
8613 matching_memory = 1;
8615 dst = gen_reg_rtx (mode);
8618 /* When source operand is memory, destination must match. */
8619 if (!matching_memory && GET_CODE (src) == MEM)
8620 src = force_reg (mode, src);
8622 /* If optimizing, copy to regs to improve CSE */
8623 if (optimize && ! no_new_pseudos)
8625 if (GET_CODE (dst) == MEM)
8626 dst = gen_reg_rtx (mode);
8627 if (GET_CODE (src) == MEM)
8628 src = force_reg (mode, src);
8631 /* Emit the instruction. */
8633 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8634 if (reload_in_progress || code == NOT)
8636 /* Reload doesn't know about the flags register, and doesn't know that
8637 it doesn't want to clobber it. */
8644 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8645 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8648 /* Fix up the destination if needed. */
8649 if (dst != operands[0])
8650 emit_move_insn (operands[0], dst);
8653 /* Return TRUE or FALSE depending on whether the unary operator meets the
8654 appropriate constraints. */
8657 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8658 enum machine_mode mode ATTRIBUTE_UNUSED,
8659 rtx operands[2] ATTRIBUTE_UNUSED)
8661 /* If one of operands is memory, source and destination must match. */
8662 if ((GET_CODE (operands[0]) == MEM
8663 || GET_CODE (operands[1]) == MEM)
8664 && ! rtx_equal_p (operands[0], operands[1]))
8669 /* Return TRUE or FALSE depending on whether the first SET in INSN
8670 has source and destination with matching CC modes, and that the
8671 CC mode is at least as constrained as REQ_MODE. */
8674 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8677 enum machine_mode set_mode;
8679 set = PATTERN (insn);
8680 if (GET_CODE (set) == PARALLEL)
8681 set = XVECEXP (set, 0, 0);
8682 if (GET_CODE (set) != SET)
8684 if (GET_CODE (SET_SRC (set)) != COMPARE)
8687 set_mode = GET_MODE (SET_DEST (set));
8691 if (req_mode != CCNOmode
8692 && (req_mode != CCmode
8693 || XEXP (SET_SRC (set), 1) != const0_rtx))
8697 if (req_mode == CCGCmode)
8701 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8705 if (req_mode == CCZmode)
8715 return (GET_MODE (SET_SRC (set)) == set_mode);
8718 /* Generate insn patterns to do an integer compare of OPERANDS. */
8721 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8723 enum machine_mode cmpmode;
8726 cmpmode = SELECT_CC_MODE (code, op0, op1);
8727 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8729 /* This is very simple, but making the interface the same as in the
8730 FP case makes the rest of the code easier. */
8731 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8732 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8734 /* Return the test that should be put into the flags user, i.e.
8735 the bcc, scc, or cmov instruction. */
8736 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8739 /* Figure out whether to use ordered or unordered fp comparisons.
8740 Return the appropriate mode to use. */
8743 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8745 /* ??? In order to make all comparisons reversible, we do all comparisons
8746 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8747 all forms trapping and nontrapping comparisons, we can make inequality
8748 comparisons trapping again, since it results in better code when using
8749 FCOM based compares. */
8750 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8754 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8756 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8757 return ix86_fp_compare_mode (code);
8760 /* Only zero flag is needed. */
8762 case NE: /* ZF!=0 */
8764 /* Codes needing carry flag. */
8765 case GEU: /* CF=0 */
8766 case GTU: /* CF=0 & ZF=0 */
8767 case LTU: /* CF=1 */
8768 case LEU: /* CF=1 | ZF=1 */
8770 /* Codes possibly doable only with sign flag when
8771 comparing against zero. */
8772 case GE: /* SF=OF or SF=0 */
8773 case LT: /* SF<>OF or SF=1 */
8774 if (op1 == const0_rtx)
8777 /* For other cases Carry flag is not required. */
8779 /* Codes doable only with sign flag when comparing
8780 against zero, but we miss jump instruction for it
8781 so we need to use relational tests against overflow
8782 that thus needs to be zero. */
8783 case GT: /* ZF=0 & SF=OF */
8784 case LE: /* ZF=1 | SF<>OF */
8785 if (op1 == const0_rtx)
8789 /* strcmp pattern do (use flags) and combine may ask us for proper
8798 /* Return the fixed registers used for condition codes. */
8801 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8808 /* If two condition code modes are compatible, return a condition code
8809 mode which is compatible with both. Otherwise, return
8812 static enum machine_mode
8813 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8818 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8821 if ((m1 == CCGCmode && m2 == CCGOCmode)
8822 || (m1 == CCGOCmode && m2 == CCGCmode))
8850 /* These are only compatible with themselves, which we already
8856 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8859 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8861 enum rtx_code swapped_code = swap_condition (code);
8862 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8863 || (ix86_fp_comparison_cost (swapped_code)
8864 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8867 /* Swap, force into registers, or otherwise massage the two operands
8868 to a fp comparison. The operands are updated in place; the new
8869 comparison code is returned. */
8871 static enum rtx_code
8872 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8874 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8875 rtx op0 = *pop0, op1 = *pop1;
8876 enum machine_mode op_mode = GET_MODE (op0);
8877 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8879 /* All of the unordered compare instructions only work on registers.
8880 The same is true of the XFmode compare instructions. The same is
8881 true of the fcomi compare instructions. */
8884 && (fpcmp_mode == CCFPUmode
8885 || op_mode == XFmode
8886 || ix86_use_fcomi_compare (code)))
8888 op0 = force_reg (op_mode, op0);
8889 op1 = force_reg (op_mode, op1);
8893 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8894 things around if they appear profitable, otherwise force op0
8897 if (standard_80387_constant_p (op0) == 0
8898 || (GET_CODE (op0) == MEM
8899 && ! (standard_80387_constant_p (op1) == 0
8900 || GET_CODE (op1) == MEM)))
8903 tmp = op0, op0 = op1, op1 = tmp;
8904 code = swap_condition (code);
8907 if (GET_CODE (op0) != REG)
8908 op0 = force_reg (op_mode, op0);
8910 if (CONSTANT_P (op1))
8912 if (standard_80387_constant_p (op1))
8913 op1 = force_reg (op_mode, op1);
8915 op1 = validize_mem (force_const_mem (op_mode, op1));
8919 /* Try to rearrange the comparison to make it cheaper. */
8920 if (ix86_fp_comparison_cost (code)
8921 > ix86_fp_comparison_cost (swap_condition (code))
8922 && (GET_CODE (op1) == REG || !no_new_pseudos))
8925 tmp = op0, op0 = op1, op1 = tmp;
8926 code = swap_condition (code);
8927 if (GET_CODE (op0) != REG)
8928 op0 = force_reg (op_mode, op0);
8936 /* Convert comparison codes we use to represent FP comparison to integer
8937 code that will result in proper branch. Return UNKNOWN if no such code
8939 static enum rtx_code
8940 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): numbered excerpt -- gaps in the leading source-line numbers
   indicate elided code (case bodies, braces, returns).  */
8969 /* Split comparison code CODE into comparisons we can do using branch
8970 instructions. BYPASS_CODE is comparison code for branch that will
8971 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8972 is not required, its value is set to NIL.
8973 We never require more than two branches. */
8975 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8976 enum rtx_code *first_code,
8977 enum rtx_code *second_code)
8983 /* The fcomi comparison sets flags as follows:
/* The cases below map each RTL comparison code onto the unsigned integer
   condition that tests the corresponding fcomi flag pattern.  */
8993 case GT: /* GTU - CF=0 & ZF=0 */
8994 case GE: /* GEU - CF=0 */
8995 case ORDERED: /* PF=0 */
8996 case UNORDERED: /* PF=1 */
8997 case UNEQ: /* EQ - ZF=1 */
8998 case UNLT: /* LTU - CF=1 */
8999 case UNLE: /* LEU - CF=1 | ZF=1 */
9000 case LTGT: /* EQ - ZF=0 */
/* Ordered comparisons that trap on NaN need a second jump: either a bypass
   branch taken when the operands are unordered, or a following branch.  */
9002 case LT: /* LTU - CF=1 - fails on unordered */
9004 *bypass_code = UNORDERED;
9006 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9008 *bypass_code = UNORDERED;
9010 case EQ: /* EQ - ZF=1 - fails on unordered */
9012 *bypass_code = UNORDERED;
9014 case NE: /* NE - ZF=0 - fails on unordered */
9016 *second_code = UNORDERED;
9018 case UNGE: /* GEU - CF=0 - fails on unordered */
9020 *second_code = UNORDERED;
9022 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9024 *second_code = UNORDERED;
9029 if (!TARGET_IEEE_FP)
9036 /* Return cost of comparison done fcom + arithmetics operations on AX.
9037 All following cost functions use the number of instructions as the cost
9038 metric.  In future this should be tweaked to compute bytes for optimize_size
9039 and take into account performance of various instructions on various CPUs. */
9041 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9043 if (!TARGET_IEEE_FP)
9045 /* The cost of code output by ix86_expand_fp_compare. */
/* NOTE(review): the per-code cost switch (source lines 9046-9072) is elided
   from this numbered excerpt.  */
9073 /* Return cost of comparison done using fcomi operation.
9074 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9076 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9078 enum rtx_code bypass_code, first_code, second_code;
9079 /* Return arbitrarily high cost when instruction is not supported - this
9080 prevents gcc from using it. */
/* NOTE(review): the early-return guard (source lines 9081-9082) is elided
   from this numbered excerpt.  */
9083 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost of 2 for fcomi + jump, plus 1 when an extra branch is needed.  */
9084 return (bypass_code != NIL || second_code != NIL) + 2;
9087 /* Return cost of comparison done using sahf operation.
9088 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9090 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9092 enum rtx_code bypass_code, first_code, second_code;
9093 /* Return arbitrarily high cost when instruction is not preferred - this
9094 avoids gcc from using it. */
9095 if (!TARGET_USE_SAHF && !optimize_size)
9097 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost of 3 for fnstsw + sahf + jump, plus 1 for an extra branch.  */
9098 return (bypass_code != NIL || second_code != NIL) + 3;
9101 /* Compute cost of the comparison done using any method.
9102 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9104 ix86_fp_comparison_cost (enum rtx_code code)
9106 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9109 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9110 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies' costs.  */
9112 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9113 if (min > sahf_cost)
9115 if (min > fcomi_cost)
/* NOTE(review): the assignments to `min` and the final return (source lines
   9114, 9116-9118) are elided from this numbered excerpt.  */
9120 /* Generate insn patterns to do a floating point compare of OPERANDS.
   Returns the flag-register test RTX to be used by the bcc/scc/cmov user.
   May set *SECOND_TEST / *BYPASS_TEST when a single branch does not suffice.
   NOTE(review): numbered excerpt -- gaps in the leading source-line numbers
   indicate elided code.  */
9123 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9124 rtx *second_test, rtx *bypass_test)
9126 enum machine_mode fpcmp_mode, intcmp_mode;
9128 int cost = ix86_fp_comparison_cost (code);
9129 enum rtx_code bypass_code, first_code, second_code;
9131 fpcmp_mode = ix86_fp_compare_mode (code);
9132 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9135 *second_test = NULL_RTX;
9137 *bypass_test = NULL_RTX;
9139 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9141 /* Do fcomi/sahf based test when profitable. */
9142 if ((bypass_code == NIL || bypass_test)
9143 && (second_code == NIL || second_test)
9144 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare straight into the flags register.  */
9148 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9149 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a scratch HImode reg, then copy AH into flags.  */
9155 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9156 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9158 scratch = gen_reg_rtx (HImode);
9159 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9160 emit_insn (gen_x86_sahf_1 (scratch));
9163 /* The FP codes work out to act like unsigned. */
9164 intcmp_mode = fpcmp_mode;
9166 if (bypass_code != NIL)
9167 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9168 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9170 if (second_code != NIL)
9171 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9172 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9177 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9178 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9179 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9181 scratch = gen_reg_rtx (HImode);
9182 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9184 /* In the unordered case, we have to check C2 for NaN's, which
9185 doesn't happen to work out to anything nice combination-wise.
9186 So do some bit twiddling on the value we've got in AH to come
9187 up with an appropriate set of condition codes. */
/* NOTE(review): the masks below (0x45, 0x44, 0x40, 0x05, 0x04, 0x01) select
   combinations of the x87 status-word condition bits C3/C2/C0 as they appear
   in AH after fnstsw -- presumably per the Intel x87 FSW layout; confirm
   against the architecture manual.  */
9189 intcmp_mode = CCNOmode;
9194 if (code == GT || !TARGET_IEEE_FP)
9196 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9201 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9202 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9203 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9204 intcmp_mode = CCmode;
9210 if (code == LT && TARGET_IEEE_FP)
9212 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9213 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9214 intcmp_mode = CCmode;
9219 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9225 if (code == GE || !TARGET_IEEE_FP)
9227 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9232 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9233 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9240 if (code == LE && TARGET_IEEE_FP)
9242 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9243 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9244 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9245 intcmp_mode = CCmode;
9250 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9256 if (code == EQ && TARGET_IEEE_FP)
9258 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9259 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9260 intcmp_mode = CCmode;
9265 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9272 if (code == NE && TARGET_IEEE_FP)
9274 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9275 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9281 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9287 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9291 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9300 /* Return the test that should be put into the flags user, i.e.
9301 the bcc, scc, or cmov instruction. */
9302 return gen_rtx_fmt_ee (code, VOIDmode,
9303 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison of ix86_compare_op0/op1 for CODE, dispatching to the
   FP or integer expander as appropriate, and return the flag-register test.
   NOTE(review): numbered excerpt -- some interior source lines are elided.  */
9308 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9311 op0 = ix86_compare_op0;
9312 op1 = ix86_compare_op1;
9315 *second_test = NULL_RTX;
9317 *bypass_test = NULL_RTX;
9319 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9320 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9321 second_test, bypass_test);
9323 ret = ix86_expand_int_compare (code, op0, op1);
9328 /* Return true if the CODE will result in nontrivial jump sequence. */
9330 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9332 enum rtx_code bypass_code, first_code, second_code;
/* NOTE(review): an early-out (source lines 9333-9334) is elided from this
   numbered excerpt.  */
9335 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Nontrivial means more than one branch is needed.  */
9336 return bypass_code != NIL || second_code != NIL;
/* Emit a conditional branch to LABEL on comparison CODE of the global
   ix86_compare_op0/op1, handling integer, FP and DImode operands.
   NOTE(review): numbered excerpt -- gaps in the leading source-line numbers
   indicate elided code (switch labels, braces, defaults).  */
9340 ix86_expand_branch (enum rtx_code code, rtx label)
9344 switch (GET_MODE (ix86_compare_op0))
/* Simple case: a single compare + jump suffices.  */
9350 tmp = ix86_expand_compare (code, NULL, NULL);
9351 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9352 gen_rtx_LABEL_REF (VOIDmode, label),
9354 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9363 enum rtx_code bypass_code, first_code, second_code;
9365 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9368 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9370 /* Check whether we will use the natural sequence with one jump. If
9371 so, we can expand jump early. Otherwise delay expansion by
9372 creating compound insn to not confuse optimizers. */
9373 if (bypass_code == NIL && second_code == NIL
9376 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9377 gen_rtx_LABEL_REF (VOIDmode, label),
9382 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9383 ix86_compare_op0, ix86_compare_op1);
9384 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9385 gen_rtx_LABEL_REF (VOIDmode, label),
9387 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9389 use_fcomi = ix86_use_fcomi_compare (code);
9390 vec = rtvec_alloc (3 + !use_fcomi);
9391 RTVEC_ELT (vec, 0) = tmp;
/* Clobber the FP condition-code registers (and a scratch for fnstsw when
   fcomi is not used) so later passes know they are destroyed.  */
9393 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9395 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9398 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9400 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9408 /* Expand DImode branch into multiple compare+branch. */
9410 rtx lo[2], hi[2], label2;
9411 enum rtx_code code1, code2, code3;
/* Canonicalize so any constant ends up as the second operand.  */
9413 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9415 tmp = ix86_compare_op0;
9416 ix86_compare_op0 = ix86_compare_op1;
9417 ix86_compare_op1 = tmp;
9418 code = swap_condition (code);
9420 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9421 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9423 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9424 avoid two branches. This costs one extra insn, so disable when
9425 optimizing for size. */
9427 if ((code == EQ || code == NE)
9429 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9434 if (hi[1] != const0_rtx)
9435 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9436 NULL_RTX, 0, OPTAB_WIDEN);
9439 if (lo[1] != const0_rtx)
9440 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9441 NULL_RTX, 0, OPTAB_WIDEN);
9443 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9444 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: compare the OR of the XORs against zero.  */
9446 ix86_compare_op0 = tmp;
9447 ix86_compare_op1 = const0_rtx;
9448 ix86_expand_branch (code, label);
9452 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9453 op1 is a constant and the low word is zero, then we can just
9454 examine the high word. */
9456 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9459 case LT: case LTU: case GE: case GEU:
9460 ix86_compare_op0 = hi[0];
9461 ix86_compare_op1 = hi[1];
9462 ix86_expand_branch (code, label);
9468 /* Otherwise, we need two or three jumps. */
9470 label2 = gen_label_rtx ();
9473 code2 = swap_condition (code);
9474 code3 = unsigned_condition (code);
9478 case LT: case GT: case LTU: case GTU:
9481 case LE: code1 = LT; code2 = GT; break;
9482 case GE: code1 = GT; code2 = LT; break;
9483 case LEU: code1 = LTU; code2 = GTU; break;
9484 case GEU: code1 = GTU; code2 = LTU; break;
9486 case EQ: code1 = NIL; code2 = NE; break;
9487 case NE: code2 = NIL; break;
9495 * if (hi(a) < hi(b)) goto true;
9496 * if (hi(a) > hi(b)) goto false;
9497 * if (lo(a) < lo(b)) goto true;
9501 ix86_compare_op0 = hi[0];
9502 ix86_compare_op1 = hi[1];
9505 ix86_expand_branch (code1, label);
9507 ix86_expand_branch (code2, label2);
9509 ix86_compare_op0 = lo[0];
9510 ix86_compare_op1 = lo[1];
9511 ix86_expand_branch (code3, label);
9514 emit_label (label2);
9523 /* Split branch based on floating point condition.  Emits up to three
   conditional jumps (bypass, main, second) and attaches branch-probability
   notes when split_branch_probability is known.
   NOTE(review): numbered excerpt -- some interior source lines are elided.  */
9525 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9526 rtx target1, rtx target2, rtx tmp)
9529 rtx label = NULL_RTX;
9531 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through (target2) is the pc.  */
9534 if (target2 != pc_rtx)
9537 code = reverse_condition_maybe_unordered (code);
9542 condition = ix86_expand_fp_compare (code, op1, op2,
9543 tmp, &second, &bypass);
9545 if (split_branch_probability >= 0)
9547 /* Distribute the probabilities across the jumps.
9548 Assume the BYPASS and SECOND to be always test
9550 probability = split_branch_probability;
9552 /* Value of 1 is low enough to make no need for probability
9553 to be updated. Later we may run some experiments and see
9554 if unordered values are more frequent in practice. */
9556 bypass_probability = 1;
9558 second_probability = 1;
/* Bypass jump: skips the main test when the operands are unordered.  */
9560 if (bypass != NULL_RTX)
9562 label = gen_label_rtx ();
9563 i = emit_jump_insn (gen_rtx_SET
9565 gen_rtx_IF_THEN_ELSE (VOIDmode,
9567 gen_rtx_LABEL_REF (VOIDmode,
9570 if (bypass_probability >= 0)
9572 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9573 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9576 i = emit_jump_insn (gen_rtx_SET
9578 gen_rtx_IF_THEN_ELSE (VOIDmode,
9579 condition, target1, target2)));
9580 if (probability >= 0)
9582 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9583 GEN_INT (probability),
/* Second jump: taken when the extra condition also selects target1.  */
9585 if (second != NULL_RTX)
9587 i = emit_jump_insn (gen_rtx_SET
9589 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9591 if (second_probability >= 0)
9593 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9594 GEN_INT (second_probability),
9597 if (label != NULL_RTX)
/* Expand a setcc of comparison CODE into DEST (a QImode register).
   Returns 1 on success (DONE) and 0 on failure (FAIL).
   NOTE(review): numbered excerpt -- some interior source lines are elided.  */
9602 ix86_expand_setcc (enum rtx_code code, rtx dest)
9604 rtx ret, tmp, tmpreg, equiv;
9605 rtx second_test, bypass_test;
9607 if (GET_MODE (ix86_compare_op0) == DImode
9609 return 0; /* FAIL */
9611 if (GET_MODE (dest) != QImode)
9614 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9615 PUT_MODE (ret, QImode);
9620 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* When the FP comparison required extra tests, materialize the second
   setcc and combine it with the first via AND (bypass) or OR (second).  */
9621 if (bypass_test || second_test)
9623 rtx test = second_test;
9625 rtx tmp2 = gen_reg_rtx (QImode);
9632 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9634 PUT_MODE (test, QImode);
9635 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9638 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9640 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9643 /* Attach a REG_EQUAL note describing the comparison result. */
9644 equiv = simplify_gen_relational (code, QImode,
9645 GET_MODE (ix86_compare_op0),
9646 ix86_compare_op0, ix86_compare_op1);
9647 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9649 return 1; /* DONE */
9652 /* Expand comparison setting or clearing carry flag. Return true when
9653 successful and set pop for the operation.
   NOTE(review): numbered excerpt -- some interior source lines are elided.  */
9655 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9657 enum machine_mode mode =
9658 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9660 /* Do not handle DImode compares that go through the special path. Also we
9661 can't deal with FP compares yet. This is possible to add. */
9662 if ((mode == DImode && !TARGET_64BIT))
9664 if (FLOAT_MODE_P (mode))
9666 rtx second_test = NULL, bypass_test = NULL;
9667 rtx compare_op, compare_seq;
9669 /* Shortcut: following common codes never translate into carry flag compares. */
9670 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9671 || code == ORDERED || code == UNORDERED)
9674 /* These comparisons require zero flag; swap operands so they won't. */
9675 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9681 code = swap_condition (code);
9684 /* Try to expand the comparison and verify that we end up with carry flag
9685 based comparison. This fails to be true only when we decide to expand
9686 comparison using arithmetic, which is not too common a scenario. */
9688 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9689 &second_test, &bypass_test);
9690 compare_seq = get_insns ();
9693 if (second_test || bypass_test)
9695 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9696 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9697 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9699 code = GET_CODE (compare_op);
9700 if (code != LTU && code != GEU)
9702 emit_insn (compare_seq);
9706 if (!INTEGRAL_MODE_P (mode))
9714 /* Convert a==0 into (unsigned)a<1. */
9717 if (op1 != const0_rtx)
9720 code = (code == EQ ? LTU : GEU);
9723 /* Convert a>b into b<a or a>=b-1. */
9726 if (GET_CODE (op1) == CONST_INT)
9728 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9729 /* Bail out on overflow. We still can swap operands but that
9730 would force loading of the constant into register. */
9731 if (op1 == const0_rtx
9732 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9734 code = (code == GTU ? GEU : LTU);
9741 code = (code == GTU ? LTU : GEU);
9745 /* Convert a>=0 into (unsigned)a<0x80000000. */
9748 if (mode == DImode || op1 != const0_rtx)
9750 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9751 code = (code == LT ? GEU : LTU);
9755 if (mode == DImode || op1 != constm1_rtx)
9757 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9758 code = (code == LE ? GEU : LTU);
9764 /* Swapping operands may cause constant to appear as first operand. */
9765 if (!nonimmediate_operand (op0, VOIDmode))
9769 op0 = force_reg (mode, op0);
9771 ix86_compare_op0 = op0;
9772 ix86_compare_op1 = op1;
/* Only accept the expansion if it really tests the carry flag.  */
9773 *pop = ix86_expand_compare (code, NULL, NULL);
9774 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move for OPERANDS[0] = OPERANDS[1] ?
   OPERANDS[2] : OPERANDS[3].  Returns 1 (DONE) or 0 (FAIL).
   NOTE(review): numbered excerpt -- gaps in the leading source-line numbers
   indicate elided code.  */
9780 ix86_expand_int_movcc (rtx operands[])
9782 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9783 rtx compare_seq, compare_op;
9784 rtx second_test, bypass_test;
9785 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon below -- harmless, but should be a
   single one.  */
9786 bool sign_bit_compare_p = false;;
9789 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9790 compare_seq = get_insns ();
9793 compare_code = GET_CODE (compare_op);
9795 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9796 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9797 sign_bit_compare_p = true;
9799 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9800 HImode insns, we'd be swallowed in word prefix ops. */
9802 if ((mode != HImode || TARGET_FAST_PREFIX)
9803 && (mode != DImode || TARGET_64BIT)
9804 && GET_CODE (operands[2]) == CONST_INT
9805 && GET_CODE (operands[3]) == CONST_INT)
9807 rtx out = operands[0];
9808 HOST_WIDE_INT ct = INTVAL (operands[2]);
9809 HOST_WIDE_INT cf = INTVAL (operands[3]);
9813 /* Sign bit compares are better done using shifts than we do by using
9815 if (sign_bit_compare_p
9816 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9817 ix86_compare_op1, &compare_op))
9819 /* Detect overlap between destination and compare sources. */
9822 if (!sign_bit_compare_p)
9826 compare_code = GET_CODE (compare_op);
9828 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9829 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9832 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9835 /* To simplify rest of code, restrict to the GEU case. */
9836 if (compare_code == LTU)
9838 HOST_WIDE_INT tmp = ct;
9841 compare_code = reverse_condition (compare_code);
9842 code = reverse_condition (code);
9847 PUT_CODE (compare_op,
9848 reverse_condition_maybe_unordered
9849 (GET_CODE (compare_op)));
9851 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9855 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9856 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9857 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag via sbb.  */
9860 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9862 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9866 if (code == GT || code == GE)
9867 code = reverse_condition (code);
9870 HOST_WIDE_INT tmp = ct;
9875 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9876 ix86_compare_op1, VOIDmode, 0, -1);
9889 tmp = expand_simple_binop (mode, PLUS,
9891 copy_rtx (tmp), 1, OPTAB_DIRECT);
9902 tmp = expand_simple_binop (mode, IOR,
9904 copy_rtx (tmp), 1, OPTAB_DIRECT);
9906 else if (diff == -1 && ct)
9916 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9918 tmp = expand_simple_binop (mode, PLUS,
9919 copy_rtx (tmp), GEN_INT (cf),
9920 copy_rtx (tmp), 1, OPTAB_DIRECT);
9928 * andl cf - ct, dest
9938 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9941 tmp = expand_simple_binop (mode, AND,
9943 gen_int_mode (cf - ct, mode),
9944 copy_rtx (tmp), 1, OPTAB_DIRECT);
9946 tmp = expand_simple_binop (mode, PLUS,
9947 copy_rtx (tmp), GEN_INT (ct),
9948 copy_rtx (tmp), 1, OPTAB_DIRECT);
9951 if (!rtx_equal_p (tmp, out))
9952 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9954 return 1; /* DONE */
9960 tmp = ct, ct = cf, cf = tmp;
9962 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9964 /* We may be reversing unordered compare to normal compare, that
9965 is not valid in general (we may convert non-trapping condition
9966 to trapping one), however on i386 we currently emit all
9967 comparisons unordered. */
9968 compare_code = reverse_condition_maybe_unordered (compare_code);
9969 code = reverse_condition_maybe_unordered (code);
9973 compare_code = reverse_condition (compare_code);
9974 code = reverse_condition (code);
9979 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9980 && GET_CODE (ix86_compare_op1) == CONST_INT)
9982 if (ix86_compare_op1 == const0_rtx
9983 && (code == LT || code == GE))
9984 compare_code = code;
9985 else if (ix86_compare_op1 == constm1_rtx)
9989 else if (code == GT)
9994 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9995 if (compare_code != NIL
9996 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9997 && (cf == -1 || ct == -1))
9999 /* If lea code below could be used, only optimize
10000 if it results in a 2 insn sequence. */
10002 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10003 || diff == 3 || diff == 5 || diff == 9)
10004 || (compare_code == LT && ct == -1)
10005 || (compare_code == GE && cf == -1))
10008 * notl op1 (if necessary)
10016 code = reverse_condition (code);
10019 out = emit_store_flag (out, code, ix86_compare_op0,
10020 ix86_compare_op1, VOIDmode, 0, -1);
10022 out = expand_simple_binop (mode, IOR,
10024 out, 1, OPTAB_DIRECT);
10025 if (out != operands[0])
10026 emit_move_insn (operands[0], out);
10028 return 1; /* DONE */
/* diff of the two constants matches an lea scale: setcc + lea sequence.  */
10033 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10034 || diff == 3 || diff == 5 || diff == 9)
10035 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10036 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10042 * lea cf(dest*(ct-cf)),dest
10046 * This also catches the degenerate setcc-only case.
10052 out = emit_store_flag (out, code, ix86_compare_op0,
10053 ix86_compare_op1, VOIDmode, 0, 1);
10056 /* On x86_64 the lea instruction operates on Pmode, so we need
10057 to get arithmetics done in proper mode to match. */
10059 tmp = copy_rtx (out);
10063 out1 = copy_rtx (out);
10064 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10068 tmp = gen_rtx_PLUS (mode, tmp, out1);
10074 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10077 if (!rtx_equal_p (tmp, out))
10080 out = force_operand (tmp, copy_rtx (out));
10082 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10084 if (!rtx_equal_p (out, operands[0]))
10085 emit_move_insn (operands[0], copy_rtx (out));
10087 return 1; /* DONE */
10091 * General case: Jumpful:
10092 * xorl dest,dest cmpl op1, op2
10093 * cmpl op1, op2 movl ct, dest
10094 * setcc dest jcc 1f
10095 * decl dest movl cf, dest
10096 * andl (cf-ct),dest 1:
10099 * Size 20. Size 14.
10101 * This is reasonably steep, but branch mispredict costs are
10102 * high on modern cpus, so consider failing only if optimizing
10106 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10107 && BRANCH_COST >= 2)
10113 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10114 /* We may be reversing unordered compare to normal compare,
10115 that is not valid in general (we may convert non-trapping
10116 condition to trapping one), however on i386 we currently
10117 emit all comparisons unordered. */
10118 code = reverse_condition_maybe_unordered (code);
10121 code = reverse_condition (code);
10122 if (compare_code != NIL)
10123 compare_code = reverse_condition (compare_code);
10127 if (compare_code != NIL)
10129 /* notl op1 (if needed)
10134 For x < 0 (resp. x <= -1) there will be no notl,
10135 so if possible swap the constants to get rid of the
10137 True/false will be -1/0 while code below (store flag
10138 followed by decrement) is 0/-1, so the constants need
10139 to be exchanged once more. */
10141 if (compare_code == GE || !cf)
10143 code = reverse_condition (code);
10148 HOST_WIDE_INT tmp = cf;
10153 out = emit_store_flag (out, code, ix86_compare_op0,
10154 ix86_compare_op1, VOIDmode, 0, -1);
10158 out = emit_store_flag (out, code, ix86_compare_op0,
10159 ix86_compare_op1, VOIDmode, 0, 1);
10161 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10162 copy_rtx (out), 1, OPTAB_DIRECT);
10165 out = expand_simple_binop (mode, AND, copy_rtx (out),
10166 gen_int_mode (cf - ct, mode),
10167 copy_rtx (out), 1, OPTAB_DIRECT);
10169 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10170 copy_rtx (out), 1, OPTAB_DIRECT);
10171 if (!rtx_equal_p (out, operands[0]))
10172 emit_move_insn (operands[0], copy_rtx (out));
10174 return 1; /* DONE */
10178 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10180 /* Try a few things more with specific constants and a variable. */
10183 rtx var, orig_out, out, tmp;
10185 if (BRANCH_COST <= 2)
10186 return 0; /* FAIL */
10188 /* If one of the two operands is an interesting constant, load a
10189 constant with the above and mask it in with a logical operation. */
10191 if (GET_CODE (operands[2]) == CONST_INT)
10194 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10195 operands[3] = constm1_rtx, op = and_optab;
10196 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10197 operands[3] = const0_rtx, op = ior_optab;
10199 return 0; /* FAIL */
10201 else if (GET_CODE (operands[3]) == CONST_INT)
10204 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10205 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the `operands[3] != const0_rtx` guard below is vacuous when
   INTVAL (operands[3]) == -1; by symmetry with the branch above it
   presumably should test operands[2] != const0_rtx -- verify against
   upstream history before changing.  */
10206 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10207 operands[2] = const0_rtx, op = ior_optab;
10209 return 0; /* FAIL */
10212 return 0; /* FAIL */
10214 orig_out = operands[0];
10215 tmp = gen_reg_rtx (mode);
10218 /* Recurse to get the constant loaded. */
10219 if (ix86_expand_int_movcc (operands) == 0)
10220 return 0; /* FAIL */
10222 /* Mask in the interesting variable. */
10223 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10225 if (!rtx_equal_p (out, orig_out))
10226 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10228 return 1; /* DONE */
10232 * For comparison with above,
/* Fallback: emit a real cmov; force operands into registers as needed.  */
10242 if (! nonimmediate_operand (operands[2], mode))
10243 operands[2] = force_reg (mode, operands[2]);
10244 if (! nonimmediate_operand (operands[3], mode))
10245 operands[3] = force_reg (mode, operands[3]);
10247 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10249 rtx tmp = gen_reg_rtx (mode);
10250 emit_move_insn (tmp, operands[3]);
10253 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10255 rtx tmp = gen_reg_rtx (mode);
10256 emit_move_insn (tmp, operands[2]);
10260 if (! register_operand (operands[2], VOIDmode)
10262 || ! register_operand (operands[3], VOIDmode)))
10263 operands[2] = force_reg (mode, operands[2]);
10266 && ! register_operand (operands[3], VOIDmode))
10267 operands[3] = force_reg (mode, operands[3]);
10269 emit_insn (compare_seq);
10270 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10271 gen_rtx_IF_THEN_ELSE (mode,
10272 compare_op, operands[2],
10275 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10276 gen_rtx_IF_THEN_ELSE (mode,
10278 copy_rtx (operands[3]),
10279 copy_rtx (operands[0]))));
10281 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10282 gen_rtx_IF_THEN_ELSE (mode,
10284 copy_rtx (operands[2]),
10285 copy_rtx (operands[0]))));
10287 return 1; /* DONE */
/* Expand a floating-point conditional move for OPERANDS, using SSE min/max
   or SSE movcc patterns when possible, otherwise fcmov.
   NOTE(review): numbered excerpt -- gaps in the leading source-line numbers
   indicate elided code.  */
10291 ix86_expand_fp_movcc (rtx operands[])
10293 enum rtx_code code;
10295 rtx compare_op, second_test, bypass_test;
10297 /* For SF/DFmode conditional moves based on comparisons
10298 in same mode, we may want to use SSE min/max instructions. */
10299 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10300 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10301 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10302 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10303 && (!TARGET_IEEE_FP
10304 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10305 /* We may be called from the post-reload splitter. */
10306 && (!REG_P (operands[0])
10307 || SSE_REG_P (operands[0])
10308 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10310 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10311 code = GET_CODE (operands[1]);
10313 /* See if we have (cross) match between comparison operands and
10314 conditional move operands. */
10315 if (rtx_equal_p (operands[2], op1))
10320 code = reverse_condition_maybe_unordered (code);
10322 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10324 /* Check for min operation. */
10325 if (code == LT || code == UNLE)
10333 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10334 if (memory_operand (op0, VOIDmode))
10335 op0 = force_reg (GET_MODE (operands[0]), op0);
10336 if (GET_MODE (operands[0]) == SFmode)
10337 emit_insn (gen_minsf3 (operands[0], op0, op1));
10339 emit_insn (gen_mindf3 (operands[0], op0, op1));
10342 /* Check for max operation. */
10343 if (code == GT || code == UNGE)
10351 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10352 if (memory_operand (op0, VOIDmode))
10353 op0 = force_reg (GET_MODE (operands[0]), op0);
10354 if (GET_MODE (operands[0]) == SFmode)
10355 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10357 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10361 /* Manage condition to be sse_comparison_operator. In case we are
10362 in non-ieee mode, try to canonicalize the destination operand
10363 to be first in the comparison - this helps reload to avoid extra
10365 if (!sse_comparison_operator (operands[1], VOIDmode)
10366 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10368 rtx tmp = ix86_compare_op0;
10369 ix86_compare_op0 = ix86_compare_op1;
10370 ix86_compare_op1 = tmp;
10371 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10372 VOIDmode, ix86_compare_op0,
10375 /* Similarly try to manage result to be first operand of conditional
10376 move. We also don't support the NE comparison on SSE, so try to
10378 if ((rtx_equal_p (operands[0], operands[3])
10379 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10380 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10382 rtx tmp = operands[2];
10383 operands[2] = operands[3];
10385 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10386 (GET_CODE (operands[1])),
10387 VOIDmode, ix86_compare_op0,
10390 if (GET_MODE (operands[0]) == SFmode)
10391 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10392 operands[2], operands[3],
10393 ix86_compare_op0, ix86_compare_op1));
10395 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10396 operands[2], operands[3],
10397 ix86_compare_op0, ix86_compare_op1));
10401 /* The floating point conditional move instructions don't directly
10402 support conditions resulting from a signed integer comparison. */
10404 code = GET_CODE (operands[1]);
10405 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10407 /* The floating point conditional move instructions don't directly
10408 support signed integer comparisons. */
10410 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10412 if (second_test != NULL || bypass_test != NULL)
/* Reduce to a QImode setcc result compared against zero.  */
10414 tmp = gen_reg_rtx (QImode);
10415 ix86_expand_setcc (code, tmp);
10417 ix86_compare_op0 = tmp;
10418 ix86_compare_op1 = const0_rtx;
10419 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10421 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10423 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10424 emit_move_insn (tmp, operands[3]);
10427 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10429 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10430 emit_move_insn (tmp, operands[2]);
/* Emit the main cmov, then additional cmovs for the bypass/second tests.  */
10434 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10435 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10440 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10441 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10446 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10447 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10455 /* Expand conditional increment or decrement using adc/sbb instructions.
10456 The default case using setcc followed by the conditional move can be
10457 done by generic code.
   NOTE(review): numbered excerpt -- some interior source lines are elided.  */
10459 ix86_expand_int_addcc (rtx operands[])
10461 enum rtx_code code = GET_CODE (operands[1]);
10463 rtx val = const0_rtx;
10464 bool fpcmp = false;
10465 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 adjustments can be done with adc/sbb of 0 against the carry.  */
10467 if (operands[3] != const1_rtx
10468 && operands[3] != constm1_rtx)
10470 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10471 ix86_compare_op1, &compare_op))
10473 code = GET_CODE (compare_op);
10475 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10476 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10479 code = ix86_fp_compare_code_to_integer (code);
10486 PUT_CODE (compare_op,
10487 reverse_condition_maybe_unordered
10488 (GET_CODE (compare_op)));
10490 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10492 PUT_MODE (compare_op, mode);
10494 /* Construct either adc or sbb insn. */
10495 if ((code == LTU) == (operands[3] == constm1_rtx))
10497 switch (GET_MODE (operands[0]))
10500 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10503 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10506 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10509 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10517 switch (GET_MODE (operands[0]))
10520 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10523 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10526 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10529 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10535 return 1; /* DONE */
10539 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10540 works for floating point parameters and nonoffsetable memories.
10541 For pushes, it returns just stack offsets; the values will be saved
10542 in the right order. Maximally three parts are generated. */
10545 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10550 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10552 size = (GET_MODE_SIZE (mode) + 4) / 8;
10554 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10556 if (size < 2 || size > 3)
10559 /* Optimize constant pool reference to immediates. This is used by fp
10560 moves, that force all constants to memory to allow combining. */
10561 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10563 rtx tmp = maybe_get_pool_constant (operand);
10568 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10570 /* The only non-offsetable memories we handle are pushes. */
10571 if (! push_operand (operand, VOIDmode))
10574 operand = copy_rtx (operand);
10575 PUT_MODE (operand, Pmode);
10576 parts[0] = parts[1] = parts[2] = operand;
10578 else if (!TARGET_64BIT)
10580 if (mode == DImode)
10581 split_di (&operand, 1, &parts[0], &parts[1]);
10584 if (REG_P (operand))
10586 if (!reload_completed)
10588 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10589 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10591 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10593 else if (offsettable_memref_p (operand))
10595 operand = adjust_address (operand, SImode, 0);
10596 parts[0] = operand;
10597 parts[1] = adjust_address (operand, SImode, 4);
10599 parts[2] = adjust_address (operand, SImode, 8);
10601 else if (GET_CODE (operand) == CONST_DOUBLE)
10606 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10610 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10611 parts[2] = gen_int_mode (l[2], SImode);
10614 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10619 parts[1] = gen_int_mode (l[1], SImode);
10620 parts[0] = gen_int_mode (l[0], SImode);
10628 if (mode == TImode)
10629 split_ti (&operand, 1, &parts[0], &parts[1]);
10630 if (mode == XFmode || mode == TFmode)
10632 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10633 if (REG_P (operand))
10635 if (!reload_completed)
10637 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10638 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10640 else if (offsettable_memref_p (operand))
10642 operand = adjust_address (operand, DImode, 0);
10643 parts[0] = operand;
10644 parts[1] = adjust_address (operand, upper_mode, 8);
10646 else if (GET_CODE (operand) == CONST_DOUBLE)
10651 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10652 real_to_target (l, &r, mode);
10654 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10655 if (HOST_BITS_PER_WIDE_INT >= 64)
10658 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10659 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10662 parts[0] = immed_double_const (l[0], l[1], DImode);
10664 if (upper_mode == SImode)
10665 parts[1] = gen_int_mode (l[2], SImode);
10666 else if (HOST_BITS_PER_WIDE_INT >= 64)
10669 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10670 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10673 parts[1] = immed_double_const (l[2], l[3], DImode);
10683 /* Emit insns to perform a move or push of DI, DF, and XF values.
10684 Return false when normal moves are needed; true when all required
10685 insns have been emitted. Operands 2-4 contain the input values
10686 in the correct order; operands 5-7 contain the output values. */
10689 ix86_split_long_move (rtx operands[])
10694 int collisions = 0;
10695 enum machine_mode mode = GET_MODE (operands[0]);
10697 /* The DFmode expanders may ask us to move double.
10698 For 64bit target this is single move. By hiding the fact
10699 here we simplify i386.md splitters. */
10700 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10702 /* Optimize constant pool reference to immediates. This is used by
10703 fp moves, that force all constants to memory to allow combining. */
10705 if (GET_CODE (operands[1]) == MEM
10706 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10707 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10708 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10709 if (push_operand (operands[0], VOIDmode))
10711 operands[0] = copy_rtx (operands[0]);
10712 PUT_MODE (operands[0], Pmode);
10715 operands[0] = gen_lowpart (DImode, operands[0]);
10716 operands[1] = gen_lowpart (DImode, operands[1]);
10717 emit_move_insn (operands[0], operands[1]);
10721 /* The only non-offsettable memory we handle is push. */
10722 if (push_operand (operands[0], VOIDmode))
10724 else if (GET_CODE (operands[0]) == MEM
10725 && ! offsettable_memref_p (operands[0]))
10728 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10729 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10731 /* When emitting push, take care for source operands on the stack. */
10732 if (push && GET_CODE (operands[1]) == MEM
10733 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10736 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10737 XEXP (part[1][2], 0));
10738 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10739 XEXP (part[1][1], 0));
10742 /* We need to do copy in the right order in case an address register
10743 of the source overlaps the destination. */
10744 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10746 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10748 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10751 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10754 /* Collision in the middle part can be handled by reordering. */
10755 if (collisions == 1 && nparts == 3
10756 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10759 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10760 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10763 /* If there are more collisions, we can't handle it by reordering.
10764 Do an lea to the last part and use only one colliding move. */
10765 else if (collisions > 1)
10771 base = part[0][nparts - 1];
10773 /* Handle the case when the last part isn't valid for lea.
10774 Happens in 64-bit mode storing the 12-byte XFmode. */
10775 if (GET_MODE (base) != Pmode)
10776 base = gen_rtx_REG (Pmode, REGNO (base));
10778 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10779 part[1][0] = replace_equiv_address (part[1][0], base);
10780 part[1][1] = replace_equiv_address (part[1][1],
10781 plus_constant (base, UNITS_PER_WORD));
10783 part[1][2] = replace_equiv_address (part[1][2],
10784 plus_constant (base, 8));
10794 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10795 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10796 emit_move_insn (part[0][2], part[1][2]);
10801 /* In 64bit mode we don't have 32bit push available. In case this is
10802 register, it is OK - we will just use larger counterpart. We also
10803 retype memory - these comes from attempt to avoid REX prefix on
10804 moving of second half of TFmode value. */
10805 if (GET_MODE (part[1][1]) == SImode)
10807 if (GET_CODE (part[1][1]) == MEM)
10808 part[1][1] = adjust_address (part[1][1], DImode, 0);
10809 else if (REG_P (part[1][1]))
10810 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10813 if (GET_MODE (part[1][0]) == SImode)
10814 part[1][0] = part[1][1];
10817 emit_move_insn (part[0][1], part[1][1]);
10818 emit_move_insn (part[0][0], part[1][0]);
10822 /* Choose correct order to not overwrite the source before it is copied. */
10823 if ((REG_P (part[0][0])
10824 && REG_P (part[1][1])
10825 && (REGNO (part[0][0]) == REGNO (part[1][1])
10827 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10829 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10833 operands[2] = part[0][2];
10834 operands[3] = part[0][1];
10835 operands[4] = part[0][0];
10836 operands[5] = part[1][2];
10837 operands[6] = part[1][1];
10838 operands[7] = part[1][0];
10842 operands[2] = part[0][1];
10843 operands[3] = part[0][0];
10844 operands[5] = part[1][1];
10845 operands[6] = part[1][0];
10852 operands[2] = part[0][0];
10853 operands[3] = part[0][1];
10854 operands[4] = part[0][2];
10855 operands[5] = part[1][0];
10856 operands[6] = part[1][1];
10857 operands[7] = part[1][2];
10861 operands[2] = part[0][0];
10862 operands[3] = part[0][1];
10863 operands[5] = part[1][0];
10864 operands[6] = part[1][1];
10867 emit_move_insn (operands[2], operands[5]);
10868 emit_move_insn (operands[3], operands[6]);
10870 emit_move_insn (operands[4], operands[7]);
10876 ix86_split_ashldi (rtx *operands, rtx scratch)
10878 rtx low[2], high[2];
10881 if (GET_CODE (operands[2]) == CONST_INT)
10883 split_di (operands, 2, low, high);
10884 count = INTVAL (operands[2]) & 63;
10888 emit_move_insn (high[0], low[1]);
10889 emit_move_insn (low[0], const0_rtx);
10892 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10896 if (!rtx_equal_p (operands[0], operands[1]))
10897 emit_move_insn (operands[0], operands[1]);
10898 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10899 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10904 if (!rtx_equal_p (operands[0], operands[1]))
10905 emit_move_insn (operands[0], operands[1]);
10907 split_di (operands, 1, low, high);
10909 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10910 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10912 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10914 if (! no_new_pseudos)
10915 scratch = force_reg (SImode, const0_rtx);
10917 emit_move_insn (scratch, const0_rtx);
10919 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10923 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10928 ix86_split_ashrdi (rtx *operands, rtx scratch)
10930 rtx low[2], high[2];
10933 if (GET_CODE (operands[2]) == CONST_INT)
10935 split_di (operands, 2, low, high);
10936 count = INTVAL (operands[2]) & 63;
10940 emit_move_insn (low[0], high[1]);
10942 if (! reload_completed)
10943 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10946 emit_move_insn (high[0], low[0]);
10947 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10951 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10955 if (!rtx_equal_p (operands[0], operands[1]))
10956 emit_move_insn (operands[0], operands[1]);
10957 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10958 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10963 if (!rtx_equal_p (operands[0], operands[1]))
10964 emit_move_insn (operands[0], operands[1]);
10966 split_di (operands, 1, low, high);
10968 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10969 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10971 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10973 if (! no_new_pseudos)
10974 scratch = gen_reg_rtx (SImode);
10975 emit_move_insn (scratch, high[0]);
10976 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10977 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10981 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10986 ix86_split_lshrdi (rtx *operands, rtx scratch)
10988 rtx low[2], high[2];
10991 if (GET_CODE (operands[2]) == CONST_INT)
10993 split_di (operands, 2, low, high);
10994 count = INTVAL (operands[2]) & 63;
10998 emit_move_insn (low[0], high[1]);
10999 emit_move_insn (high[0], const0_rtx);
11002 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11006 if (!rtx_equal_p (operands[0], operands[1]))
11007 emit_move_insn (operands[0], operands[1]);
11008 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11009 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11014 if (!rtx_equal_p (operands[0], operands[1]))
11015 emit_move_insn (operands[0], operands[1]);
11017 split_di (operands, 1, low, high);
11019 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11020 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11022 /* Heh. By reversing the arguments, we can reuse this pattern. */
11023 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11025 if (! no_new_pseudos)
11026 scratch = force_reg (SImode, const0_rtx);
11028 emit_move_insn (scratch, const0_rtx);
11030 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11034 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11038 /* Helper function for the string operations below. Test whether VARIABLE
11039 is aligned to VALUE bytes. If so, jump to the label. */
11041 ix86_expand_aligntest (rtx variable, int value)
11043 rtx label = gen_label_rtx ();
11044 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11045 if (GET_MODE (variable) == DImode)
11046 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11048 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11049 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11054 /* Adjust COUNTER by the VALUE. */
11056 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11058 if (GET_MODE (countreg) == DImode)
11059 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11061 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11064 /* Zero extend possibly SImode EXP to Pmode register. */
11066 ix86_zero_extend_to_Pmode (rtx exp)
11069 if (GET_MODE (exp) == VOIDmode)
11070 return force_reg (Pmode, exp);
11071 if (GET_MODE (exp) == Pmode)
11072 return copy_to_mode_reg (Pmode, exp);
11073 r = gen_reg_rtx (Pmode);
11074 emit_insn (gen_zero_extendsidi2 (r, exp));
11078 /* Expand string move (memcpy) operation. Use i386 string operations when
11079 profitable. expand_clrstr contains similar code. */
11081 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11083 rtx srcreg, destreg, countreg, srcexp, destexp;
11084 enum machine_mode counter_mode;
11085 HOST_WIDE_INT align = 0;
11086 unsigned HOST_WIDE_INT count = 0;
11088 if (GET_CODE (align_exp) == CONST_INT)
11089 align = INTVAL (align_exp);
11091 /* Can't use any of this if the user has appropriated esi or edi. */
11092 if (global_regs[4] || global_regs[5])
11095 /* This simple hack avoids all inlining code and simplifies code below. */
11096 if (!TARGET_ALIGN_STRINGOPS)
11099 if (GET_CODE (count_exp) == CONST_INT)
11101 count = INTVAL (count_exp);
11102 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11106 /* Figure out proper mode for counter. For 32bits it is always SImode,
11107 for 64bits use SImode when possible, otherwise DImode.
11108 Set count to number of bytes copied when known at compile time. */
11109 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11110 || x86_64_zero_extended_value (count_exp))
11111 counter_mode = SImode;
11113 counter_mode = DImode;
11115 if (counter_mode != SImode && counter_mode != DImode)
11118 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11119 if (destreg != XEXP (dst, 0))
11120 dst = replace_equiv_address_nv (dst, destreg);
11121 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11122 if (srcreg != XEXP (src, 0))
11123 src = replace_equiv_address_nv (src, srcreg);
11125 /* When optimizing for size emit simple rep ; movsb instruction for
11126 counts not divisible by 4. */
11128 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11130 emit_insn (gen_cld ());
11131 countreg = ix86_zero_extend_to_Pmode (count_exp);
11132 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11133 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11134 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11138 /* For constant aligned (or small unaligned) copies use rep movsl
11139 followed by code copying the rest. For PentiumPro ensure 8 byte
11140 alignment to allow rep movsl acceleration. */
11142 else if (count != 0
11144 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11145 || optimize_size || count < (unsigned int) 64))
11147 unsigned HOST_WIDE_INT offset = 0;
11148 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11149 rtx srcmem, dstmem;
11151 emit_insn (gen_cld ());
11152 if (count & ~(size - 1))
11154 countreg = copy_to_mode_reg (counter_mode,
11155 GEN_INT ((count >> (size == 4 ? 2 : 3))
11156 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11157 countreg = ix86_zero_extend_to_Pmode (countreg);
11159 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11160 GEN_INT (size == 4 ? 2 : 3));
11161 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11162 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11164 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11165 countreg, destexp, srcexp));
11166 offset = count & ~(size - 1);
11168 if (size == 8 && (count & 0x04))
11170 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11172 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11174 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11179 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11181 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11183 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11188 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11190 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11192 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11195 /* The generic code based on the glibc implementation:
11196 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11197 allowing accelerated copying there)
11198 - copy the data using rep movsl
11199 - copy the rest. */
11204 rtx srcmem, dstmem;
11205 int desired_alignment = (TARGET_PENTIUMPRO
11206 && (count == 0 || count >= (unsigned int) 260)
11207 ? 8 : UNITS_PER_WORD);
11208 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11209 dst = change_address (dst, BLKmode, destreg);
11210 src = change_address (src, BLKmode, srcreg);
11212 /* In case we don't know anything about the alignment, default to
11213 library version, since it is usually equally fast and result in
11216 Also emit call when we know that the count is large and call overhead
11217 will not be important. */
11218 if (!TARGET_INLINE_ALL_STRINGOPS
11219 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11222 if (TARGET_SINGLE_STRINGOP)
11223 emit_insn (gen_cld ());
11225 countreg2 = gen_reg_rtx (Pmode);
11226 countreg = copy_to_mode_reg (counter_mode, count_exp);
11228 /* We don't use loops to align destination and to copy parts smaller
11229 than 4 bytes, because gcc is able to optimize such code better (in
11230 the case the destination or the count really is aligned, gcc is often
11231 able to predict the branches) and also it is friendlier to the
11232 hardware branch prediction.
11234 Using loops is beneficial for generic case, because we can
11235 handle small counts using the loops. Many CPUs (such as Athlon)
11236 have large REP prefix setup costs.
11238 This is quite costly. Maybe we can revisit this decision later or
11239 add some customizability to this code. */
11241 if (count == 0 && align < desired_alignment)
11243 label = gen_label_rtx ();
11244 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11245 LEU, 0, counter_mode, 1, label);
11249 rtx label = ix86_expand_aligntest (destreg, 1);
11250 srcmem = change_address (src, QImode, srcreg);
11251 dstmem = change_address (dst, QImode, destreg);
11252 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11253 ix86_adjust_counter (countreg, 1);
11254 emit_label (label);
11255 LABEL_NUSES (label) = 1;
11259 rtx label = ix86_expand_aligntest (destreg, 2);
11260 srcmem = change_address (src, HImode, srcreg);
11261 dstmem = change_address (dst, HImode, destreg);
11262 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11263 ix86_adjust_counter (countreg, 2);
11264 emit_label (label);
11265 LABEL_NUSES (label) = 1;
11267 if (align <= 4 && desired_alignment > 4)
11269 rtx label = ix86_expand_aligntest (destreg, 4);
11270 srcmem = change_address (src, SImode, srcreg);
11271 dstmem = change_address (dst, SImode, destreg);
11272 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11273 ix86_adjust_counter (countreg, 4);
11274 emit_label (label);
11275 LABEL_NUSES (label) = 1;
11278 if (label && desired_alignment > 4 && !TARGET_64BIT)
11280 emit_label (label);
11281 LABEL_NUSES (label) = 1;
11284 if (!TARGET_SINGLE_STRINGOP)
11285 emit_insn (gen_cld ());
11288 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11290 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11294 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11295 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11297 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11298 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11299 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11300 countreg2, destexp, srcexp));
11304 emit_label (label);
11305 LABEL_NUSES (label) = 1;
11307 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11309 srcmem = change_address (src, SImode, srcreg);
11310 dstmem = change_address (dst, SImode, destreg);
11311 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11313 if ((align <= 4 || count == 0) && TARGET_64BIT)
11315 rtx label = ix86_expand_aligntest (countreg, 4);
11316 srcmem = change_address (src, SImode, srcreg);
11317 dstmem = change_address (dst, SImode, destreg);
11318 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11319 emit_label (label);
11320 LABEL_NUSES (label) = 1;
11322 if (align > 2 && count != 0 && (count & 2))
11324 srcmem = change_address (src, HImode, srcreg);
11325 dstmem = change_address (dst, HImode, destreg);
11326 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11328 if (align <= 2 || count == 0)
11330 rtx label = ix86_expand_aligntest (countreg, 2);
11331 srcmem = change_address (src, HImode, srcreg);
11332 dstmem = change_address (dst, HImode, destreg);
11333 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11334 emit_label (label);
11335 LABEL_NUSES (label) = 1;
11337 if (align > 1 && count != 0 && (count & 1))
11339 srcmem = change_address (src, QImode, srcreg);
11340 dstmem = change_address (dst, QImode, destreg);
11341 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11343 if (align <= 1 || count == 0)
11345 rtx label = ix86_expand_aligntest (countreg, 1);
11346 srcmem = change_address (src, QImode, srcreg);
11347 dstmem = change_address (dst, QImode, destreg);
11348 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11349 emit_label (label);
11350 LABEL_NUSES (label) = 1;
11357 /* Expand string clear operation (bzero). Use i386 string operations when
11358 profitable. expand_movstr contains similar code. */
11360 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11362 rtx destreg, zeroreg, countreg, destexp;
11363 enum machine_mode counter_mode;
11364 HOST_WIDE_INT align = 0;
11365 unsigned HOST_WIDE_INT count = 0;
11367 if (GET_CODE (align_exp) == CONST_INT)
11368 align = INTVAL (align_exp);
11370 /* Can't use any of this if the user has appropriated esi. */
11371 if (global_regs[4])
11374 /* This simple hack avoids all inlining code and simplifies code below. */
11375 if (!TARGET_ALIGN_STRINGOPS)
11378 if (GET_CODE (count_exp) == CONST_INT)
11380 count = INTVAL (count_exp);
11381 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11384 /* Figure out proper mode for counter. For 32bits it is always SImode,
11385 for 64bits use SImode when possible, otherwise DImode.
11386 Set count to number of bytes copied when known at compile time. */
11387 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11388 || x86_64_zero_extended_value (count_exp))
11389 counter_mode = SImode;
11391 counter_mode = DImode;
11393 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11394 if (destreg != XEXP (dst, 0))
11395 dst = replace_equiv_address_nv (dst, destreg);
11397 emit_insn (gen_cld ());
11399 /* When optimizing for size emit simple rep ; movsb instruction for
11400 counts not divisible by 4. */
11402 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11404 countreg = ix86_zero_extend_to_Pmode (count_exp);
11405 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11406 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11407 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11409 else if (count != 0
11411 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11412 || optimize_size || count < (unsigned int) 64))
11414 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11415 unsigned HOST_WIDE_INT offset = 0;
11417 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11418 if (count & ~(size - 1))
11420 countreg = copy_to_mode_reg (counter_mode,
11421 GEN_INT ((count >> (size == 4 ? 2 : 3))
11422 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11423 countreg = ix86_zero_extend_to_Pmode (countreg);
11424 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11425 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11426 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11427 offset = count & ~(size - 1);
11429 if (size == 8 && (count & 0x04))
11431 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11433 emit_insn (gen_strset (destreg, mem,
11434 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11439 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11441 emit_insn (gen_strset (destreg, mem,
11442 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11447 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11449 emit_insn (gen_strset (destreg, mem,
11450 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11457 /* Compute desired alignment of the string operation. */
11458 int desired_alignment = (TARGET_PENTIUMPRO
11459 && (count == 0 || count >= (unsigned int) 260)
11460 ? 8 : UNITS_PER_WORD);
11462 /* In case we don't know anything about the alignment, default to
11463 library version, since it is usually equally fast and result in
11466 Also emit call when we know that the count is large and call overhead
11467 will not be important. */
11468 if (!TARGET_INLINE_ALL_STRINGOPS
11469 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11472 if (TARGET_SINGLE_STRINGOP)
11473 emit_insn (gen_cld ());
11475 countreg2 = gen_reg_rtx (Pmode);
11476 countreg = copy_to_mode_reg (counter_mode, count_exp);
11477 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11478 /* Get rid of MEM_OFFSET, it won't be accurate. */
11479 dst = change_address (dst, BLKmode, destreg);
11481 if (count == 0 && align < desired_alignment)
11483 label = gen_label_rtx ();
11484 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11485 LEU, 0, counter_mode, 1, label);
11489 rtx label = ix86_expand_aligntest (destreg, 1);
11490 emit_insn (gen_strset (destreg, dst,
11491 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11492 ix86_adjust_counter (countreg, 1);
11493 emit_label (label);
11494 LABEL_NUSES (label) = 1;
11498 rtx label = ix86_expand_aligntest (destreg, 2);
11499 emit_insn (gen_strset (destreg, dst,
11500 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11501 ix86_adjust_counter (countreg, 2);
11502 emit_label (label);
11503 LABEL_NUSES (label) = 1;
11505 if (align <= 4 && desired_alignment > 4)
11507 rtx label = ix86_expand_aligntest (destreg, 4);
11508 emit_insn (gen_strset (destreg, dst,
11510 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11512 ix86_adjust_counter (countreg, 4);
11513 emit_label (label);
11514 LABEL_NUSES (label) = 1;
11517 if (label && desired_alignment > 4 && !TARGET_64BIT)
11519 emit_label (label);
11520 LABEL_NUSES (label) = 1;
11524 if (!TARGET_SINGLE_STRINGOP)
11525 emit_insn (gen_cld ());
11528 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11530 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11534 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11535 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11537 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11538 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11542 emit_label (label);
11543 LABEL_NUSES (label) = 1;
11546 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11547 emit_insn (gen_strset (destreg, dst,
11548 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11549 if (TARGET_64BIT && (align <= 4 || count == 0))
11551 rtx label = ix86_expand_aligntest (countreg, 4);
11552 emit_insn (gen_strset (destreg, dst,
11553 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11554 emit_label (label);
11555 LABEL_NUSES (label) = 1;
11557 if (align > 2 && count != 0 && (count & 2))
11558 emit_insn (gen_strset (destreg, dst,
11559 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11560 if (align <= 2 || count == 0)
11562 rtx label = ix86_expand_aligntest (countreg, 2);
11563 emit_insn (gen_strset (destreg, dst,
11564 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11565 emit_label (label);
11566 LABEL_NUSES (label) = 1;
11568 if (align > 1 && count != 0 && (count & 1))
11569 emit_insn (gen_strset (destreg, dst,
11570 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11571 if (align <= 1 || count == 0)
11573 rtx label = ix86_expand_aligntest (countreg, 1);
11574 emit_insn (gen_strset (destreg, dst,
11575 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11576 emit_label (label);
11577 LABEL_NUSES (label) = 1;
11583 /* Expand strlen. */
/* ix86_expand_strlen: expand a strlen call inline.
   OUT = destination register for the length; SRC = source memory ref;
   EOSCHAR = end-of-string character (only const0_rtx takes the fast
   unrolled path); ALIGN = known alignment of SRC.
   Two strategies are visible: the unrolled word-at-a-time scan via
   ix86_expand_strlensi_unroll_1, and a repne-scasb sequence (cld +
   strlenqi_1) whose result is fixed up with not/add -1.
   NOTE(review): this dump is line-sampled (gaps in the embedded
   numbering) — braces, returns and some TARGET_64BIT guards around the
   subdi3/subsi3 and one_cmpldi2/one_cmplsi2 pairs are missing here. */
11585 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11587 rtx addr, scratch1, scratch2, scratch3, scratch4;
11589 /* The generic case of strlen expander is long. Avoid it's
11590 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11592 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11593 && !TARGET_INLINE_ALL_STRINGOPS
11595 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11598 addr = force_reg (Pmode, XEXP (src, 0));
11599 scratch1 = gen_reg_rtx (Pmode);
11601 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11604 /* Well it seems that some optimizer does not combine a call like
11605 foo(strlen(bar), strlen(bar));
11606 when the move and the subtraction is done here. It does calculate
11607 the length just once when these instructions are done inside of
11608 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11609 often used and I use one fewer register for the lifetime of
11610 output_strlen_unroll() this is better. */
11612 emit_move_insn (out, addr);
11614 ix86_expand_strlensi_unroll_1 (out, src, align);
11616 /* strlensi_unroll_1 returns the address of the zero at the end of
11617 the string, like memchr(), so compute the length by subtracting
11618 the start address. */
11620 emit_insn (gen_subdi3 (out, out, addr));
11622 emit_insn (gen_subsi3 (out, out, addr));
/* repne-scasb path: scratch4 holds -1 (max count), scratch3 walks the
   string, and the UNSPEC_SCAS pattern models the scan. */
11627 scratch2 = gen_reg_rtx (Pmode);
11628 scratch3 = gen_reg_rtx (Pmode);
11629 scratch4 = force_reg (Pmode, constm1_rtx);
11631 emit_move_insn (scratch3, addr);
11632 eoschar = force_reg (QImode, eoschar);
11634 emit_insn (gen_cld ());
11635 src = replace_equiv_address_nv (src, scratch3);
11637 /* If .md starts supporting :P, this can be done in .md. */
11638 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11639 scratch4), UNSPEC_SCAS)
11640 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scasb leaves the count biased; length = ~count - 1. */
11643 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11644 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11648 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11649 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11655 /* Expand the appropriate insns for doing strlen if not just doing
11658 out = result, initialized with the start address
11659 align_rtx = alignment of the address.
11660 scratch = scratch register, initialized with the startaddress when
11661 not aligned, otherwise undefined
11663 This is just the body. It needs the initializations mentioned above and
11664 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): line-sampled dump — braces, else-arms and the
   TARGET_64BIT guards that select between the adddi3/addsi3 pairs are
   missing from this view; comments below describe the visible code. */
11667 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11671 rtx align_2_label = NULL_RTX;
11672 rtx align_3_label = NULL_RTX;
11673 rtx align_4_label = gen_label_rtx ();
11674 rtx end_0_label = gen_label_rtx ();
11676 rtx tmpreg = gen_reg_rtx (SImode);
11677 rtx scratch = gen_reg_rtx (SImode);
11681 if (GET_CODE (align_rtx) == CONST_INT)
11682 align = INTVAL (align_rtx);
11684 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11686 /* Is there a known alignment and is it less than 4? */
11689 rtx scratch1 = gen_reg_rtx (Pmode);
11690 emit_move_insn (scratch1, out);
11691 /* Is there a known alignment and is it not 2? */
11694 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11695 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11697 /* Leave just the 3 lower bits. */
11698 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11699 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> two-byte case,
   3 -> three-byte case; fall through handles 1. */
11701 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11702 Pmode, 1, align_4_label);
11703 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11704 Pmode, 1, align_2_label);
11705 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11706 Pmode, 1, align_3_label);
11710 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11711 check if is aligned to 4 - byte. */
11713 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11714 NULL_RTX, 0, OPTAB_WIDEN);
11716 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11717 Pmode, 1, align_4_label);
11720 mem = change_address (src, QImode, out);
11722 /* Now compare the bytes. */
11724 /* Compare the first n unaligned byte on a byte per byte basis. */
11725 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11726 QImode, 1, end_0_label);
11728 /* Increment the address. */
11730 emit_insn (gen_adddi3 (out, out, const1_rtx));
11732 emit_insn (gen_addsi3 (out, out, const1_rtx));
11734 /* Not needed with an alignment of 2 */
11737 emit_label (align_2_label);
11739 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11743 emit_insn (gen_adddi3 (out, out, const1_rtx));
11745 emit_insn (gen_addsi3 (out, out, const1_rtx));
11747 emit_label (align_3_label);
11750 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11754 emit_insn (gen_adddi3 (out, out, const1_rtx));
11756 emit_insn (gen_addsi3 (out, out, const1_rtx));
11759 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11760 align this loop. It gives only huge programs, but does not help to
11762 emit_label (align_4_label);
11764 mem = change_address (src, SImode, out);
11765 emit_move_insn (scratch, mem);
11767 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11769 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11771 /* This formula yields a nonzero result iff one of the bytes is zero.
11772 This saves three branches inside loop and many cycles. */
/* Classic zero-byte detector: (x - 0x01010101) & ~x & 0x80808080. */
11774 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11775 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11776 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11777 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11778 gen_int_mode (0x80808080, SImode)));
11779 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found: locate which of the four bytes it is.  The
   first (branch-free, cmov-based) variant is visible below; the
   second uses an explicit jump to end_2_label. */
11784 rtx reg = gen_reg_rtx (SImode);
11785 rtx reg2 = gen_reg_rtx (Pmode);
11786 emit_move_insn (reg, tmpreg);
11787 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11789 /* If zero is not in the first two bytes, move two bytes forward. */
11790 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11791 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11792 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11793 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11794 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11797 /* Emit lea manually to avoid clobbering of flags. */
11798 emit_insn (gen_rtx_SET (SImode, reg2,
11799 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11801 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11802 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11803 emit_insn (gen_rtx_SET (VOIDmode, out,
11804 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11811 rtx end_2_label = gen_label_rtx ();
11812 /* Is zero in the first two bytes? */
11814 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11815 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11816 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11817 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11818 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11820 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11821 JUMP_LABEL (tmp) = end_2_label;
11823 /* Not in the first two. Move two bytes forward. */
11824 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11826 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11828 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11830 emit_label (end_2_label);
11834 /* Avoid branch in fixing the byte. */
/* add tmpreg,tmpreg sets carry iff the zero was the earlier of the
   remaining two bytes; sbb then subtracts 3 or 2 accordingly. */
11835 tmpreg = gen_lowpart (QImode, tmpreg);
11836 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11837 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11839 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11841 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11843 emit_label (end_0_label);
/* ix86_expand_call: emit a call (or sibcall) to FNADDR.
   RETVAL = value destination or NULL; CALLARG1 = bytes popped /
   argument count operand; CALLARG2 = SSE register count for 64-bit
   varargs (loaded into AL); POP = stack adjustment rtx or const0_rtx;
   SIBCALL nonzero for tail calls.
   NOTE(review): line-sampled dump — TARGET_MACHO guards, braces and
   some conditionals are missing from this view. */
11847 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11848 rtx callarg2 ATTRIBUTE_UNUSED,
11849 rtx pop, int sibcall)
11851 rtx use = NULL, call;
11853 if (pop == const0_rtx)
/* 64-bit ABI never uses the pop-on-return convention. */
11855 if (TARGET_64BIT && pop)
11859 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11860 fnaddr = machopic_indirect_call_target (fnaddr);
11862 /* Static functions and indirect calls don't need the pic register. */
11863 if (! TARGET_64BIT && flag_pic
11864 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11865 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11866 use_reg (&use, pic_offset_table_rtx)
11868 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
/* x86-64 varargs: AL carries the number of SSE registers used. */
11870 rtx al = gen_rtx_REG (QImode, 0);
11871 emit_move_insn (al, callarg2);
11872 use_reg (&use, al);
11874 #endif /* TARGET_MACHO */
11876 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11878 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11879 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11881 if (sibcall && TARGET_64BIT
11882 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
/* 64-bit sibcalls through a register must use R11: it is the only
   call-clobbered register not used for argument passing. */
11885 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11886 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11887 emit_move_insn (fnaddr, addr);
11888 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11891 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11893 call = gen_rtx_SET (VOIDmode, retval, call);
/* Wrap the call and the stack-pop adjustment in one PARALLEL. */
11896 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11897 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11898 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11901 call = emit_call_insn (call);
11903 CALL_INSN_FUNCTION_USAGE (call) = use;
11907 /* Clear stack slot assignments remembered from previous functions.
11908 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a zeroed per-function machine_function record; -1 marks
   use_fast_prologue_epilogue_nregs as not-yet-computed. */
11911 static struct machine_function *
11912 ix86_init_machine_status (void)
11914 struct machine_function *f;
11916 f = ggc_alloc_cleared (sizeof (struct machine_function));
11917 f->use_fast_prologue_epilogue_nregs = -1;
11922 /* Return a MEM corresponding to a stack slot with mode MODE.
11923 Allocate a new slot if necessary.
11925 The RTL for a function can have several slots available: N is
11926 which slot to use. */
/* Slots are cached on the ix86_stack_locals list keyed by (mode, n),
   so repeated requests return the same MEM. */
11929 assign_386_stack_local (enum machine_mode mode, int n)
11931 struct stack_local_entry *s;
/* Out-of-range slot numbers are a caller bug (abort; line elided in
   this dump). */
11933 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11936 for (s = ix86_stack_locals; s; s = s->next)
11937 if (s->mode == mode && s->n == n)
11940 s = (struct stack_local_entry *)
11941 ggc_alloc (sizeof (struct stack_local_entry));
11944 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11946 s->next = ix86_stack_locals;
11947 ix86_stack_locals = s;
11951 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11953 static GTY(()) rtx ix86_tls_symbol;
/* Lazily build and cache the SYMBOL_REF; 32-bit GNU TLS uses the
   triple-underscore variant ___tls_get_addr. */
11955 ix86_tls_get_addr (void)
11958 if (!ix86_tls_symbol)
11960 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11961 (TARGET_GNU_TLS && !TARGET_64BIT)
11962 ? "___tls_get_addr"
11963 : "__tls_get_addr");
11966 return ix86_tls_symbol;
11969 /* Calculate the length of the memory address in the instruction
11970 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the number of extra bytes (SIB + displacement) the address
   ADDR contributes to the instruction encoding.
   NOTE(review): line-sampled dump — the abort/return statements and
   the trailing disp-length accumulation are elided here. */
11973 memory_address_length (rtx addr)
11975 struct ix86_address parts;
11976 rtx base, index, disp;
/* Auto-inc/dec addresses encode no extra bytes. */
11979 if (GET_CODE (addr) == PRE_DEC
11980 || GET_CODE (addr) == POST_INC
11981 || GET_CODE (addr) == PRE_MODIFY
11982 || GET_CODE (addr) == POST_MODIFY)
11985 if (! ix86_decompose_address (addr, &parts))
11989 index = parts.index;
11994 - esp as the base always wants an index,
11995 - ebp as the base always wants a displacement. */
11997 /* Register Indirect. */
11998 if (base && !index && !disp)
12000 /* esp (for its index) and ebp (for its displacement) need
12001 the two-byte modrm form. */
12002 if (addr == stack_pointer_rtx
12003 || addr == arg_pointer_rtx
12004 || addr == frame_pointer_rtx
12005 || addr == hard_frame_pointer_rtx)
12009 /* Direct Addressing. */
12010 else if (disp && !base && !index)
12015 /* Find the length of the displacement constant. */
/* 'K' constraint = signed 8-bit immediate, i.e. a 1-byte disp. */
12018 if (GET_CODE (disp) == CONST_INT
12019 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12025 /* ebp always wants a displacement. */
12026 else if (base == hard_frame_pointer_rtx)
12029 /* An index requires the two-byte modrm form.... */
12031 /* ...like esp, which always wants an index. */
12032 || base == stack_pointer_rtx
12033 || base == arg_pointer_rtx
12034 || base == frame_pointer_rtx)
12041 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12042 is set, expect that insn have 8bit immediate alternative. */
/* Scans INSN's operands for constants; with SHORTFORM, a signed 8-bit
   constant ('K' constraint) encodes as a single byte.  Otherwise the
   immediate width follows the insn's mode attribute. */
12044 ix86_attr_length_immediate_default (rtx insn, int shortform)
12048 extract_insn_cached (insn);
12049 for (i = recog_data.n_operands - 1; i >= 0; --i)
12050 if (CONSTANT_P (recog_data.operand[i]))
12055 && GET_CODE (recog_data.operand[i]) == CONST_INT
12056 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12060 switch (get_attr_mode (insn))
12071 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12076 fatal_insn ("unknown insn mode", insn);
12082 /* Compute default value for "length_address" attribute. */
/* For an lea, measure its source address; otherwise measure the first
   MEM operand found.  Returns 0 when no memory operand exists (the
   return lines are elided in this dump). */
12084 ix86_attr_length_address_default (rtx insn)
12088 if (get_attr_type (insn) == TYPE_LEA)
12090 rtx set = PATTERN (insn);
12091 if (GET_CODE (set) == SET)
12093 else if (GET_CODE (set) == PARALLEL
12094 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12095 set = XVECEXP (set, 0, 0);
12098 #ifdef ENABLE_CHECKING
12104 return memory_address_length (SET_SRC (set));
12107 extract_insn_cached (insn);
12108 for (i = recog_data.n_operands - 1; i >= 0; --i)
12109 if (GET_CODE (recog_data.operand[i]) == MEM)
12111 return memory_address_length (XEXP (recog_data.operand[i], 0));
12117 /* Return the maximum number of instructions a cpu can issue. */
/* Scheduler hook (TARGET_SCHED_ISSUE_RATE): per-cycle issue width
   keyed on ix86_tune; the per-case return values are elided in this
   dump. */
12120 ix86_issue_rate (void)
12124 case PROCESSOR_PENTIUM:
12128 case PROCESSOR_PENTIUMPRO:
12129 case PROCESSOR_PENTIUM4:
12130 case PROCESSOR_ATHLON:
12139 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12140 by DEP_INSN and nothing set by DEP_INSN. */
12143 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12147 /* Simplify the test for uninteresting insns. */
12148 if (insn_type != TYPE_SETCC
12149 && insn_type != TYPE_ICMOV
12150 && insn_type != TYPE_FCMOV
12151 && insn_type != TYPE_IBR)
12154 if ((set = single_set (dep_insn)) != 0)
12156 set = SET_DEST (set);
12159 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12160 && XVECLEN (PATTERN (dep_insn), 0) == 2
12161 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12162 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12164 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12165 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12170 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12173 /* This test is true if the dependent insn reads the flags but
12174 not any other potentially set register. */
12175 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12178 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12184 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12185 address with operands set by DEP_INSN. */
/* For lea, the "address" is the SET_SRC of the pattern itself;
   otherwise the address of INSN's first MEM operand is tested with
   modified_in_p against DEP_INSN. */
12188 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12192 if (insn_type == TYPE_LEA
12195 addr = PATTERN (insn);
12196 if (GET_CODE (addr) == SET)
12198 else if (GET_CODE (addr) == PARALLEL
12199 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12200 addr = XVECEXP (addr, 0, 0);
12203 addr = SET_SRC (addr);
12208 extract_insn_cached (insn);
12209 for (i = recog_data.n_operands - 1; i >= 0; --i)
12210 if (GET_CODE (recog_data.operand[i]) == MEM)
12212 addr = XEXP (recog_data.operand[i], 0);
12219 return modified_in_p (addr, dep_insn);
/* ix86_adjust_cost: TARGET_SCHED_ADJUST_COST hook.  Adjust the
   scheduling COST of the dependence LINK between INSN and DEP_INSN
   according to the tuned processor's pipeline quirks.
   NOTE(review): line-sampled dump — per-case `cost = ...` / `break`
   statements and some braces are elided; comments describe only the
   visible conditions. */
12223 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12225 enum attr_type insn_type, dep_insn_type;
12226 enum attr_memory memory, dep_memory;
12228 int dep_insn_code_number;
12230 /* Anti and output dependencies have zero cost on all CPUs. */
12231 if (REG_NOTE_KIND (link) != 0)
12234 dep_insn_code_number = recog_memoized (dep_insn);
12236 /* If we can't recognize the insns, we can't really do anything. */
12237 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12240 insn_type = get_attr_type (insn);
12241 dep_insn_type = get_attr_type (dep_insn);
12245 case PROCESSOR_PENTIUM:
12246 /* Address Generation Interlock adds a cycle of latency. */
12247 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12250 /* ??? Compares pair with jump/setcc. */
12251 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12254 /* Floating point stores require value to be ready one cycle earlier. */
12255 if (insn_type == TYPE_FMOV
12256 && get_attr_memory (insn) == MEMORY_STORE
12257 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12261 case PROCESSOR_PENTIUMPRO:
12262 memory = get_attr_memory (insn);
12263 dep_memory = get_attr_memory (dep_insn);
12265 /* Since we can't represent delayed latencies of load+operation,
12266 increase the cost here for non-imov insns. */
12267 if (dep_insn_type != TYPE_IMOV
12268 && dep_insn_type != TYPE_FMOV
12269 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12272 /* INT->FP conversion is expensive. */
12273 if (get_attr_fp_int_src (dep_insn))
12276 /* There is one cycle extra latency between an FP op and a store. */
12277 if (insn_type == TYPE_FMOV
12278 && (set = single_set (dep_insn)) != NULL_RTX
12279 && (set2 = single_set (insn)) != NULL_RTX
12280 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12281 && GET_CODE (SET_DEST (set2)) == MEM)
12284 /* Show ability of reorder buffer to hide latency of load by executing
12285 in parallel with previous instruction in case
12286 previous instruction is not needed to compute the address. */
12287 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12288 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12290 /* Claim moves to take one cycle, as core can issue one load
12291 at time and the next load can start cycle later. */
12292 if (dep_insn_type == TYPE_IMOV
12293 || dep_insn_type == TYPE_FMOV)
/* K6-style case (label elided in this dump). */
12301 memory = get_attr_memory (insn);
12302 dep_memory = get_attr_memory (dep_insn);
12303 /* The esp dependency is resolved before the instruction is really
12305 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12306 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12309 /* Since we can't represent delayed latencies of load+operation,
12310 increase the cost here for non-imov insns. */
12311 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12312 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12314 /* INT->FP conversion is expensive. */
12315 if (get_attr_fp_int_src (dep_insn))
12318 /* Show ability of reorder buffer to hide latency of load by executing
12319 in parallel with previous instruction in case
12320 previous instruction is not needed to compute the address. */
12321 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12322 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12324 /* Claim moves to take one cycle, as core can issue one load
12325 at time and the next load can start cycle later. */
12326 if (dep_insn_type == TYPE_IMOV
12327 || dep_insn_type == TYPE_FMOV)
12336 case PROCESSOR_ATHLON:
12338 memory = get_attr_memory (insn);
12339 dep_memory = get_attr_memory (dep_insn);
12341 /* Show ability of reorder buffer to hide latency of load by executing
12342 in parallel with previous instruction in case
12343 previous instruction is not needed to compute the address. */
12344 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12345 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12347 enum attr_unit unit = get_attr_unit (insn);
12350 /* Because of the difference between the length of integer and
12351 floating unit pipeline preparation stages, the memory operands
12352 for floating point are cheaper.
12354 ??? For Athlon it the difference is most probably 2. */
12355 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12358 loadcost = TARGET_ATHLON ? 2 : 0;
12360 if (cost >= loadcost)
/* Per-cycle PPro decoder model used by the scheduling hooks below.
   NOTE(review): the decode[3] member lines are elided in this dump. */
12375 struct ppro_sched_data
12378 int issued_this_cycle;
/* Return INSN's ppro_uops attribute, treating unrecognizable insns
   conservatively as many-uop (forces decoder 0). */
12382 static enum attr_ppro_uops
12383 ix86_safe_ppro_uops (rtx insn)
12385 if (recog_memoized (insn) >= 0)
12386 return get_attr_ppro_uops (insn);
12388 return PPRO_UOPS_MANY;
/* Debug helper: print the INSN_UIDs currently held by the three PPro
   decoder slots to DUMP (only if slot 0 is occupied). */
12392 ix86_dump_ppro_packet (FILE *dump)
12394 if (ix86_sched_data.ppro.decode[0])
12396 fprintf (dump, "PPRO packet: %d",
12397 INSN_UID (ix86_sched_data.ppro.decode[0]));
12398 if (ix86_sched_data.ppro.decode[1])
12399 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12400 if (ix86_sched_data.ppro.decode[2])
12401 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12402 fputc ('\n', dump);
12406 /* We're beginning a new block. Initialize data structures as necessary. */
/* TARGET_SCHED_INIT hook: zero the whole scheduling state. */
12409 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12410 int sched_verbose ATTRIBUTE_UNUSED,
12411 int veclen ATTRIBUTE_UNUSED)
12413 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12416 /* Shift INSN to SLOT, and shift everything else down. */
/* Rotates *INSNP up to SLOT within the ready queue; the saved-insn
   temporary and final store are elided in this dump. */
12419 ix86_reorder_insn (rtx *insnp, rtx *slot)
12425 insnp[0] = insnp[1];
12426 while (++insnp != slot);
/* Reorder the ready queue (READY..E_READY, E_READY = highest priority)
   to match the PPro 4-1-1 decoder template: one multi-uop insn in
   slot 0, single-uop insns in slots 1 and 2.
   NOTE(review): line-sampled dump — braces, `break`s and the local
   declarations (decode[], insnp, i) are partly elided. */
12432 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12435 enum attr_ppro_uops cur_uops;
12436 int issued_this_cycle;
12440 /* At this point .ppro.decode contains the state of the three
12441 decoders from last "cycle". That is, those insns that were
12442 actually independent. But here we're scheduling for the
12443 decoder, and we may find things that are decodable in the
12446 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12447 issued_this_cycle = 0;
12450 cur_uops = ix86_safe_ppro_uops (*insnp);
12452 /* If the decoders are empty, and we've a complex insn at the
12453 head of the priority queue, let it issue without complaint. */
12454 if (decode[0] == NULL)
12456 if (cur_uops == PPRO_UOPS_MANY)
12458 decode[0] = *insnp;
12462 /* Otherwise, search for a 2-4 uop unsn to issue. */
12463 while (cur_uops != PPRO_UOPS_FEW)
12465 if (insnp == ready)
12467 cur_uops = ix86_safe_ppro_uops (*--insnp);
12470 /* If so, move it to the head of the line. */
12471 if (cur_uops == PPRO_UOPS_FEW)
12472 ix86_reorder_insn (insnp, e_ready);
12474 /* Issue the head of the queue. */
12475 issued_this_cycle = 1;
12476 decode[0] = *e_ready--;
12479 /* Look for simple insns to fill in the other two slots. */
12480 for (i = 1; i < 3; ++i)
12481 if (decode[i] == NULL)
12483 if (ready > e_ready)
12487 cur_uops = ix86_safe_ppro_uops (*insnp);
12488 while (cur_uops != PPRO_UOPS_ONE)
12490 if (insnp == ready)
12492 cur_uops = ix86_safe_ppro_uops (*--insnp);
12495 /* Found one. Move it to the head of the queue and issue it. */
12496 if (cur_uops == PPRO_UOPS_ONE)
12498 ix86_reorder_insn (insnp, e_ready);
12499 decode[i] = *e_ready--;
12500 issued_this_cycle++;
12504 /* ??? Didn't find one. Ideally, here we would do a lazy split
12505 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issue so the caller's countdown in
   ix86_variable_issue terminates. */
12509 if (issued_this_cycle == 0)
12510 issued_this_cycle = 1;
12511 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12514 /* We are about to being issuing insns for this clock cycle.
12515 Override the default sort algorithm to better slot instructions. */
/* TARGET_SCHED_REORDER hook: dispatch to the PPro-specific reorder;
   other CPUs keep the default order.  Returns the issue rate. */
12517 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12518 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12519 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12521 int n_ready = *n_readyp;
12522 rtx *e_ready = ready + n_ready - 1;
12524 /* Make sure to go ahead and initialize key items in
12525 ix86_sched_data if we are not going to bother trying to
12526 reorder the ready queue. */
12529 ix86_sched_data.ppro.issued_this_cycle = 1;
12538 case PROCESSOR_PENTIUMPRO:
12539 ix86_sched_reorder_ppro (ready, e_ready);
12544 return ix86_issue_rate ();
12547 /* We are about to issue INSN. Return the number of insns left on the
12548 ready queue that can be issued this cycle. */
/* TARGET_SCHED_VARIABLE_ISSUE hook.  For PPro, track which decoder
   slot INSN occupies: many-uop insns monopolize slot 0 (flushing any
   pending packet), few-uop insns start a fresh packet, one-uop insns
   fill the first free slot (flush when all three are full).
   NOTE(review): line-sampled dump — default case, braces and `break`s
   are elided. */
12551 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12552 int can_issue_more)
12558 return can_issue_more - 1;
12560 case PROCESSOR_PENTIUMPRO:
12562 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12564 if (uops == PPRO_UOPS_MANY)
12567 ix86_dump_ppro_packet (dump);
12568 ix86_sched_data.ppro.decode[0] = insn;
12569 ix86_sched_data.ppro.decode[1] = NULL;
12570 ix86_sched_data.ppro.decode[2] = NULL;
12572 ix86_dump_ppro_packet (dump);
12573 ix86_sched_data.ppro.decode[0] = NULL;
12575 else if (uops == PPRO_UOPS_FEW)
12578 ix86_dump_ppro_packet (dump);
12579 ix86_sched_data.ppro.decode[0] = insn;
12580 ix86_sched_data.ppro.decode[1] = NULL;
12581 ix86_sched_data.ppro.decode[2] = NULL;
12585 for (i = 0; i < 3; ++i)
12586 if (ix86_sched_data.ppro.decode[i] == NULL)
12588 ix86_sched_data.ppro.decode[i] = insn;
12596 ix86_dump_ppro_packet (dump);
12597 ix86_sched_data.ppro.decode[0] = NULL;
12598 ix86_sched_data.ppro.decode[1] = NULL;
12599 ix86_sched_data.ppro.decode[2] = NULL;
12603 return --ix86_sched_data.ppro.issued_this_cycle;
/* Hook: nonzero when the tuned CPU has a DFA pipeline description
   (Pentium and Athlon/K8 here). */
12608 ia32_use_dfa_pipeline_interface (void)
12610 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12615 /* How many alternative schedules to try. This should be as wide as the
12616 scheduling freedom in the DFA, but no wider. Making this value too
12617 large results extra work for the scheduler. */
/* Returns 2 for Pentium; the fallback return is elided in this dump. */
12620 ia32_multipass_dfa_lookahead (void)
12622 if (ix86_tune == PROCESSOR_PENTIUM)
12629 /* Compute the alignment given to a constant that is being placed in memory.
12630 EXP is the constant and ALIGN is the alignment that the object would
12632 The value of this function is used instead of that alignment to align
/* Raise alignment of double constants to 64 bits, 128-bit-mode
   constants to 128 bits, and long strings to a word boundary (unless
   optimizing for size). */
12636 ix86_constant_alignment (tree exp, int align)
12638 if (TREE_CODE (exp) == REAL_CST)
12640 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12642 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12645 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12646 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12647 return BITS_PER_WORD;
12652 /* Compute the alignment for a static variable.
12653 TYPE is the data type, and ALIGN is the alignment that
12654 the object would ordinarily have. The value of this function is used
12655 instead of that alignment to align the object. */
/* Raises alignment for large aggregates (cache-line friendliness),
   and for double/128-bit scalar, complex, array and record types.
   NOTE(review): the returned values after most conditions are elided
   in this line-sampled dump. */
12658 ix86_data_alignment (tree type, int align)
12660 if (AGGREGATE_TYPE_P (type)
12661 && TYPE_SIZE (type)
12662 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12663 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12664 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12667 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12668 to 16byte boundary. */
12671 if (AGGREGATE_TYPE_P (type)
12672 && TYPE_SIZE (type)
12673 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12674 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12675 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12679 if (TREE_CODE (type) == ARRAY_TYPE)
12681 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12683 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12686 else if (TREE_CODE (type) == COMPLEX_TYPE)
12689 if (TYPE_MODE (type) == DCmode && align < 64)
12691 if (TYPE_MODE (type) == XCmode && align < 128)
12694 else if ((TREE_CODE (type) == RECORD_TYPE
12695 || TREE_CODE (type) == UNION_TYPE
12696 || TREE_CODE (type) == QUAL_UNION_TYPE)
12697 && TYPE_FIELDS (type))
12699 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12701 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12704 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12705 || TREE_CODE (type) == INTEGER_TYPE)
12707 if (TYPE_MODE (type) == DFmode && align < 64)
12709 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12716 /* Compute the alignment for a local variable.
12717 TYPE is the data type, and ALIGN is the alignment that
12718 the object would ordinarily have. The value of this macro is used
12719 instead of that alignment to align the object. */
/* Same shape as ix86_data_alignment but with the lower 16-byte
   threshold for x86-64 aggregates; return values are elided in this
   line-sampled dump. */
12722 ix86_local_alignment (tree type, int align)
12724 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12725 to 16byte boundary. */
12728 if (AGGREGATE_TYPE_P (type)
12729 && TYPE_SIZE (type)
12730 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12731 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12732 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12735 if (TREE_CODE (type) == ARRAY_TYPE)
12737 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12739 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12742 else if (TREE_CODE (type) == COMPLEX_TYPE)
12744 if (TYPE_MODE (type) == DCmode && align < 64)
12746 if (TYPE_MODE (type) == XCmode && align < 128)
12749 else if ((TREE_CODE (type) == RECORD_TYPE
12750 || TREE_CODE (type) == UNION_TYPE
12751 || TREE_CODE (type) == QUAL_UNION_TYPE)
12752 && TYPE_FIELDS (type))
12754 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12756 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12759 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12760 || TREE_CODE (type) == INTEGER_TYPE)
12763 if (TYPE_MODE (type) == DFmode && align < 64)
12765 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12771 /* Emit RTL insns to initialize the variable parts of a trampoline.
12772 FNADDR is an RTX for the address of the function's pure code.
12773 CXT is an RTX for the static chain value for the function. */
/* 32-bit layout: `mov ecx, CXT` (0xb9 imm32) then `jmp rel32`
   (0xe9 disp32).  64-bit layout: load FNADDR into r11 (short movl
   0xbb41 when it is zero-extendable, else movabs), movabs CXT into
   r10 (0xba49), then `jmp *r11` (0x49 0xff 0xe3).
   NOTE(review): line-sampled dump — the TARGET_64BIT branch structure,
   `offset` updates and some operands are elided. */
12775 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12779 /* Compute offset from the end of the jmp to the target function. */
12780 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12781 plus_constant (tramp, 10),
12782 NULL_RTX, 1, OPTAB_DIRECT);
12783 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12784 gen_int_mode (0xb9, QImode));
12785 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12786 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12787 gen_int_mode (0xe9, QImode));
12788 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12793 /* Try to load address using shorter movl instead of movabs.
12794 We may want to support movq for kernel mode, but kernel does not use
12795 trampolines at the moment. */
12796 if (x86_64_zero_extended_value (fnaddr))
12798 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12799 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12800 gen_int_mode (0xbb41, HImode));
12801 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12802 gen_lowpart (SImode, fnaddr));
12807 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12808 gen_int_mode (0xbb49, HImode));
12809 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12813 /* Load static chain using movabs to r10. */
12814 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12815 gen_int_mode (0xba49, HImode));
12816 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12819 /* Jump to the r11 */
12820 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12821 gen_int_mode (0xff49, HImode));
12822 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12823 gen_int_mode (0xe3, QImode));
/* Sanity check: emitted bytes must fit the declared trampoline size. */
12825 if (offset > TRAMPOLINE_SIZE)
12829 #ifdef ENABLE_EXECUTE_STACK
12830 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12831 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register a target builtin NAME with type TYPE and code CODE, but
   only when its ISA MASK is enabled (and, for MASK_64BIT builtins,
   only in 64-bit mode).  No comments may be inserted between the
   continuation lines below. */
12835 #define def_builtin(MASK, NAME, TYPE, CODE) \
12837 if ((MASK) & target_flags \
12838 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12839 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12840 NULL, NULL_TREE); \
12843 struct builtin_description
/* Table row describing one builtin: enabling ISA mask, insn pattern,
   user-visible name, builtin code, and (for comparisons) the rtx
   comparison code plus an extra flag. */
12845 const unsigned int mask;
12846 const enum insn_code icode;
12847 const char *const name;
12848 const enum ix86_builtins code;
12849 const enum rtx_code comparison;
12850 const unsigned int flag;
/* SSE/SSE2 comis/ucomis scalar-compare builtins.  Each row maps a
   __builtin_ia32_* name to the comi/ucomi insn pattern and the rtx
   comparison code used to read EFLAGS afterwards. */
12853 static const struct builtin_description bdesc_comi[] =
12855 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12856 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12857 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12858 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12859 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12860 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12861 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12862 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12863 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12864 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12865 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12866 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12867 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12868 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12869 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12870 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12873 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12874 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12881 static const struct builtin_description bdesc_2arg[] =
12884 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12885 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12886 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12887 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12888 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12889 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12890 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12891 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12893 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12894 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12895 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12896 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12897 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12898 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12899 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12900 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12901 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12902 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12903 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12904 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12905 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12906 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12907 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12908 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12909 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12910 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12911 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12912 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12914 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12915 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12916 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12917 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12919 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12920 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12921 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12922 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12924 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12925 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12926 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12927 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12928 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12931 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12932 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12933 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12934 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12935 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12936 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12937 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12938 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12940 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12941 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12942 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12943 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12944 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12945 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12946 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12947 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12949 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12950 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12951 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12953 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12954 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12955 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12956 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12958 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12959 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12961 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12962 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12963 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12964 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12965 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12966 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12968 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12969 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12970 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12971 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12973 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12974 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12975 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12976 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12977 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12978 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12981 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12982 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12983 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12985 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12986 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12987 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12989 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12990 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12991 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12992 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12993 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12994 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12996 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12997 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12998 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12999 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13000 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13001 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13003 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13004 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13005 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13006 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13008 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13009 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13022 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13023 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13024 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
13025 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
13026 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13027 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
13028 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
13029 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
13030 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
13031 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
13032 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
13033 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13034 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13035 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13036 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13037 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
13038 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
13039 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
13040 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
13042 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13066 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13067 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13068 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13069 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13070 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13071 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13072 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13073 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13075 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13076 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13077 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13078 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13080 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13081 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13082 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13085 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13089 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13090 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13091 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13092 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13093 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13097 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13098 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13100 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13104 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13105 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13113 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13117 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13118 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13120 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13121 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13126 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13127 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13128 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13130 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13131 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13132 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13133 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13135 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13137 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13138 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13139 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13140 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13143 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13144 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13145 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13146 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13147 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13148 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
/* One-argument MMX/SSE/SSE2/SSE3 builtins (move-mask extraction, unary
   math, and format conversions).  All entries have name == 0: they are
   registered explicitly elsewhere with their specific function types,
   and this table supplies only the gating mask and insn code.  */
static const struct builtin_description bdesc_1arg[] =
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
  /* SSE3 */
  { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
/* Target hook: register all ix86 machine-specific builtins.  Currently
   this only delegates to the MMX/SSE set.  NOTE(review): per the
   comment below, the caller-side guard that skips this for
   !TARGET_MMX is not visible in this chunk -- confirm it exists.  */
ix86_init_builtins (void)
  ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only the MMX part of
   the builtins is expanded.  */
13213 ix86_init_mmx_sse_builtins (void)
13215 const struct builtin_description * d;
13218 tree pchar_type_node = build_pointer_type (char_type_node);
13219 tree pcchar_type_node = build_pointer_type (
13220 build_type_variant (char_type_node, 1, 0));
13221 tree pfloat_type_node = build_pointer_type (float_type_node);
13222 tree pcfloat_type_node = build_pointer_type (
13223 build_type_variant (float_type_node, 1, 0));
13224 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13225 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13226 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13229 tree int_ftype_v4sf_v4sf
13230 = build_function_type_list (integer_type_node,
13231 V4SF_type_node, V4SF_type_node, NULL_TREE);
13232 tree v4si_ftype_v4sf_v4sf
13233 = build_function_type_list (V4SI_type_node,
13234 V4SF_type_node, V4SF_type_node, NULL_TREE);
13235 /* MMX/SSE/integer conversions. */
13236 tree int_ftype_v4sf
13237 = build_function_type_list (integer_type_node,
13238 V4SF_type_node, NULL_TREE);
13239 tree int64_ftype_v4sf
13240 = build_function_type_list (long_long_integer_type_node,
13241 V4SF_type_node, NULL_TREE);
13242 tree int_ftype_v8qi
13243 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13244 tree v4sf_ftype_v4sf_int
13245 = build_function_type_list (V4SF_type_node,
13246 V4SF_type_node, integer_type_node, NULL_TREE);
13247 tree v4sf_ftype_v4sf_int64
13248 = build_function_type_list (V4SF_type_node,
13249 V4SF_type_node, long_long_integer_type_node,
13251 tree v4sf_ftype_v4sf_v2si
13252 = build_function_type_list (V4SF_type_node,
13253 V4SF_type_node, V2SI_type_node, NULL_TREE);
13254 tree int_ftype_v4hi_int
13255 = build_function_type_list (integer_type_node,
13256 V4HI_type_node, integer_type_node, NULL_TREE);
13257 tree v4hi_ftype_v4hi_int_int
13258 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13259 integer_type_node, integer_type_node,
13261 /* Miscellaneous. */
13262 tree v8qi_ftype_v4hi_v4hi
13263 = build_function_type_list (V8QI_type_node,
13264 V4HI_type_node, V4HI_type_node, NULL_TREE);
13265 tree v4hi_ftype_v2si_v2si
13266 = build_function_type_list (V4HI_type_node,
13267 V2SI_type_node, V2SI_type_node, NULL_TREE);
13268 tree v4sf_ftype_v4sf_v4sf_int
13269 = build_function_type_list (V4SF_type_node,
13270 V4SF_type_node, V4SF_type_node,
13271 integer_type_node, NULL_TREE);
13272 tree v2si_ftype_v4hi_v4hi
13273 = build_function_type_list (V2SI_type_node,
13274 V4HI_type_node, V4HI_type_node, NULL_TREE);
13275 tree v4hi_ftype_v4hi_int
13276 = build_function_type_list (V4HI_type_node,
13277 V4HI_type_node, integer_type_node, NULL_TREE);
13278 tree v4hi_ftype_v4hi_di
13279 = build_function_type_list (V4HI_type_node,
13280 V4HI_type_node, long_long_unsigned_type_node,
13282 tree v2si_ftype_v2si_di
13283 = build_function_type_list (V2SI_type_node,
13284 V2SI_type_node, long_long_unsigned_type_node,
13286 tree void_ftype_void
13287 = build_function_type (void_type_node, void_list_node);
13288 tree void_ftype_unsigned
13289 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13290 tree void_ftype_unsigned_unsigned
13291 = build_function_type_list (void_type_node, unsigned_type_node,
13292 unsigned_type_node, NULL_TREE);
13293 tree void_ftype_pcvoid_unsigned_unsigned
13294 = build_function_type_list (void_type_node, const_ptr_type_node,
13295 unsigned_type_node, unsigned_type_node,
13297 tree unsigned_ftype_void
13298 = build_function_type (unsigned_type_node, void_list_node);
13300 = build_function_type (long_long_unsigned_type_node, void_list_node);
13301 tree v4sf_ftype_void
13302 = build_function_type (V4SF_type_node, void_list_node);
13303 tree v2si_ftype_v4sf
13304 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13305 /* Loads/stores. */
13306 tree void_ftype_v8qi_v8qi_pchar
13307 = build_function_type_list (void_type_node,
13308 V8QI_type_node, V8QI_type_node,
13309 pchar_type_node, NULL_TREE);
13310 tree v4sf_ftype_pcfloat
13311 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13312 /* @@@ the type is bogus */
13313 tree v4sf_ftype_v4sf_pv2si
13314 = build_function_type_list (V4SF_type_node,
13315 V4SF_type_node, pv2si_type_node, NULL_TREE);
13316 tree void_ftype_pv2si_v4sf
13317 = build_function_type_list (void_type_node,
13318 pv2si_type_node, V4SF_type_node, NULL_TREE);
13319 tree void_ftype_pfloat_v4sf
13320 = build_function_type_list (void_type_node,
13321 pfloat_type_node, V4SF_type_node, NULL_TREE);
13322 tree void_ftype_pdi_di
13323 = build_function_type_list (void_type_node,
13324 pdi_type_node, long_long_unsigned_type_node,
13326 tree void_ftype_pv2di_v2di
13327 = build_function_type_list (void_type_node,
13328 pv2di_type_node, V2DI_type_node, NULL_TREE);
13329 /* Normal vector unops. */
13330 tree v4sf_ftype_v4sf
13331 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13333 /* Normal vector binops. */
13334 tree v4sf_ftype_v4sf_v4sf
13335 = build_function_type_list (V4SF_type_node,
13336 V4SF_type_node, V4SF_type_node, NULL_TREE);
13337 tree v8qi_ftype_v8qi_v8qi
13338 = build_function_type_list (V8QI_type_node,
13339 V8QI_type_node, V8QI_type_node, NULL_TREE);
13340 tree v4hi_ftype_v4hi_v4hi
13341 = build_function_type_list (V4HI_type_node,
13342 V4HI_type_node, V4HI_type_node, NULL_TREE);
13343 tree v2si_ftype_v2si_v2si
13344 = build_function_type_list (V2SI_type_node,
13345 V2SI_type_node, V2SI_type_node, NULL_TREE);
13346 tree di_ftype_di_di
13347 = build_function_type_list (long_long_unsigned_type_node,
13348 long_long_unsigned_type_node,
13349 long_long_unsigned_type_node, NULL_TREE);
13351 tree v2si_ftype_v2sf
13352 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13353 tree v2sf_ftype_v2si
13354 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13355 tree v2si_ftype_v2si
13356 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13357 tree v2sf_ftype_v2sf
13358 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13359 tree v2sf_ftype_v2sf_v2sf
13360 = build_function_type_list (V2SF_type_node,
13361 V2SF_type_node, V2SF_type_node, NULL_TREE);
13362 tree v2si_ftype_v2sf_v2sf
13363 = build_function_type_list (V2SI_type_node,
13364 V2SF_type_node, V2SF_type_node, NULL_TREE);
13365 tree pint_type_node = build_pointer_type (integer_type_node);
13366 tree pcint_type_node = build_pointer_type (
13367 build_type_variant (integer_type_node, 1, 0));
13368 tree pdouble_type_node = build_pointer_type (double_type_node);
13369 tree pcdouble_type_node = build_pointer_type (
13370 build_type_variant (double_type_node, 1, 0));
13371 tree int_ftype_v2df_v2df
13372 = build_function_type_list (integer_type_node,
13373 V2DF_type_node, V2DF_type_node, NULL_TREE);
13376 = build_function_type (intTI_type_node, void_list_node);
13377 tree v2di_ftype_void
13378 = build_function_type (V2DI_type_node, void_list_node);
13379 tree ti_ftype_ti_ti
13380 = build_function_type_list (intTI_type_node,
13381 intTI_type_node, intTI_type_node, NULL_TREE);
13382 tree void_ftype_pcvoid
13383 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13385 = build_function_type_list (V2DI_type_node,
13386 long_long_unsigned_type_node, NULL_TREE);
13388 = build_function_type_list (long_long_unsigned_type_node,
13389 V2DI_type_node, NULL_TREE);
13390 tree v4sf_ftype_v4si
13391 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13392 tree v4si_ftype_v4sf
13393 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13394 tree v2df_ftype_v4si
13395 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13396 tree v4si_ftype_v2df
13397 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13398 tree v2si_ftype_v2df
13399 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13400 tree v4sf_ftype_v2df
13401 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13402 tree v2df_ftype_v2si
13403 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13404 tree v2df_ftype_v4sf
13405 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13406 tree int_ftype_v2df
13407 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13408 tree int64_ftype_v2df
13409 = build_function_type_list (long_long_integer_type_node,
13410 V2DF_type_node, NULL_TREE);
13411 tree v2df_ftype_v2df_int
13412 = build_function_type_list (V2DF_type_node,
13413 V2DF_type_node, integer_type_node, NULL_TREE);
13414 tree v2df_ftype_v2df_int64
13415 = build_function_type_list (V2DF_type_node,
13416 V2DF_type_node, long_long_integer_type_node,
13418 tree v4sf_ftype_v4sf_v2df
13419 = build_function_type_list (V4SF_type_node,
13420 V4SF_type_node, V2DF_type_node, NULL_TREE);
13421 tree v2df_ftype_v2df_v4sf
13422 = build_function_type_list (V2DF_type_node,
13423 V2DF_type_node, V4SF_type_node, NULL_TREE);
13424 tree v2df_ftype_v2df_v2df_int
13425 = build_function_type_list (V2DF_type_node,
13426 V2DF_type_node, V2DF_type_node,
13429 tree v2df_ftype_v2df_pv2si
13430 = build_function_type_list (V2DF_type_node,
13431 V2DF_type_node, pv2si_type_node, NULL_TREE);
13432 tree void_ftype_pv2si_v2df
13433 = build_function_type_list (void_type_node,
13434 pv2si_type_node, V2DF_type_node, NULL_TREE);
13435 tree void_ftype_pdouble_v2df
13436 = build_function_type_list (void_type_node,
13437 pdouble_type_node, V2DF_type_node, NULL_TREE);
13438 tree void_ftype_pint_int
13439 = build_function_type_list (void_type_node,
13440 pint_type_node, integer_type_node, NULL_TREE);
13441 tree void_ftype_v16qi_v16qi_pchar
13442 = build_function_type_list (void_type_node,
13443 V16QI_type_node, V16QI_type_node,
13444 pchar_type_node, NULL_TREE);
13445 tree v2df_ftype_pcdouble
13446 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13447 tree v2df_ftype_v2df_v2df
13448 = build_function_type_list (V2DF_type_node,
13449 V2DF_type_node, V2DF_type_node, NULL_TREE);
13450 tree v16qi_ftype_v16qi_v16qi
13451 = build_function_type_list (V16QI_type_node,
13452 V16QI_type_node, V16QI_type_node, NULL_TREE);
13453 tree v8hi_ftype_v8hi_v8hi
13454 = build_function_type_list (V8HI_type_node,
13455 V8HI_type_node, V8HI_type_node, NULL_TREE);
13456 tree v4si_ftype_v4si_v4si
13457 = build_function_type_list (V4SI_type_node,
13458 V4SI_type_node, V4SI_type_node, NULL_TREE);
13459 tree v2di_ftype_v2di_v2di
13460 = build_function_type_list (V2DI_type_node,
13461 V2DI_type_node, V2DI_type_node, NULL_TREE);
13462 tree v2di_ftype_v2df_v2df
13463 = build_function_type_list (V2DI_type_node,
13464 V2DF_type_node, V2DF_type_node, NULL_TREE);
13465 tree v2df_ftype_v2df
13466 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13467 tree v2df_ftype_double
13468 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13469 tree v2df_ftype_double_double
13470 = build_function_type_list (V2DF_type_node,
13471 double_type_node, double_type_node, NULL_TREE);
13472 tree int_ftype_v8hi_int
13473 = build_function_type_list (integer_type_node,
13474 V8HI_type_node, integer_type_node, NULL_TREE);
13475 tree v8hi_ftype_v8hi_int_int
13476 = build_function_type_list (V8HI_type_node,
13477 V8HI_type_node, integer_type_node,
13478 integer_type_node, NULL_TREE);
13479 tree v2di_ftype_v2di_int
13480 = build_function_type_list (V2DI_type_node,
13481 V2DI_type_node, integer_type_node, NULL_TREE);
13482 tree v4si_ftype_v4si_int
13483 = build_function_type_list (V4SI_type_node,
13484 V4SI_type_node, integer_type_node, NULL_TREE);
13485 tree v8hi_ftype_v8hi_int
13486 = build_function_type_list (V8HI_type_node,
13487 V8HI_type_node, integer_type_node, NULL_TREE);
13488 tree v8hi_ftype_v8hi_v2di
13489 = build_function_type_list (V8HI_type_node,
13490 V8HI_type_node, V2DI_type_node, NULL_TREE);
13491 tree v4si_ftype_v4si_v2di
13492 = build_function_type_list (V4SI_type_node,
13493 V4SI_type_node, V2DI_type_node, NULL_TREE);
13494 tree v4si_ftype_v8hi_v8hi
13495 = build_function_type_list (V4SI_type_node,
13496 V8HI_type_node, V8HI_type_node, NULL_TREE);
13497 tree di_ftype_v8qi_v8qi
13498 = build_function_type_list (long_long_unsigned_type_node,
13499 V8QI_type_node, V8QI_type_node, NULL_TREE);
13500 tree v2di_ftype_v16qi_v16qi
13501 = build_function_type_list (V2DI_type_node,
13502 V16QI_type_node, V16QI_type_node, NULL_TREE);
13503 tree int_ftype_v16qi
13504 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13505 tree v16qi_ftype_pcchar
13506 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13507 tree void_ftype_pchar_v16qi
13508 = build_function_type_list (void_type_node,
13509 pchar_type_node, V16QI_type_node, NULL_TREE);
13510 tree v4si_ftype_pcint
13511 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13512 tree void_ftype_pcint_v4si
13513 = build_function_type_list (void_type_node,
13514 pcint_type_node, V4SI_type_node, NULL_TREE);
13515 tree v2di_ftype_v2di
13516 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13519 tree float128_type;
13521 /* The __float80 type. */
13522 if (TYPE_MODE (long_double_type_node) == XFmode)
13523 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13527 /* The __float80 type. */
13528 float80_type = make_node (REAL_TYPE);
13529 TYPE_PRECISION (float80_type) = 96;
13530 layout_type (float80_type);
13531 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13534 float128_type = make_node (REAL_TYPE);
13535 TYPE_PRECISION (float128_type) = 128;
13536 layout_type (float128_type);
13537 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13539 /* Add all builtins that are more or less simple operations on two
13541 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13543 /* Use one of the operands; the target can have a different mode for
13544 mask-generating compares. */
13545 enum machine_mode mode;
13550 mode = insn_data[d->icode].operand[1].mode;
13555 type = v16qi_ftype_v16qi_v16qi;
13558 type = v8hi_ftype_v8hi_v8hi;
13561 type = v4si_ftype_v4si_v4si;
13564 type = v2di_ftype_v2di_v2di;
13567 type = v2df_ftype_v2df_v2df;
13570 type = ti_ftype_ti_ti;
13573 type = v4sf_ftype_v4sf_v4sf;
13576 type = v8qi_ftype_v8qi_v8qi;
13579 type = v4hi_ftype_v4hi_v4hi;
13582 type = v2si_ftype_v2si_v2si;
13585 type = di_ftype_di_di;
13592 /* Override for comparisons. */
13593 if (d->icode == CODE_FOR_maskcmpv4sf3
13594 || d->icode == CODE_FOR_maskncmpv4sf3
13595 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13596 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13597 type = v4si_ftype_v4sf_v4sf;
13599 if (d->icode == CODE_FOR_maskcmpv2df3
13600 || d->icode == CODE_FOR_maskncmpv2df3
13601 || d->icode == CODE_FOR_vmmaskcmpv2df3
13602 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13603 type = v2di_ftype_v2df_v2df;
13605 def_builtin (d->mask, d->name, type, d->code);
13608 /* Add the remaining MMX insns with somewhat more complicated types. */
13609 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13610 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13611 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13612 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13613 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13615 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13616 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13617 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13619 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13620 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13622 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13623 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13625 /* comi/ucomi insns. */
13626 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13627 if (d->mask == MASK_SSE2)
13628 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13630 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13632 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13633 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13634 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13636 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13637 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13638 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13639 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13640 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13641 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13642 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13643 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13644 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13645 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13646 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13648 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13649 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13651 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13653 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13654 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13655 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13656 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13657 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13658 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13660 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13661 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13662 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13663 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13665 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13666 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13667 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13668 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13670 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13672 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13674 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13675 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13676 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13677 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13678 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13679 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13681 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13683 /* Original 3DNow! */
13684 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13685 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13686 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13687 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13688 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13689 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13690 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13691 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13692 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13693 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13694 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13695 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13696 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13697 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13698 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13699 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13700 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13701 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13702 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13703 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13705 /* 3DNow! extension as used in the Athlon CPU. */
13706 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13707 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13708 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13709 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13710 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13711 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13713 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13716 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13721 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13727 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13728 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13735 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13739 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13751 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13752 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13754 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13755 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13756 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13757 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13758 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13760 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13762 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13763 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13764 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13765 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13767 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13768 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13769 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13771 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13772 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13773 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13774 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13776 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13777 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13778 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13779 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13780 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13781 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13782 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13784 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13785 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13786 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13788 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13789 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13790 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13791 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13792 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13793 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13794 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13796 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13798 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13799 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13800 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13802 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13803 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13804 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13806 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13807 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13809 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13810 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13811 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13812 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13814 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13815 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13816 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13817 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13819 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13820 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13822 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13824 /* Prescott New Instructions. */
13825 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13826 void_ftype_pcvoid_unsigned_unsigned,
13827 IX86_BUILTIN_MONITOR);
13828 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13829 void_ftype_unsigned_unsigned,
13830 IX86_BUILTIN_MWAIT);
13831 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13833 IX86_BUILTIN_MOVSHDUP);
13834 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13836 IX86_BUILTIN_MOVSLDUP);
13837 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13838 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13839 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13840 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13841 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13842 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13845 /* Errors in the source file can cause expand_expr to return const0_rtx
13846 where we expect a vector. To avoid crashing, use one of the vector
13847 clear instructions. */
/* NOTE(review): the return-type line and several body lines (the early
   return for non-const0 operands, the else keyword, the final return and
   closing brace) are elided in this extraction; comments below cover only
   the visible lines.  */
13849 safe_vector_operand (rtx x, enum machine_mode mode)
/* Anything other than const0_rtx is presumably already a valid vector
   operand and is returned unchanged (return elided from view).  */
13851 if (x != const0_rtx)
/* X collapsed to const0_rtx: materialize a fresh pseudo of the vector
   MODE and emit an instruction that clears it.  */
13853 x = gen_reg_rtx (mode);
/* MMX-class modes are cleared through the 64-bit mmx_clrdi pattern,
   viewing the register as DImode via SUBREG when MODE is not DImode.  */
13855 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13856 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13857 : gen_rtx_SUBREG (DImode, x, 0)))
/* Otherwise (branch keyword elided) clear through the 128-bit SSE
   pattern, again via a V4SFmode SUBREG when needed.  */
13859 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13860 : gen_rtx_SUBREG (V4SFmode, x, 0),
13861 CONST0_RTX (V4SFmode)));
13865 /* Subroutine of ix86_expand_builtin to take care of binop insns.
   ICODE is the insn code of the two-operand pattern to emit, ARGLIST the
   builtin's argument TREE_LIST, TARGET a suggested destination rtx (may be
   reused if it has the right mode and satisfies the operand predicate).
   NOTE(review): the declaration line ("static rtx"), opening brace, the
   target-NULL test, abort() call and the trailing emit/return lines are
   elided in this extraction.  */
13868 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
/* Pull the two argument trees off the list and expand them to rtl.  */
13871 tree arg0 = TREE_VALUE (arglist);
13872 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13873 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13874 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Operand modes come from the insn's own operand table: 0 = result,
   1 and 2 = inputs.  */
13875 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13876 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13877 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
13879 if (VECTOR_MODE_P (mode0))
13880 op0 = safe_vector_operand (op0, mode0);
13881 if (VECTOR_MODE_P (mode1))
13882 op1 = safe_vector_operand (op1, mode1);
/* TARGET is only reusable when it exists, has the result mode, and
   passes the result-operand predicate (leading condition elided).  */
13885 || GET_MODE (target) != tmode
13886 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13887 target = gen_reg_rtx (tmode);
/* An SImode shift count feeding a TImode operand: widen it by loading
   into a V4SImode register and taking the TImode lowpart.  */
13889 if (GET_MODE (op1) == SImode && mode1 == TImode)
13891 rtx x = gen_reg_rtx (V4SImode);
13892 emit_insn (gen_sse2_loadd (x, op1));
13893 op1 = gen_lowpart (TImode, x);
13896 /* In case the insn wants input operands in modes different from
13897 the result, abort. */
13898 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13899 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
/* Force each input into a register of the expected mode if it does not
   already satisfy the insn's operand predicate.  */
13902 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13903 op0 = copy_to_mode_reg (mode0, op0);
13904 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13905 op1 = copy_to_mode_reg (mode1, op1);
13907 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13908 yet one of the two must not be a memory. This is normally enforced
13909 by expanders, but we didn't bother to create one here. */
13910 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13911 op0 = copy_to_mode_reg (mode0, op0);
/* Generate and (in elided lines) emit the pattern, returning TARGET.  */
13913 pat = GEN_FCN (icode) (target, op0, op1);
13920 /* Subroutine of ix86_expand_builtin to take care of stores.
   ARG0 is the address expression, ARG1 the value to store.  The emitted
   pattern's operand 0 is a MEM built from ARG0 and operand 1 the value.
   NOTE(review): declaration line, opening brace and the trailing
   pat-check/emit/return lines are elided in this extraction.  */
13923 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
/* Expand the address (arg0) and the stored value (arg1) to rtl.  */
13926 tree arg0 = TREE_VALUE (arglist);
13927 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13928 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13929 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13930 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13931 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Replace a const0_rtx stand-in for a vector value (error recovery).  */
13933 if (VECTOR_MODE_P (mode1))
13934 op1 = safe_vector_operand (op1, mode1);
/* Turn the address into a MEM of the destination mode; force the stored
   value into a register of its expected mode.  */
13936 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13937 op1 = copy_to_mode_reg (mode1, op1);
/* Generate the store pattern (emit happens in elided lines).  */
13939 pat = GEN_FCN (icode) (op0, op1);
13945 /* Subroutine of ix86_expand_builtin to take care of unop insns.
   DO_LOAD nonzero means the single argument is an address and the operand
   is formed by loading from it; zero means the argument is the value
   itself.  NOTE(review): declaration line, braces, the target-NULL test
   and the if/else structure around the load as well as the trailing
   emit/return are elided in this extraction.  */
13948 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13949 rtx target, int do_load)
13952 tree arg0 = TREE_VALUE (arglist);
13953 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13954 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13955 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* TARGET is reused only when it has the result mode, is not a MEM in the
   do_load case, and satisfies the result predicate (leading condition
   elided).  */
13958 || GET_MODE (target) != tmode
13959 || (do_load && GET_CODE (target) == MEM)
13960 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13961 target = gen_reg_rtx (tmode);
/* do_load path: build a MEM over the expanded address.  */
13963 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Non-load path (branch structure elided): sanitize a const0_rtx vector
   stand-in and force the operand into a register if needed.  */
13966 if (VECTOR_MODE_P (mode0))
13967 op0 = safe_vector_operand (op0, mode0);
13969 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13970 op0 = copy_to_mode_reg (mode0, op0);
/* Generate the unop pattern (emit/return in elided lines).  */
13973 pat = GEN_FCN (icode) (target, op0);
13980 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13981 sqrtss, rsqrtss, rcpss.
   These scalar patterns take the same value twice (operands 1 and 2) so
   the upper elements of the result come from the input vector.
   NOTE(review): declaration line, braces, the target-NULL test, the
   assignment that sets op1 (presumably op1 = op0 — elided here) and the
   trailing emit/return are missing from this extraction.  */
13984 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13987 tree arg0 = TREE_VALUE (arglist);
13988 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13989 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13990 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only if it matches the result mode and predicate
   (leading condition elided).  */
13993 || GET_MODE (target) != tmode
13994 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13995 target = gen_reg_rtx (tmode);
/* Sanitize a const0_rtx vector stand-in from erroneous source.  */
13997 if (VECTOR_MODE_P (mode0))
13998 op0 = safe_vector_operand (op0, mode0);
14000 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14001 op0 = copy_to_mode_reg (mode0, op0);
/* op1 duplicates the input (assignment elided); both operands use
   mode0, matching the vmpattern's second input.  */
14004 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14005 op1 = copy_to_mode_reg (mode0, op1);
14007 pat = GEN_FCN (icode) (target, op0, op1);
14014 /* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin (icode plus the rtx comparison code to embed in
   the pattern's third operand).  The result is a mask vector.
   NOTE(review): declaration line, braces, the condition guarding the
   operand swap, the swap assignments themselves, the target-NULL test and
   the trailing pat-check/emit/return are elided in this extraction.  */
14017 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
/* Expand both comparison operands to rtl.  */
14021 tree arg0 = TREE_VALUE (arglist);
14022 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14023 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14024 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14026 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14027 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14028 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14029 enum rtx_code comparison = d->comparison;
/* Replace const0_rtx vector stand-ins from erroneous source.  */
14031 if (VECTOR_MODE_P (mode0))
14032 op0 = safe_vector_operand (op0, mode0);
14033 if (VECTOR_MODE_P (mode1))
14034 op1 = safe_vector_operand (op1, mode1);
14036 /* Swap operands if we have a comparison that isn't available in
   hardware (condition and remainder of the swap elided): op1 is copied
   into a fresh register here before the exchange.  */
14040 rtx tmp = gen_reg_rtx (mode1);
14041 emit_move_insn (tmp, op1);
/* Reuse TARGET only if it matches the result mode and predicate
   (leading condition elided).  */
14047 || GET_MODE (target) != tmode
14048 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14049 target = gen_reg_rtx (tmode);
14051 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14052 op0 = copy_to_mode_reg (mode0, op0);
14053 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14054 op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the mask-compare pattern is the comparison rtx itself.  */
14056 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14057 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14064 /* Subroutine of ix86_expand_builtin to take care of comi insns.
   These compare two scalars, set the flags, and the result is read back
   as a 0/1 value via a setcc into the low byte of an SImode pseudo.
   NOTE(review): declaration line, braces, the swap-operands branch, the
   pat-NULL check, and parts of the final SET's operands are elided in
   this extraction.  */
14067 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
/* Expand both operands to rtl.  */
14071 tree arg0 = TREE_VALUE (arglist);
14072 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14073 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14074 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Note: for comi patterns operands 0 and 1 are the two inputs (there is
   no separate result operand — the result is the flags register).  */
14076 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14077 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14078 enum rtx_code comparison = d->comparison;
/* Replace const0_rtx vector stand-ins from erroneous source.  */
14080 if (VECTOR_MODE_P (mode0))
14081 op0 = safe_vector_operand (op0, mode0);
14082 if (VECTOR_MODE_P (mode1))
14083 op1 = safe_vector_operand (op1, mode1);
14085 /* Swap operands if we have a comparison that isn't available in
   hardware (condition and swap elided from view).  */
/* Result register: zero an SImode pseudo, then write the setcc result
   into its low byte through a QImode SUBREG (zero-extension effect).  */
14094 target = gen_reg_rtx (SImode);
14095 emit_move_insn (target, const0_rtx);
14096 target = gen_rtx_SUBREG (QImode, target, 0);
14098 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14099 op0 = copy_to_mode_reg (mode0, op0);
14100 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14101 op1 = copy_to_mode_reg (mode1, op1);
/* Emit the compare itself; op2 holds the comparison rtx used below.  */
14103 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14104 pat = GEN_FCN (d->icode) (op0, op1);
/* Materialize the flag result: STRICT_LOW_PART so only the low byte of
   the SImode pseudo is written (operands partly elided).  */
14108 emit_insn (gen_rtx_SET (VOIDmode,
14109 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14110 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo, not the QImode SUBREG view.  */
14114 return SUBREG_REG (target);
14117 /* Expand an expression EXP that calls a built-in function,
14118 with result going to TARGET if that's convenient
14119 (and in mode MODE if that's convenient).
14120 SUBTARGET may be used as the target for computing one of EXP's operands.
14121 IGNORE is nonzero if the value is to be ignored. */
/* Main dispatcher that expands an ix86 machine-specific builtin call EXP
   into RTL.  Special-cased builtins are handled in the big switch below;
   everything else falls through to table scans over bdesc_2arg,
   bdesc_1arg and bdesc_comi.
   NOTE(review): this extract is missing many interior lines (the embedded
   original numbering jumps) -- the switch header, most `break`s/`return`s
   and several condition lines are absent.  Comments describe only the
   visible code.  */
14124 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14125 enum machine_mode mode ATTRIBUTE_UNUSED,
14126 int ignore ATTRIBUTE_UNUSED)
14128 const struct builtin_description *d;
14130 enum insn_code icode;
/* The callee decl sits under an ADDR_EXPR in operand 0 of the CALL_EXPR.  */
14131 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14132 tree arglist = TREE_OPERAND (exp, 1);
14133 tree arg0, arg1, arg2;
14134 rtx op0, op1, op2, pat;
14135 enum machine_mode tmode, mode0, mode1, mode2;
14136 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* No-operand barrier/state builtins.  */
14140 case IX86_BUILTIN_EMMS:
14141 emit_insn (gen_emms ());
14144 case IX86_BUILTIN_SFENCE:
14145 emit_insn (gen_sfence ());
/* PEXTRW: extract a 16-bit element; operand 2 must be an immediate
   selector, range depends on MMX (0..3) vs SSE2 (0..7) variant.  */
14148 case IX86_BUILTIN_PEXTRW:
14149 case IX86_BUILTIN_PEXTRW128:
14150 icode = (fcode == IX86_BUILTIN_PEXTRW
14151 ? CODE_FOR_mmx_pextrw
14152 : CODE_FOR_sse2_pextrw);
14153 arg0 = TREE_VALUE (arglist);
14154 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14155 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14156 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14157 tmode = insn_data[icode].operand[0].mode;
14158 mode0 = insn_data[icode].operand[1].mode;
14159 mode1 = insn_data[icode].operand[2].mode;
14161 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14162 op0 = copy_to_mode_reg (mode0, op0);
14163 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
/* Non-immediate selector: report the error but still return a register
   of the right mode so expansion can continue.  */
14165 error ("selector must be an integer constant in the range 0..%i",
14166 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14167 return gen_reg_rtx (tmode);
14170 || GET_MODE (target) != tmode
14171 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14172 target = gen_reg_rtx (tmode);
14173 pat = GEN_FCN (icode) (target, op0, op1);
/* PINSRW: insert a 16-bit element at an immediate position.  */
14179 case IX86_BUILTIN_PINSRW:
14180 case IX86_BUILTIN_PINSRW128:
14181 icode = (fcode == IX86_BUILTIN_PINSRW
14182 ? CODE_FOR_mmx_pinsrw
14183 : CODE_FOR_sse2_pinsrw);
14184 arg0 = TREE_VALUE (arglist);
14185 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14186 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14187 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14188 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14189 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14190 tmode = insn_data[icode].operand[0].mode;
14191 mode0 = insn_data[icode].operand[1].mode;
14192 mode1 = insn_data[icode].operand[2].mode;
14193 mode2 = insn_data[icode].operand[3].mode;
14195 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14196 op0 = copy_to_mode_reg (mode0, op0);
14197 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14198 op1 = copy_to_mode_reg (mode1, op1);
14199 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14201 error ("selector must be an integer constant in the range 0..%i",
14202 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14206 || GET_MODE (target) != tmode
14207 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14208 target = gen_reg_rtx (tmode);
14209 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* MASKMOVQ/MASKMOVDQU: byte-masked store.  Insn pattern choice depends
   on 64-bit mode (rex variants).  */
14215 case IX86_BUILTIN_MASKMOVQ:
14216 case IX86_BUILTIN_MASKMOVDQU:
14217 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14218 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14219 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14220 : CODE_FOR_sse2_maskmovdqu));
14221 /* Note the arg order is different from the operand order.  */
14222 arg1 = TREE_VALUE (arglist);
14223 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14224 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14225 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14226 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14227 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14228 mode0 = insn_data[icode].operand[0].mode;
14229 mode1 = insn_data[icode].operand[1].mode;
14230 mode2 = insn_data[icode].operand[2].mode;
14232 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14233 op0 = copy_to_mode_reg (mode0, op0);
14234 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14235 op1 = copy_to_mode_reg (mode1, op1);
14236 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14237 op2 = copy_to_mode_reg (mode2, op2);
14238 pat = GEN_FCN (icode) (op0, op1, op2);
/* Scalar SSE math builtins delegate to the unop1 helper.  */
14244 case IX86_BUILTIN_SQRTSS:
14245 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14246 case IX86_BUILTIN_RSQRTSS:
14247 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14248 case IX86_BUILTIN_RCPSS:
14249 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
/* SSE load/store builtins delegate to the unop/store helpers.  */
14251 case IX86_BUILTIN_LOADAPS:
14252 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14254 case IX86_BUILTIN_LOADUPS:
14255 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14257 case IX86_BUILTIN_STOREAPS:
14258 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14260 case IX86_BUILTIN_STOREUPS:
14261 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14263 case IX86_BUILTIN_LOADSS:
14264 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14266 case IX86_BUILTIN_STORESS:
14267 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
/* LOADHPS/LOADLPS/LOADHPD/LOADLPD: merge one half of a vector from
   memory; operand 2 is wrapped in a MEM of the insn's expected mode.  */
14269 case IX86_BUILTIN_LOADHPS:
14270 case IX86_BUILTIN_LOADLPS:
14271 case IX86_BUILTIN_LOADHPD:
14272 case IX86_BUILTIN_LOADLPD:
14273 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14274 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14275 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14276 : CODE_FOR_sse2_movsd);
14277 arg0 = TREE_VALUE (arglist);
14278 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14279 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14280 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14281 tmode = insn_data[icode].operand[0].mode;
14282 mode0 = insn_data[icode].operand[1].mode;
14283 mode1 = insn_data[icode].operand[2].mode;
14285 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14286 op0 = copy_to_mode_reg (mode0, op0);
14287 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14289 || GET_MODE (target) != tmode
14290 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14291 target = gen_reg_rtx (tmode);
14292 pat = GEN_FCN (icode) (target, op0, op1);
/* STOREHPS etc.: same insns used in the store direction; the memory
   destination is also passed as the (unused) vector input operand.  */
14298 case IX86_BUILTIN_STOREHPS:
14299 case IX86_BUILTIN_STORELPS:
14300 case IX86_BUILTIN_STOREHPD:
14301 case IX86_BUILTIN_STORELPD:
14302 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14303 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14304 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14305 : CODE_FOR_sse2_movsd);
14306 arg0 = TREE_VALUE (arglist);
14307 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14308 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14309 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14310 mode0 = insn_data[icode].operand[1].mode;
14311 mode1 = insn_data[icode].operand[2].mode;
14313 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14314 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14315 op1 = copy_to_mode_reg (mode1, op1);
14317 pat = GEN_FCN (icode) (op0, op0, op1);
14323 case IX86_BUILTIN_MOVNTPS:
14324 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14325 case IX86_BUILTIN_MOVNTQ:
14326 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
/* LDMXCSR/STMXCSR go through a stack slot because the insns take a
   memory operand.  */
14328 case IX86_BUILTIN_LDMXCSR:
14329 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14330 target = assign_386_stack_local (SImode, 0);
14331 emit_move_insn (target, op0);
14332 emit_insn (gen_ldmxcsr (target));
14335 case IX86_BUILTIN_STMXCSR:
14336 target = assign_386_stack_local (SImode, 0);
14337 emit_insn (gen_stmxcsr (target));
14338 return copy_to_mode_reg (SImode, target);
/* SHUFPS/SHUFPD: two vectors plus an immediate shuffle mask.  */
14340 case IX86_BUILTIN_SHUFPS:
14341 case IX86_BUILTIN_SHUFPD:
14342 icode = (fcode == IX86_BUILTIN_SHUFPS
14343 ? CODE_FOR_sse_shufps
14344 : CODE_FOR_sse2_shufpd);
14345 arg0 = TREE_VALUE (arglist);
14346 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14347 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14348 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14349 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14350 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14351 tmode = insn_data[icode].operand[0].mode;
14352 mode0 = insn_data[icode].operand[1].mode;
14353 mode1 = insn_data[icode].operand[2].mode;
14354 mode2 = insn_data[icode].operand[3].mode;
14356 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14357 op0 = copy_to_mode_reg (mode0, op0);
14358 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14359 op1 = copy_to_mode_reg (mode1, op1);
14360 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14362 /* @@@ better error message */
14363 error ("mask must be an immediate");
14364 return gen_reg_rtx (tmode);
14367 || GET_MODE (target) != tmode
14368 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14369 target = gen_reg_rtx (tmode);
14370 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* PSHUFW/PSHUFD/PSHUFHW/PSHUFLW: one vector plus an immediate mask.  */
14376 case IX86_BUILTIN_PSHUFW:
14377 case IX86_BUILTIN_PSHUFD:
14378 case IX86_BUILTIN_PSHUFHW:
14379 case IX86_BUILTIN_PSHUFLW:
14380 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14381 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14382 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14383 : CODE_FOR_mmx_pshufw);
14384 arg0 = TREE_VALUE (arglist);
14385 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14386 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14387 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14388 tmode = insn_data[icode].operand[0].mode;
14389 mode1 = insn_data[icode].operand[1].mode;
14390 mode2 = insn_data[icode].operand[2].mode;
14392 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14393 op0 = copy_to_mode_reg (mode1, op0);
14394 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14396 /* @@@ better error message */
14397 error ("mask must be an immediate");
14401 || GET_MODE (target) != tmode
14402 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14403 target = gen_reg_rtx (tmode);
14404 pat = GEN_FCN (icode) (target, op0, op1);
/* PSLLDQ/PSRLDQ on the full 128-bit value: operate in TImode via
   subregs of a V2DImode pseudo; shift amount must be immediate.  */
14410 case IX86_BUILTIN_PSLLDQI128:
14411 case IX86_BUILTIN_PSRLDQI128:
14412 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14413 : CODE_FOR_sse2_lshrti3);
14414 arg0 = TREE_VALUE (arglist);
14415 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14416 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14417 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14418 tmode = insn_data[icode].operand[0].mode;
14419 mode1 = insn_data[icode].operand[1].mode;
14420 mode2 = insn_data[icode].operand[2].mode;
14422 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14424 op0 = copy_to_reg (op0);
14425 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14427 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14429 error ("shift must be an immediate")
14432 target = gen_reg_rtx (V2DImode);
14433 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
/* 3DNow! builtins: all delegate to the generic unop/binop helpers.  */
14439 case IX86_BUILTIN_FEMMS:
14440 emit_insn (gen_femms ());
14443 case IX86_BUILTIN_PAVGUSB:
14444 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14446 case IX86_BUILTIN_PF2ID:
14447 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14449 case IX86_BUILTIN_PFACC:
14450 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14452 case IX86_BUILTIN_PFADD:
14453 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14455 case IX86_BUILTIN_PFCMPEQ:
14456 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14458 case IX86_BUILTIN_PFCMPGE:
14459 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14461 case IX86_BUILTIN_PFCMPGT:
14462 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14464 case IX86_BUILTIN_PFMAX:
14465 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14467 case IX86_BUILTIN_PFMIN:
14468 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14470 case IX86_BUILTIN_PFMUL:
14471 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14473 case IX86_BUILTIN_PFRCP:
14474 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14476 case IX86_BUILTIN_PFRCPIT1:
14477 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14479 case IX86_BUILTIN_PFRCPIT2:
14480 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14482 case IX86_BUILTIN_PFRSQIT1:
14483 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14485 case IX86_BUILTIN_PFRSQRT:
14486 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14488 case IX86_BUILTIN_PFSUB:
14489 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14491 case IX86_BUILTIN_PFSUBR:
14492 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14494 case IX86_BUILTIN_PI2FD:
14495 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14497 case IX86_BUILTIN_PMULHRW:
14498 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14500 case IX86_BUILTIN_PF2IW:
14501 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14503 case IX86_BUILTIN_PFNACC:
14504 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14506 case IX86_BUILTIN_PFPNACC:
14507 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14509 case IX86_BUILTIN_PI2FW:
14510 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14512 case IX86_BUILTIN_PSWAPDSI:
14513 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14515 case IX86_BUILTIN_PSWAPDSF:
14516 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
/* Zeroing builtins: emit explicit clear patterns.  */
14518 case IX86_BUILTIN_SSE_ZERO:
14519 target = gen_reg_rtx (V4SFmode);
14520 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14523 case IX86_BUILTIN_MMX_ZERO:
14524 target = gen_reg_rtx (DImode);
14525 emit_insn (gen_mmx_clrdi (target));
14528 case IX86_BUILTIN_CLRTI:
14529 target = gen_reg_rtx (V2DImode);
14530 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
/* SSE2 double-precision builtins.  */
14534 case IX86_BUILTIN_SQRTSD:
14535 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14536 case IX86_BUILTIN_LOADAPD:
14537 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14538 case IX86_BUILTIN_LOADUPD:
14539 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14541 case IX86_BUILTIN_STOREAPD:
14542 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14543 case IX86_BUILTIN_STOREUPD:
14544 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14546 case IX86_BUILTIN_LOADSD:
14547 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14549 case IX86_BUILTIN_STORESD:
14550 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
/* SETPD1: broadcast one double via a stack slot + loadsd + shufpd(0).  */
14552 case IX86_BUILTIN_SETPD1:
14553 target = assign_386_stack_local (DFmode, 0);
14554 arg0 = TREE_VALUE (arglist);
14555 emit_move_insn (adjust_address (target, DFmode, 0),
14556 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14557 op0 = gen_reg_rtx (V2DFmode);
14558 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14559 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
/* SETPD: build a V2DF on the stack from two doubles, then movapd it.  */
14562 case IX86_BUILTIN_SETPD:
14563 target = assign_386_stack_local (V2DFmode, 0);
14564 arg0 = TREE_VALUE (arglist);
14565 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14566 emit_move_insn (adjust_address (target, DFmode, 0),
14567 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14568 emit_move_insn (adjust_address (target, DFmode, 8),
14569 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14570 op0 = gen_reg_rtx (V2DFmode);
14571 emit_insn (gen_sse2_movapd (op0, target));
/* LOADRPD: aligned load, then swap halves with shufpd(1).  */
14574 case IX86_BUILTIN_LOADRPD:
14575 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14576 gen_reg_rtx (V2DFmode), 1);
14577 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
/* LOADPD1: scalar load, then duplicate low half with shufpd(0).  */
14580 case IX86_BUILTIN_LOADPD1:
14581 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14582 gen_reg_rtx (V2DFmode), 1);
14583 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14586 case IX86_BUILTIN_STOREPD1:
14587 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14588 case IX86_BUILTIN_STORERPD:
14589 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14591 case IX86_BUILTIN_CLRPD:
14592 target = gen_reg_rtx (V2DFmode);
14593 emit_insn (gen_sse_clrv2df (target));
/* SSE2 fences and cache-line flush.  */
14596 case IX86_BUILTIN_MFENCE:
14597 emit_insn (gen_sse2_mfence ());
14599 case IX86_BUILTIN_LFENCE:
14600 emit_insn (gen_sse2_lfence ());
14603 case IX86_BUILTIN_CLFLUSH:
14604 arg0 = TREE_VALUE (arglist);
14605 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14606 icode = CODE_FOR_sse2_clflush;
14607 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14608 op0 = copy_to_mode_reg (Pmode, op0);
14610 emit_insn (gen_sse2_clflush (op0));
14613 case IX86_BUILTIN_MOVNTPD:
14614 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14615 case IX86_BUILTIN_MOVNTDQ:
14616 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14617 case IX86_BUILTIN_MOVNTI:
14618 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14620 case IX86_BUILTIN_LOADDQA:
14621 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14622 case IX86_BUILTIN_LOADDQU:
14623 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14624 case IX86_BUILTIN_LOADD:
14625 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14627 case IX86_BUILTIN_STOREDQA:
14628 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14629 case IX86_BUILTIN_STOREDQU:
14630 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14631 case IX86_BUILTIN_STORED:
14632 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
/* SSE3 MONITOR/MWAIT: all operands forced into SImode registers.  */
14634 case IX86_BUILTIN_MONITOR:
14635 arg0 = TREE_VALUE (arglist);
14636 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14637 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14638 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14639 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14640 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14642 op0 = copy_to_mode_reg (SImode, op0);
14644 op1 = copy_to_mode_reg (SImode, op1);
14646 op2 = copy_to_mode_reg (SImode, op2);
14647 emit_insn (gen_monitor (op0, op1, op2));
14650 case IX86_BUILTIN_MWAIT:
14651 arg0 = TREE_VALUE (arglist);
14652 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14653 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14654 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14656 op0 = copy_to_mode_reg (SImode, op0);
14658 op1 = copy_to_mode_reg (SImode, op1);
14659 emit_insn (gen_mwait (op0, op1));
14662 case IX86_BUILTIN_LOADDDUP:
14663 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14665 case IX86_BUILTIN_LDDQU:
14666 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
/* Fallback: scan the description tables.  Two-operand builtins first;
   SSE compare patterns are routed through the dedicated helper.  */
14673 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14674 if (d->code == fcode)
14676 /* Compares are treated specially.  */
14677 if (d->icode == CODE_FOR_maskcmpv4sf3
14678 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14679 || d->icode == CODE_FOR_maskncmpv4sf3
14680 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14681 || d->icode == CODE_FOR_maskcmpv2df3
14682 || d->icode == CODE_FOR_vmmaskcmpv2df3
14683 || d->icode == CODE_FOR_maskncmpv2df3
14684 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14685 return ix86_expand_sse_compare (d, arglist, target);
14687 return ix86_expand_binop_builtin (d->icode, arglist, target);
14690 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14691 if (d->code == fcode)
14692 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14694 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14695 if (d->code == fcode)
14696 return ix86_expand_sse_comi (d, arglist, target);
14698 /* @@@ Should really do something sensible here.  */
14702 /* Store OPERAND to the memory after reload is completed. This means
14703 that we can't easily use assign_stack_local. */
/* Spill OPERAND (of machine mode MODE) to memory after reload, when
   assign_stack_local is no longer usable.  Uses the red zone below the
   stack pointer on 64-bit targets that have one, and PRE_DEC pushes
   otherwise.  Returns a MEM addressing the stored value.
   NOTE(review): several interior lines are missing from this extract
   (the switch header, abort/default paths, some SET source operands),
   so comments describe only what is visible.  */
14705 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14708 if (!reload_completed)
/* Red-zone path: store into the 128 bytes below RSP -- no SP adjust.  */
14710 if (TARGET_RED_ZONE)
14712 result = gen_rtx_MEM (mode,
14713 gen_rtx_PLUS (Pmode,
14715 GEN_INT (-RED_ZONE_SIZE)));
14716 emit_move_insn (result, operand);
/* 64-bit without red zone: push the whole value as one DImode word.  */
14718 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14724 operand = gen_lowpart (DImode, operand);
14728 gen_rtx_SET (VOIDmode,
14729 gen_rtx_MEM (DImode,
14730 gen_rtx_PRE_DEC (DImode,
14731 stack_pointer_rtx)),
14737 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode words and push each.  */
14746 split_di (&operand, 1, operands, operands + 1);
14748 gen_rtx_SET (VOIDmode,
14749 gen_rtx_MEM (SImode,
14750 gen_rtx_PRE_DEC (Pmode,
14751 stack_pointer_rtx)),
14754 gen_rtx_SET (VOIDmode,
14755 gen_rtx_MEM (SImode,
14756 gen_rtx_PRE_DEC (Pmode,
14757 stack_pointer_rtx)),
14762 /* It is better to store HImodes as SImodes.  */
14763 if (!TARGET_PARTIAL_REG_STALL)
14764 operand = gen_lowpart (SImode, operand);
14768 gen_rtx_SET (VOIDmode,
14769 gen_rtx_MEM (GET_MODE (operand),
14770 gen_rtx_PRE_DEC (SImode,
14771 stack_pointer_rtx)),
14777 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14782 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the temporary of mode MODE back off the
   stack.  A no-op when the red zone was used (no SP adjustment was made).
   NOTE(review): the size computation lines are partly missing from this
   extract; the visible conditions choose the deallocation amount.  */
14784 ix86_free_from_memory (enum machine_mode mode)
14786 if (!TARGET_RED_ZONE)
14790 if (mode == DImode || TARGET_64BIT)
14792 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14796 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14797 to pop or add instruction if registers are available.  */
14798 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14799 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14804 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14805 QImode must go into class Q_REGS.
14806 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14807 movdf to do mem-to-mem moves through integer regs. */
/* Implement PREFERRED_RELOAD_CLASS: given value X to be reloaded into a
   register of CLASS, return a (possibly narrower) class that can
   actually hold X.  See the block comment above (original file) for the
   CONST_DOUBLE / Q_REGS / ALL_REGS rationale.
   NOTE(review): several return values in this extract are missing (the
   lines the conditions fall through to), so only the tests themselves
   are documented here.  */
14809 ix86_preferred_reload_class (rtx x, enum reg_class class)
/* Non-zero vector constants cannot be materialized directly.  */
14811 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
/* Floating-point CONST_DOUBLE: decide between x87, general and memory.  */
14813 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14815 /* SSE can't load any constant directly yet.  */
14816 if (SSE_CLASS_P (class))
14818 /* Floats can load 0 and 1.  */
14819 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14821 /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
14822 if (MAYBE_SSE_CLASS_P (class))
14823 return (reg_class_subset_p (class, GENERAL_REGS)
14824 ? GENERAL_REGS : FLOAT_REGS);
14828 /* General regs can load everything.  */
14829 if (reg_class_subset_p (class, GENERAL_REGS))
14830 return GENERAL_REGS;
14831 /* In case we haven't resolved FLOAT or SSE yet, give up.  */
14832 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot hold constants.  */
14835 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must end up in a class of byte-addressable regs.  */
14837 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14842 /* If we are copying between general and FP registers, we need a memory
14843 location. The same is true for SSE and MMX registers.
14845 The macro can't work reliably when one of the CLASSES is class containing
14846 registers from multiple units (SSE, MMX, integer). We avoid this by never
14847 combining those units in single alternative in the machine description.
14848 Ensure that this constraint holds to avoid unexpected surprises.
14850 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14851 enforce these sanity checks. */
/* Return nonzero when moving a MODE value between CLASS1 and CLASS2
   requires going through memory (general <-> x87, and SSE/MMX <-> other
   units unless direct inter-unit moves are allowed).  When STRICT, the
   mixed-unit sanity check is enforced (see comment above in original
   file).  NOTE(review): the abort/return between the two visible
   expressions is missing from this extract.  */
14853 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14854 enum machine_mode mode, int strict)
/* Sanity: no class may mix FLOAT/SSE/MMX registers with others; the
   MAYBE_* predicate must agree with the exact predicate for each unit.  */
14856 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14857 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14858 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14859 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14860 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14861 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory needed between x87 and anything else, or between SSE/MMX and
   other units unless the mode fits a word and inter-unit moves are on.  */
14868 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14869 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14870 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14871 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14872 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14874 /* Return the cost of moving data from a register in class CLASS1 to
14875 one in class CLASS2.
14877 It is not required that the cost always equal 2 when FROM is the same as TO;
14878 on some machines it is expensive to move between registers if they are not
14879 general registers. */
/* Implement REGISTER_MOVE_COST: cost of copying a MODE value from a
   register in CLASS1 to one in CLASS2, based on the active ix86_cost
   table.  NOTE(review): some lines are missing from this extract
   (cost variable declaration, the returns after the size-mismatch and
   FP/MMX-overlap checks), so comments cover only the visible logic.  */
14881 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14882 enum reg_class class2)
14884 /* In case we require secondary memory, compute cost of the store followed
14885 by load.  In order to avoid bad register allocation choices, we need
14886 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14888 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14892 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14893 MEMORY_MOVE_COST (mode, class1, 1));
14894 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14895 MEMORY_MOVE_COST (mode, class2, 1));
14897 /* In case of copying from general_purpose_register we may emit multiple
14898 stores followed by single load causing memory size mismatch stall.
14899 Count this as arbitrarily high cost of 20.  */
14900 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14903 /* In the case of FP/MMX moves, the registers actually overlap, and we
14904 have to switch modes in order to treat them differently.  */
14905 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14906 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14912 /* Moves between SSE/MMX and integer unit are expensive.  */
14913 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14914 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14915 return ix86_cost->mmxsse_to_integer;
/* Intra-unit moves: per-unit costs from the processor cost table.  */
14916 if (MAYBE_FLOAT_CLASS_P (class1))
14917 return ix86_cost->fp_move;
14918 if (MAYBE_SSE_CLASS_P (class1))
14919 return ix86_cost->sse_move;
14920 if (MAYBE_MMX_CLASS_P (class1))
14921 return ix86_cost->mmx_move;
14925 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.
   NOTE(review): a few lines are missing from this extract (the `return 0`
   bodies after several conditions), so comments describe the visible
   tests only.  */
14927 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14929 /* Flags and only flags can only hold CCmode values.  */
14930 if (CC_REGNO_P (regno))
14931 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC / RANDOM / PARTIAL_INT modes are rejected for all other regs.  */
14932 if (GET_MODE_CLASS (mode) == MODE_CC
14933 || GET_MODE_CLASS (mode) == MODE_RANDOM
14934 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14936 if (FP_REGNO_P (regno))
14937 return VALID_FP_MODE_P (mode);
14938 if (SSE_REGNO_P (regno))
14940 /* HACK! We didn't change all of the constraints for SSE1 for the
14941 scalar modes on the branch.  Fortunately, they're not required
14942 for ABI compatibility.  */
14943 if (!TARGET_SSE2 && !VECTOR_MODE_P (mode))
14944 return VALID_SSE_REG_MODE (mode);
14946 /* We implement the move patterns for all vector modes into and
14947 out of SSE registers, even when no operation instructions
14949 return (VALID_SSE_REG_MODE (mode)
14950 || VALID_SSE2_REG_MODE (mode)
14951 || VALID_MMX_REG_MODE (mode)
14952 || VALID_MMX_REG_MODE_3DNOW (mode));
14954 if (MMX_REGNO_P (regno))
14956 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14957 so if the register is available at all, then we can move data of
14958 the given mode into or out of it.  */
14959 return (VALID_MMX_REG_MODE (mode)
14960 || VALID_MMX_REG_MODE_3DNOW (mode));
14962 /* We handle both integer and floats in the general purpose registers.
14963 In future we should be able to handle vector modes as well.  */
14964 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14966 /* Take care for QImode values - they can be in non-QI regs, but then
14967 they do cause partial register stalls.  */
14968 if (regno < 4 || mode != QImode || TARGET_64BIT)
14970 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14973 /* Return the cost of moving data of mode M between a
14974 register and memory. A value of 2 is the default; this cost is
14975 relative to those in `REGISTER_MOVE_COST'.
14977 If moving between registers and memory is more expensive than
14978 between two registers, you should define this macro to express the
14981 Model also increased moving costs of QImode registers in non
/* Implement MEMORY_MOVE_COST: cost of moving a MODE value between memory
   and a register in CLASS.  IN nonzero means a load, zero means a store.
   Looks up the per-size load/store cost tables for the x87, SSE, MMX and
   integer units.  NOTE(review): the index computations and several
   switch cases are missing from this extract; comments cover only the
   visible fall-through logic.  */
14985 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
/* x87 registers: fp_load/fp_store indexed by operand size.  */
14987 if (FLOAT_CLASS_P (class))
15004 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE registers: sse_load/sse_store indexed by operand size.  */
15006 if (SSE_CLASS_P (class))
15009 switch (GET_MODE_SIZE (mode))
15023 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX registers: mmx_load/mmx_store indexed by operand size.  */
15025 if (MMX_CLASS_P (class))
15028 switch (GET_MODE_SIZE (mode))
15039 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, by operand size.  Byte ops outside Q_REGS need
   movzbl on load and pay a penalty on store.  */
15041 switch (GET_MODE_SIZE (mode))
15045 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15046 : ix86_cost->movzbl_load);
15048 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15049 : ix86_cost->int_store[0] + 4);
15052 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15054 /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
15055 if (mode == TFmode)
/* Wider modes: word-sized move cost times the number of words.  */
15057 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15058 * (((int) GET_MODE_SIZE (mode)
15059 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15063 /* Compute a (partial) cost for rtx X. Return true if the complete
15064 cost has been computed, and false if subexpressions should be
15065 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this listing is non-contiguous (source line numbers skip),
   so the switch/case structure and braces of this function are elided.  */
15068 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15070 enum machine_mode mode = GET_MODE (x);
/* Integer constants: cheap when representable as a sign- or zero-extended
   32-bit immediate on x86-64; PIC symbolic constants get an extra charge.  */
15078 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15080 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15082 else if (flag_pic && SYMBOLIC_CONST (x)
/* FIX: was "|| (!GET_CODE (x) != LABEL_REF".  Unary '!' binds tighter than
   '!=', so the old test compared (!code) -- i.e. 0 or 1 -- against LABEL_REF
   and was effectively always true, charging the PIC penalty even for local
   label references.  The intent, per the SYMBOL_REF arm below, is to exempt
   label refs and local symbol refs.  */
15084 || (GET_CODE (x) != LABEL_REF
15085 && (GET_CODE (x) != SYMBOL_REF
15086 || !SYMBOL_REF_LOCAL_P (x)))))
15093 if (mode == VOIDmode)
/* FP constants: standard_80387_constant_p recognizes values loadable by
   fld1/fldz etc.; anything else is costed as a constant-pool load.  */
15096 switch (standard_80387_constant_p (x))
15101 default: /* Other constants */
15106 /* Start with (MEM (SYMBOL_REF)), since that's where
15107 it'll probably end up. Add a penalty for size. */
15108 *total = (COSTS_N_INSNS (1)
15109 + (flag_pic != 0 && !TARGET_64BIT)
15110 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15116 /* The zero extensions is often completely free on x86_64, so make
15117 it as cheap as possible. */
15118 if (TARGET_64BIT && mode == DImode
15119 && GET_MODE (XEXP (x, 0)) == SImode)
15121 else if (TARGET_ZERO_EXTEND_WITH_AND)
15122 *total = COSTS_N_INSNS (ix86_cost->add)
15124 *total = COSTS_N_INSNS (ix86_cost->movzx);
15128 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shifts by a constant: shift-by-1 is as cheap as an add; shifts by 2 or 3
   may be done with lea when that is no dearer than a constant shift.  */
15132 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15133 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15135 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15138 *total = COSTS_N_INSNS (ix86_cost->add);
15141 if ((value == 2 || value == 3)
15142 && !TARGET_DECOMPOSE_LEA
15143 && ix86_cost->lea <= ix86_cost->shift_const)
15145 *total = COSTS_N_INSNS (ix86_cost->lea);
/* 32-bit target shifting a DImode value: done in two halves, hence the
   doubled costs; variable DImode shifts need a test-and-branch sequence.  */
15155 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15157 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15159 if (INTVAL (XEXP (x, 1)) > 32)
15160 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15162 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15166 if (GET_CODE (XEXP (x, 1)) == AND)
15167 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15169 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15174 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15175 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15177 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* Multiply: for a constant multiplier the cost scales with the number of
   set bits (nbits), modelling shift-and-add synthesis.  */
15182 if (FLOAT_MODE_P (mode))
15183 *total = COSTS_N_INSNS (ix86_cost->fmul);
15184 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15186 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15189 for (nbits = 0; value != 0; value >>= 1)
15192 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15193 + nbits * ix86_cost->mult_bit);
15197 /* This is arbitrary */
15198 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15199 + 7 * ix86_cost->mult_bit);
15207 if (FLOAT_MODE_P (mode))
15208 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15210 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: recognize (base + index*scale + disp) shapes that fit a single lea
   and cost them as lea plus the costs of the participating subexpressions.  */
15214 if (FLOAT_MODE_P (mode))
15215 *total = COSTS_N_INSNS (ix86_cost->fadd);
15216 else if (!TARGET_DECOMPOSE_LEA
15217 && GET_MODE_CLASS (mode) == MODE_INT
15218 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15220 if (GET_CODE (XEXP (x, 0)) == PLUS
15221 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15222 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15223 && CONSTANT_P (XEXP (x, 1)))
15225 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15226 if (val == 2 || val == 4 || val == 8)
15228 *total = COSTS_N_INSNS (ix86_cost->lea);
15229 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15230 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15232 *total += rtx_cost (XEXP (x, 1), outer_code);
15236 else if (GET_CODE (XEXP (x, 0)) == MULT
15237 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15239 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15240 if (val == 2 || val == 4 || val == 8)
15242 *total = COSTS_N_INSNS (ix86_cost->lea);
15243 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15244 *total += rtx_cost (XEXP (x, 1), outer_code);
15248 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15250 *total = COSTS_N_INSNS (ix86_cost->lea);
15251 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15252 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15253 *total += rtx_cost (XEXP (x, 1), outer_code);
15260 if (FLOAT_MODE_P (mode))
15262 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* 32-bit DImode add/sub: two adds; operands not already DImode are doubled
   (shifted left by 1) to account for extension work.  */
15270 if (!TARGET_64BIT && mode == DImode)
15272 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15273 + (rtx_cost (XEXP (x, 0), outer_code)
15274 << (GET_MODE (XEXP (x, 0)) != DImode))
15275 + (rtx_cost (XEXP (x, 1), outer_code)
15276 << (GET_MODE (XEXP (x, 1)) != DImode)));
15282 if (FLOAT_MODE_P (mode))
15284 *total = COSTS_N_INSNS (ix86_cost->fchs);
15290 if (!TARGET_64BIT && mode == DImode)
15291 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15293 *total = COSTS_N_INSNS (ix86_cost->add);
15297 if (!TARGET_SSE_MATH
15299 || (mode == DFmode && !TARGET_SSE2))
15304 if (FLOAT_MODE_P (mode))
15305 *total = COSTS_N_INSNS (ix86_cost->fabs);
15309 if (FLOAT_MODE_P (mode))
15310 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15314 if (XINT (x, 1) == UNSPEC_TP)
15323 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3 constructor support: emit "pushl $<symbol>" into the init section so
   the startup code can pop and call each constructor.  PRIORITY is unused.  */
15325 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15328 fputs ("\tpushl $", asm_out_file);
15329 assemble_name (asm_out_file, XSTR (symbol, 0));
15330 fputc ('\n', asm_out_file);
/* Monotonic counter used to generate unique local labels (LPC$n, Ln$lz)
   per emitted stub.  */
15336 static int current_machopic_label_num;
15338 /* Given a symbol name and its associated stub, write out the
15339 definition of the stub. */
15342 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15344 unsigned int length;
15345 char *binder_name, *symbol_name, lazy_ptr_name[32];
15346 int label = ++current_machopic_label_num;
15348 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15349 symb = (*targetm.strip_name_encoding) (symb);
/* alloca-sized buffers: +32 leaves room for the decoration the
   GEN_*_NAME_FOR_* macros append.  */
15351 length = strlen (stub);
15352 binder_name = alloca (length + 32);
15353 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15355 length = strlen (symb);
15356 symbol_name = alloca (length + 32);
15357 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15359 sprintf (lazy_ptr_name, "L%d$lz", label);
/* PIC stub: compute the current PC via call/pop, then jump indirectly
   through the lazy pointer.  Non-PIC stub jumps through it directly.
   (Conditional structure is elided in this listing.)  */
15362 machopic_picsymbol_stub_section ();
15364 machopic_symbol_stub_section ();
15366 fprintf (file, "%s:\n", stub);
15367 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15371 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15372 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15373 fprintf (file, "\tjmp %%edx\n");
15376 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer's address and tail-jump to dyld's
   stub-binding helper, which resolves the symbol on first use.  */
15378 fprintf (file, "%s:\n", binder_name);
15382 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15383 fprintf (file, "\tpushl %%eax\n");
15386 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15388 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder; dyld overwrites it with
   the resolved address.  */
15390 machopic_lazy_symbol_ptr_section ();
15391 fprintf (file, "%s:\n", lazy_ptr_name);
15392 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15393 fprintf (file, "\t.long %s\n", binder_name);
15395 #endif /* TARGET_MACHO */
15397 /* Order the registers for register allocator. */
15400 x86_order_regs_for_local_alloc (void)
/* Fills reg_alloc_order[]: caller-saved GPRs first, then callee-saved GPRs,
   then x87/SSE (order depends on TARGET_SSE_MATH), then MMX, then padding.  */
15405 /* First allocate the local general purpose registers. */
15406 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15407 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15408 reg_alloc_order [pos++] = i;
15410 /* Global general purpose registers. */
15411 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15412 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15413 reg_alloc_order [pos++] = i;
15415 /* x87 registers come first in case we are doing FP math
15417 if (!TARGET_SSE_MATH)
15418 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15419 reg_alloc_order [pos++] = i;
15421 /* SSE registers. */
15422 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15423 reg_alloc_order [pos++] = i;
15424 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15425 reg_alloc_order [pos++] = i;
15427 /* x87 registers. */
15428 if (TARGET_SSE_MATH)
15429 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15430 reg_alloc_order [pos++] = i;
15432 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15433 reg_alloc_order [pos++] = i;
15435 /* Initialize the rest of array as we do not allocate some registers
15437 while (pos < FIRST_PSEUDO_REGISTER)
15438 reg_alloc_order [pos++] = 0;
15441 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15442 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15445 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15446 struct attribute_spec.handler. */
15448 ix86_handle_struct_attribute (tree *node, tree name,
15449 tree args ATTRIBUTE_UNUSED,
15450 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* For a TYPE_DECL the attribute applies to the declared type; otherwise
   NODE is (presumably) already a type -- intermediate lines are elided.  */
15453 if (DECL_P (*node))
15455 if (TREE_CODE (*node) == TYPE_DECL)
15456 type = &TREE_TYPE (*node);
/* Only struct/union types may carry these attributes; warn and drop
   the attribute otherwise.  */
15461 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15462 || TREE_CODE (*type) == UNION_TYPE)))
15464 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15465 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
15468 else if ((is_attribute_p ("ms_struct", name)
15469 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15470 || ((is_attribute_p ("gcc_struct", name)
15471 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15473 warning ("`%s' incompatible attribute ignored",
15474 IDENTIFIER_POINTER (name));
15475 *no_add_attrs = true;
/* Return nonzero if RECORD_TYPE should use MS bit-field layout: either the
   target default is MS layout and the type does not opt out via gcc_struct,
   or the type explicitly opts in via ms_struct (note the ms_struct clause
   applies regardless of TARGET_USE_MS_BITFIELD_LAYOUT).  */
15482 ix86_ms_bitfield_layout_p (tree record_type)
15484 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15485 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15486 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15489 /* Returns an expression indicating where the this parameter is
15490 located on entry to the FUNCTION. */
15493 x86_this_parameter (tree function)
15495 tree type = TREE_TYPE (function);
/* 64-bit: `this' is in the first integer parameter register, or the second
   when the aggregate return pointer occupies the first.  */
15499 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15500 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit regparm/fastcall: `this' arrives in a register (elided lines
   compute REGNO; fastcall is handled specially below).  */
15503 if (ix86_function_regparm (type, function) > 0)
15507 parm = TYPE_ARG_TYPES (type);
15508 /* Figure out whether or not the function has a variable number of
15510 for (; parm; parm = TREE_CHAIN (parm))
15511 if (TREE_VALUE (parm) == void_type_node)
15513 /* If not, the this parameter is in the first argument. */
15517 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15519 return gen_rtx_REG (SImode, regno);
/* Otherwise `this' is on the stack: above the return address, and above
   the hidden aggregate-return pointer when there is one.  */
15523 if (aggregate_value_p (TREE_TYPE (type), type))
15524 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15526 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15529 /* Determine whether x86_output_mi_thunk can succeed. */
15532 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15533 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15534 HOST_WIDE_INT vcall_offset, tree function)
15536 /* 64-bit can handle anything. */
15540 /* For 32-bit, everything's fine if we have one free register. */
15541 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15544 /* Need a free register for vcall_offset. */
15548 /* Need a free register for GOT references. */
15549 if (flag_pic && !(*targetm.binds_local_p) (function))
15552 /* Otherwise ok. */
15556 /* Output the assembler code for a thunk function. THUNK_DECL is the
15557 declaration for the thunk function itself, FUNCTION is the decl for
15558 the target function. DELTA is an immediate constant offset to be
15559 added to THIS. If VCALL_OFFSET is nonzero, the word at
15560 *(*this + vcall_offset) should be added to THIS. */
15563 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15564 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15565 HOST_WIDE_INT vcall_offset, tree function)
15568 rtx this = x86_this_parameter (function);
15571 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15572 pull it in now and let DELTA benefit. */
15575 else if (vcall_offset)
15577 /* Put the this parameter into %eax. */
15579 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15580 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15583 this_reg = NULL_RTX;
15585 /* Adjust the this parameter by a fixed constant. */
15588 xops[0] = GEN_INT (delta);
15589 xops[1] = this_reg ? this_reg : this;
/* On 64-bit, DELTA may not fit an immediate: stage it through r10.  */
15592 if (!x86_64_general_operand (xops[0], DImode))
15594 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15596 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15600 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15603 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15606 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register: r10 on 64-bit; ecx on 32-bit, or eax for fastcall
   since fastcall passes arguments in ecx/edx.  */
15610 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15613 int tmp_regno = 2 /* ECX */;
15614 if (lookup_attribute ("fastcall",
15615 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15616 tmp_regno = 0 /* EAX */;
15617 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into TMP.  */
15620 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15623 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15625 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15627 /* Adjust the this parameter. */
15628 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* 64-bit: a vcall_offset too large for a displacement goes through r11.  */
15629 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15631 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15632 xops[0] = GEN_INT (vcall_offset);
15634 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15635 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15637 xops[1] = this_reg;
15639 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15641 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15644 /* If necessary, drop THIS back to its stack slot. */
15645 if (this_reg && this_reg != this)
15647 xops[0] = this_reg;
15649 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function: direct jmp when it binds locally,
   otherwise through the GOT (64-bit), a Mach-O stub (Darwin), or a
   freshly set-up GOT register (32-bit ELF PIC).  */
15652 xops[0] = XEXP (DECL_RTL (function), 0);
15655 if (!flag_pic || (*targetm.binds_local_p) (function))
15656 output_asm_insn ("jmp\t%P0", xops);
15659 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15660 tmp = gen_rtx_CONST (Pmode, tmp);
15661 tmp = gen_rtx_MEM (QImode, tmp);
15663 output_asm_insn ("jmp\t%A0", xops);
15668 if (!flag_pic || (*targetm.binds_local_p) (function))
15669 output_asm_insn ("jmp\t%P0", xops);
15674 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15675 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15676 tmp = gen_rtx_MEM (QImode, tmp);
15678 output_asm_insn ("jmp\t%0", xops);
15681 #endif /* TARGET_MACHO */
15683 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15684 output_set_got (tmp);
15687 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15688 output_asm_insn ("jmp\t{*}%1", xops);
/* Emit target-specific prologue directives at the start of the assembly
   file: optional .version, optional __fltused global, and .intel_syntax
   when the Intel assembler dialect was requested.  */
15694 x86_file_start (void)
15696 default_file_start ();
15697 if (X86_FILE_START_VERSION_DIRECTIVE)
15698 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15699 if (X86_FILE_START_FLTUSED)
15700 fputs ("\t.global\t__fltused\n", asm_out_file);
15701 if (ix86_asm_dialect == ASM_INTEL)
15702 fputs ("\t.intel_syntax\n", asm_out_file);
/* Cap the alignment of a struct FIELD at 32 bits for scalar integer,
   DFmode and DCmode types on 32-bit targets without -malign-double,
   matching the traditional i386 ABI.  COMPUTED is the default alignment.  */
15706 x86_field_alignment (tree field, int computed)
15708 enum machine_mode mode;
15709 tree type = TREE_TYPE (field);
15711 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type's mode decides the cap.  */
15713 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15714 ? get_inner_array_type (type) : type);
15715 if (mode == DFmode || mode == DCmode
15716 || GET_MODE_CLASS (mode) == MODE_INT
15717 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15718 return MIN (32, computed);
15722 /* Output assembler code to FILE to increment profiler label # LABELNO
15723 for profiling a function entry. */
15725 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* Four variants (branch structure elided in this listing): 64-bit PIC,
   64-bit non-PIC, 32-bit PIC, 32-bit non-PIC.  Each loads the per-function
   counter label (unless NO_PROFILE_COUNTERS) and calls MCOUNT_NAME.  */
15730 #ifndef NO_PROFILE_COUNTERS
15731 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15733 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15737 #ifndef NO_PROFILE_COUNTERS
15738 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15740 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15744 #ifndef NO_PROFILE_COUNTERS
15745 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15746 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15748 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15752 #ifndef NO_PROFILE_COUNTERS
15753 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15754 PROFILE_COUNT_REGISTER);
15756 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15760 /* We don't have exact information about the insn sizes, but we may assume
15761 quite safely that we are informed about all 1 byte insns and memory
15762 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded size,
   used by k8_avoid_jump_misspredicts below.  */
15766 min_insn_size (rtx insn)
/* Non-insns and inactive insns occupy no bytes.  */
15770 if (!INSN_P (insn) || !active_insn_p (insn))
15773 /* Discard alignments we've emit and jump instructions. */
15774 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15775 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15777 if (GET_CODE (insn) == JUMP_INSN
15778 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15779 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
15782 /* Important case - calls are always 5 bytes.
15783 It is common to have many calls in the row. */
15784 if (GET_CODE (insn) == CALL_INSN
15785 && symbolic_reference_mentioned_p (PATTERN (insn))
15786 && !SIBLING_CALL_P (insn))
15788 if (get_attr_length (insn) <= 1)
15791 /* For normal instructions we may rely on the sizes of addresses
15792 and the presence of symbol to require 4 bytes of encoding.
15793 This is not the case for jumps where references are PC relative. */
15794 if (GET_CODE (insn) != JUMP_INSN)
15796 l = get_attr_length_address (insn);
15797 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15806 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15810 k8_avoid_jump_misspredicts (void)
15812 rtx insn, start = get_insns ();
15813 int nbytes = 0, njumps = 0;
15816 /* Look for all minimal intervals of instructions containing 4 jumps.
15817 The intervals are bounded by START and INSN. NBYTES is the total
15818 size of instructions in the interval including INSN and not including
15819 START. When the NBYTES is smaller than 16 bytes, it is possible
15820 that the end of START and INSN ends up in the same 16byte page.
15822 The smallest offset in the page INSN can start is the case where START
15823 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15824 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15826 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15829 nbytes += min_insn_size (insn);
15831 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15832 INSN_UID (insn), min_insn_size (insn));
/* Count INSN as a jump: conditional/unconditional jumps (but not jump
   tables) and calls all consume branch-predictor slots.  */
15833 if ((GET_CODE (insn) == JUMP_INSN
15834 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15835 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15836 || GET_CODE (insn) == CALL_INSN
/* Shrink the window from the front until it holds at most 3 jumps.  */
15843 start = NEXT_INSN (start);
15844 if ((GET_CODE (start) == JUMP_INSN
15845 && GET_CODE (PATTERN (start)) != ADDR_VEC
15846 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15847 || GET_CODE (start) == CALL_INSN)
15848 njumps--, isjump = 1;
15851 nbytes -= min_insn_size (start);
15856 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15857 INSN_UID (start), INSN_UID (insn), nbytes);
/* 4th jump within 16 bytes: pad before INSN so it starts a new window.
   NOTE(review): the header comment says maxskip "17 - NBYTES + sizeof",
   the code uses 15 - nbytes + size -- confirm which is intended.  */
15859 if (njumps == 3 && isjump && nbytes < 16)
15861 int padsize = 15 - nbytes + min_insn_size (insn);
15864 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15865 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15870 /* Implement machine specific optimizations.
15871 At the moment we implement single transformation: AMD Athlon works faster
15872 when RET is not destination of conditional jump or directly preceded
15873 by other jump instruction. We avoid the penalty by inserting NOP just
15874 before the RET instructions in such cases. */
/* NOTE(review): the function header is elided in this listing; this is
   presumably the machine-dependent reorg hook (ix86_reorg) -- confirm.  */
15880 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
/* Examine every predecessor of the exit block whose last insn is a RET.  */
15882 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15884 basic_block bb = e->src;
15885 rtx ret = BB_END (bb);
15887 bool replace = false;
15889 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15890 || !maybe_hot_bb_p (bb))
/* Find the nearest active insn or label before the RET.  */
15892 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15893 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* RET directly after a label: a jump may land straight on it.  */
15895 if (prev && GET_CODE (prev) == CODE_LABEL)
15898 for (e = bb->pred; e; e = e->pred_next)
15899 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15900 && !(e->flags & EDGE_FALLTHRU))
/* RET directly preceded by a conditional jump or a call.  */
15905 prev = prev_active_insn (ret);
15907 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15908 || GET_CODE (prev) == CALL_INSN))
15910 /* Empty functions get branch mispredict even when the jump destination
15911 is not visible to us. */
15912 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace the plain RET with the longer encoding to break the pattern.  */
15917 emit_insn_before (gen_return_internal_long (), ret);
15921 k8_avoid_jump_misspredicts ();
15924 /* Return nonzero when QImode register that must be represented via REX prefix
15927 x86_extended_QIreg_mentioned_p (rtx insn)
/* Scan the extracted operands; any hard register numbered >= 4 used in
   QImode needs a REX prefix (spl/bpl/sil/dil and r8b-r15b).  */
15930 extract_insn_cached (insn);
15931 for (i = 0; i < recog_data.n_operands; i++)
15932 if (REG_P (recog_data.operand[i])
15933 && REGNO (recog_data.operand[i]) >= 4)
15938 /* Return nonzero when P points to register encoded via REX prefix.
15939 Called via for_each_rtx. */
15941 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15943 unsigned int regno;
15946 regno = REGNO (*p);
15947 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15950 /* Return true when INSN mentions register that must be encoded using REX
15953 x86_extended_reg_mentioned_p (rtx insn)
15955 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15958 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15959 optabs would emit if we didn't have TFmode patterns. */
15962 x86_emit_floatuns (rtx operands[2])
15964 rtx neglab, donelab, i0, i1, f0, in, out;
15965 enum machine_mode mode, inmode;
15967 inmode = GET_MODE (operands[1]);
15968 if (inmode != SImode
15969 && inmode != DImode)
15973 in = force_reg (inmode, operands[1]);
15974 mode = GET_MODE (out);
15975 neglab = gen_label_rtx ();
15976 donelab = gen_label_rtx ();
15977 i1 = gen_reg_rtx (Pmode);
15978 f0 = gen_reg_rtx (mode);
/* Non-negative input: a plain signed conversion is already correct.  */
15980 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15982 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15983 emit_jump_insn (gen_jump (donelab));
15986 emit_label (neglab);
/* High-bit-set input: convert ((in >> 1) | (in & 1)) -- halving while
   preserving rounding via the sticky low bit -- then double the result.  */
15988 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15989 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15990 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15991 expand_float (f0, i0, 0);
15992 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15994 emit_label (donelab);
15997 /* Return if we do not know how to pass TYPE solely in registers. */
15999 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
16001 if (default_must_pass_in_stack (mode, type))
/* Additionally force TImode aggregates onto the stack on 32-bit targets.  */
16003 return (!TARGET_64BIT && type && mode == TImode);
16006 /* Initialize vector TARGET via VALS. */
16008 ix86_expand_vector_init (rtx target, rtx vals)
16010 enum machine_mode mode = GET_MODE (target);
16011 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16012 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
/* Scan backwards to find the first (lowest-index) non-constant element;
   the surrounding bookkeeping is elided in this listing.  */
16015 for (i = n_elts - 1; i >= 0; i--)
16016 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
16017 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
16020 /* Few special cases first...
16021 ... constants are best loaded from constant pool. */
16024 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16028 /* ... values where only first field is non-constant are best loaded
16029 from the pool and overwriten via move later. */
16032 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
16033 GET_MODE_INNER (mode), 0);
16035 op = force_reg (mode, op);
/* Zero element 0, load the rest from the pool, then merge the variable
   element in with movsd/movss.  */
16036 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
16037 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16038 switch (GET_MODE (target))
16041 emit_insn (gen_sse2_movsd (target, target, op));
16044 emit_insn (gen_sse_movss (target, target, op));
16052 /* And the busy sequence doing rotations. */
16053 switch (GET_MODE (target))
/* V2DF: widen both scalars to vectors and interleave with unpcklpd.  */
16058 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
16060 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
16062 vecop0 = force_reg (V2DFmode, vecop0);
16063 vecop1 = force_reg (V2DFmode, vecop1);
16064 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
/* V4SF: two unpcklps passes merge the four scalars into one vector.  */
16070 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16072 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16074 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16076 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16077 rtx tmp1 = gen_reg_rtx (V4SFmode);
16078 rtx tmp2 = gen_reg_rtx (V4SFmode);
16080 vecop0 = force_reg (V4SFmode, vecop0);
16081 vecop1 = force_reg (V4SFmode, vecop1);
16082 vecop2 = force_reg (V4SFmode, vecop2);
16083 vecop3 = force_reg (V4SFmode, vecop3);
16084 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16085 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16086 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16094 #include "gt-i386.h"