/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
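
/* For example, MODE_INDEX (SImode) == 2, so indexing one of the
   five-element cost tables below with it selects the SImode entry;
   anything wider than DImode falls through to index 4.  */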
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  19,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  4,					/* constant shift costs */
  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
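
/* Each x86_* tuning bitmask below is a set of the m_* processor bits
   above; the current -mtune selection is tested against it with
   TUNEMASK, i.e. (1 << ix86_tune), as in the accumulate-outgoing-args
   check in override_options further down.  */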
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding
   the need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
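
/* Per the x86-64 psABI, a TImode integer value, for instance, comes back
   in the RAX:RDX pair, i.e. the first two entries above.  */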
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
 */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
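
/* With the 64-bit values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8, the
   register save area works out to 6*8 + 8*16 = 176 bytes.  */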
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;
/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;
/* ix86_regparm_string as a number.  */
static int ix86_regparm;
/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};

static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);
struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static void k8_avoid_jump_misspredicts (void);
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);
/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

struct gcc_target targetm = TARGET_INITIALIZER;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;

  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
1092 {"i386", PROCESSOR_I386, 0},
1093 {"i486", PROCESSOR_I486, 0},
1094 {"i586", PROCESSOR_PENTIUM, 0},
1095 {"pentium", PROCESSOR_PENTIUM, 0},
1096 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1097 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1098 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1099 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1100 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1101 {"i686", PROCESSOR_PENTIUMPRO, 0},
1102 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1103 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1104 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1105 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1106 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1107 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1108 | PTA_MMX | PTA_PREFETCH_SSE},
1109 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1110 | PTA_MMX | PTA_PREFETCH_SSE},
1111 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1112 | PTA_MMX | PTA_PREFETCH_SSE},
1113 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1114 | PTA_MMX | PTA_PREFETCH_SSE},
1115 {"k6", PROCESSOR_K6, PTA_MMX},
1116 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1117 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1118 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1120 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1121 | PTA_3DNOW | PTA_3DNOW_A},
1122 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1123 | PTA_3DNOW_A | PTA_SSE},
1124 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1125 | PTA_3DNOW_A | PTA_SSE},
1126 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1127 | PTA_3DNOW_A | PTA_SSE},
1128 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1129 | PTA_SSE | PTA_SSE2 },
1130 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1131 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1132 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1133 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1134 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1135 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1136 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1137 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
  int const pta_size = ARRAY_SIZE (processor_alias_table);
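
  /* Thus, for instance, -march=athlon-xp selects PROCESSOR_ATHLON and
     implies MMX, 3DNow!, extended 3DNow! and SSE, unless the user
     explicitly overrode those masks on the command line.  */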
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");

	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1269 ix86_cost = &size_cost;
1271 ix86_cost = processor_target_table[ix86_tune].cost;
1272 target_flags |= processor_target_table[ix86_tune].target_enable;
1273 target_flags &= ~processor_target_table[ix86_tune].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }
  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }
  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
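
  /* The same log2 convention applies here: -mpreferred-stack-boundary=4
     requests (1 << 4) * 8 = 128 bits, i.e. 16-byte stack alignment.  */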
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;
  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    {
      ix86_fpmath = FPMATH_387;
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use it when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by default.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }
  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line
     option specifying them, we will set the defaults in
     override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { NULL,        0, 0, false, false, false, NULL }
};
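
/* For instance, a declaration such as

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   routes through ix86_handle_regparm_attribute above and makes f take
   its three arguments in EAX, EDX and ECX rather than on the stack.  */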
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1586 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1587 arguments as in struct attribute_spec.handler. */
1589 ix86_handle_cdecl_attribute (tree *node, tree name,
1590 tree args ATTRIBUTE_UNUSED,
1591 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1593 if (TREE_CODE (*node) != FUNCTION_TYPE
1594 && TREE_CODE (*node) != METHOD_TYPE
1595 && TREE_CODE (*node) != FIELD_DECL
1596 && TREE_CODE (*node) != TYPE_DECL)
1598 warning ("`%s' attribute only applies to functions",
1599 IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1604 if (is_attribute_p ("fastcall", name))
1606 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1608 error ("fastcall and stdcall attributes are not compatible");
1610 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1612 error ("fastcall and regparm attributes are not compatible");
1615 else if (is_attribute_p ("stdcall", name))
1617 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1619 error ("fastcall and stdcall attributes are not compatible");
1626 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1627 *no_add_attrs = true;
1633 /* Handle a "regparm" attribute;
1634 arguments as in struct attribute_spec.handler. */
1636 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1637 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1639 if (TREE_CODE (*node) != FUNCTION_TYPE
1640 && TREE_CODE (*node) != METHOD_TYPE
1641 && TREE_CODE (*node) != FIELD_DECL
1642 && TREE_CODE (*node) != TYPE_DECL)
1644 warning ("`%s' attribute only applies to functions",
1645 IDENTIFIER_POINTER (name));
1646 *no_add_attrs = true;
1652 cst = TREE_VALUE (args);
1653 if (TREE_CODE (cst) != INTEGER_CST)
1655 warning ("`%s' attribute requires an integer constant argument",
1656 IDENTIFIER_POINTER (name));
1657 *no_add_attrs = true;
1659 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1661 warning ("argument to `%s' attribute larger than %d",
1662 IDENTIFIER_POINTER (name), REGPARM_MAX);
1663 *no_add_attrs = true;
1666 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1668 error ("fastcall and regparm attributes are not compatible");
1675 /* Return 0 if the attributes for two types are incompatible, 1 if they
1676 are compatible, and 2 if they are nearly compatible (which causes a
1677 warning to be generated). */
1680 ix86_comp_type_attributes (tree type1, tree type2)
1682 /* Check for mismatch of non-default calling convention. */
1683 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1685 if (TREE_CODE (type1) != FUNCTION_TYPE)
1688 /* Check for mismatched fastcall types */
1689 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1690 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1693 /* Check for mismatched return types (cdecl vs stdcall). */
1694 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1695 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1697 if (ix86_function_regparm (type1, NULL)
1698 != ix86_function_regparm (type2, NULL))
1703 /* Return the regparm value for a function with the indicated TYPE and DECL.
1704 DECL may be NULL when calling a function indirectly
1705 or considering a libcall. */
1708 ix86_function_regparm (tree type, tree decl)
1711 int regparm = ix86_regparm;
1712 bool user_convention = false;
1716 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1719 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1720 user_convention = true;
1723 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1726 user_convention = true;
1729 /* Use register calling convention for local functions when possible. */
1730 if (!TARGET_64BIT && !user_convention && decl
1731 && flag_unit_at_a_time && !profile_flag)
1733 struct cgraph_local_info *i = cgraph_local_info (decl);
1736 /* We can't use regparm(3) for nested functions, as these use the
1737 static chain pointer in the third argument. */
1738 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1748 /* Return true if EAX is live at the start of the function. Used by
1749 ix86_expand_prologue to determine if we need special help before
1750 calling allocate_stack_worker. */
1753 ix86_eax_live_at_start_p (void)
1755 /* Cheat. Don't bother working forward from ix86_function_regparm
1756 to the function type to whether an actual argument is located in
1757 eax. Instead just look at cfg info, which is still close enough
1758 to correct at this point. This gives false positives for broken
1759 functions that might use uninitialized data that happens to be
1760 allocated in eax, but who cares? */
1761 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1764 /* Value is the number of bytes of arguments automatically
1765 popped when returning from a subroutine call.
1766 FUNDECL is the declaration node of the function (as a tree),
1767 FUNTYPE is the data type of the function (as a tree),
1768 or for a library call it is an identifier node for the subroutine name.
1769 SIZE is the number of bytes of arguments passed on the stack.
1771 On the 80386, the RTD insn may be used to pop them if the number
1772 of args is fixed, but if the number is variable then the caller
1773 must pop them all. RTD can't be used for library calls now
1774 because the library is compiled with the Unix compiler.
1775 Use of RTD is a selectable option, since it is incompatible with
1776 standard Unix calling sequences. If the option is not selected,
1777 the caller must always pop the args.
1779 The attribute stdcall is equivalent to RTD on a per module basis. */
1782 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1784 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1786 /* Cdecl functions override -mrtd, and never pop the stack. */
1787 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1789 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1791 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1792 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1796 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1797 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1798 == void_type_node)))
1802 /* Lose any fake structure return argument if it is passed on the stack. */
1803 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1806 int nregs = ix86_function_regparm (funtype, fundecl);
1809 return GET_MODE_SIZE (Pmode);
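/* A concrete reading of the rules above (editor's sketch): for

       void __attribute__ ((stdcall)) f (int a, int b);

   this function returns 8, so the callee returns with `ret $8'.
   Declare f with a trailing ellipsis instead and the argument count
   becomes variable, 0 is returned, and the caller pops the args. */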
1815 /* Argument support functions. */
1817 /* Return true when REGNO may be used to pass function parameters. */
1819 ix86_function_arg_regno_p (int regno)
1823 return (regno < REGPARM_MAX
1824 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1825 if (SSE_REGNO_P (regno) && TARGET_SSE)
1827 /* RAX is used as a hidden argument to va_arg functions. */
1830 for (i = 0; i < REGPARM_MAX; i++)
1831 if (regno == x86_64_int_parameter_registers[i])
1836 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1837 for a call to a function whose data type is FNTYPE.
1838 For a library call, FNTYPE is 0. */
1841 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1842 tree fntype, /* tree ptr for function decl */
1843 rtx libname, /* SYMBOL_REF of library name or 0 */
1846 static CUMULATIVE_ARGS zero_cum;
1847 tree param, next_param;
1849 if (TARGET_DEBUG_ARG)
1851 fprintf (stderr, "\ninit_cumulative_args (");
1853 fprintf (stderr, "fntype code = %s, ret code = %s",
1854 tree_code_name[(int) TREE_CODE (fntype)],
1855 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1857 fprintf (stderr, "no fntype");
1860 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1865 /* Set up the number of registers to use for passing arguments. */
1867 cum->nregs = ix86_function_regparm (fntype, fndecl);
1869 cum->nregs = ix86_regparm;
1870 cum->sse_nregs = SSE_REGPARM_MAX;
1871 cum->mmx_nregs = MMX_REGPARM_MAX;
1872 cum->warn_sse = true;
1873 cum->warn_mmx = true;
1874 cum->maybe_vaarg = false;
1876 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1877 if (fntype && !TARGET_64BIT)
1879 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1887 /* Determine if this function has variable arguments. This is
1888 indicated by the last argument being 'void_type_node' if there
1889 are no variable arguments. If there are variable arguments, then
1890 we won't pass anything in registers */
1892 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1894 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1895 param != 0; param = next_param)
1897 next_param = TREE_CHAIN (param);
1898 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1909 cum->maybe_vaarg = true;
1913 if ((!fntype && !libname)
1914 || (fntype && !TYPE_ARG_TYPES (fntype)))
1915 cum->maybe_vaarg = 1;
1917 if (TARGET_DEBUG_ARG)
1918 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1923 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1924 of this code is to classify each eightbyte of an incoming argument by the register
1925 class and assign registers accordingly. */
1927 /* Return the union class of CLASS1 and CLASS2.
1928 See the x86-64 PS ABI for details. */
1930 static enum x86_64_reg_class
1931 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1933 /* Rule #1: If both classes are equal, this is the resulting class. */
1934 if (class1 == class2)
1937 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1939 if (class1 == X86_64_NO_CLASS)
1941 if (class2 == X86_64_NO_CLASS)
1944 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1945 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1946 return X86_64_MEMORY_CLASS;
1948 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1949 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1950 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1951 return X86_64_INTEGERSI_CLASS;
1952 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1953 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1954 return X86_64_INTEGER_CLASS;
1956 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1957 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1958 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1959 return X86_64_MEMORY_CLASS;
1961 /* Rule #6: Otherwise class SSE is used. */
1962 return X86_64_SSE_CLASS;
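/* Worked example (editor's sketch) of the classification that follows:

       struct s { double d; int i; };

   occupies 16 bytes, i.e. two eightbytes. Field d classifies the
   first eightbyte as SSEDF and field i classifies the second as
   INTEGERSI, so examine_argument reports one SSE and one integer
   register, and the struct is passed in an XMM register plus a GPR
   rather than in memory. */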
1965 /* Classify the argument of type TYPE and mode MODE.
1966 CLASSES will be filled with the register class used to pass each word
1967 of the operand. The number of words is returned. In case the parameter
1968 should be passed in memory, 0 is returned. As a special case for zero
1969 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1971 BIT_OFFSET is used internally for handling records; it specifies the
1972 offset in bits, modulo 256, to avoid overflow cases.
1974 See the x86-64 PS ABI for details.
1978 classify_argument (enum machine_mode mode, tree type,
1979 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1981 HOST_WIDE_INT bytes =
1982 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1983 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1985 /* Variable sized entities are always passed/returned in memory. */
1989 if (mode != VOIDmode
1990 && MUST_PASS_IN_STACK (mode, type))
1993 if (type && AGGREGATE_TYPE_P (type))
1997 enum x86_64_reg_class subclasses[MAX_CLASSES];
1999 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2003 for (i = 0; i < words; i++)
2004 classes[i] = X86_64_NO_CLASS;
2006 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2007 signal the memory class, so handle this as a special case. */
2010 classes[0] = X86_64_NO_CLASS;
2014 /* Classify each field of the record and merge the classes. */
2015 if (TREE_CODE (type) == RECORD_TYPE)
2017 /* For classes, first merge in the fields of the subclasses. */
2018 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2020 tree bases = TYPE_BINFO_BASETYPES (type);
2021 int n_bases = TREE_VEC_LENGTH (bases);
2024 for (i = 0; i < n_bases; ++i)
2026 tree binfo = TREE_VEC_ELT (bases, i);
2028 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2029 tree type = BINFO_TYPE (binfo);
2031 num = classify_argument (TYPE_MODE (type),
2033 (offset + bit_offset) % 256);
2036 for (i = 0; i < num; i++)
2038 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2040 merge_classes (subclasses[i], classes[i + pos]);
2044 /* And now merge the fields of the structure. */
2045 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2047 if (TREE_CODE (field) == FIELD_DECL)
2051 /* Bitfields are always classified as integer. Handle them
2052 early, since later code would consider them to be
2053 misaligned integers. */
2054 if (DECL_BIT_FIELD (field))
2056 for (i = int_bit_position (field) / 8 / 8;
2057 i < (int_bit_position (field)
2058 + tree_low_cst (DECL_SIZE (field), 0)
2061 merge_classes (X86_64_INTEGER_CLASS,
2066 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2067 TREE_TYPE (field), subclasses,
2068 (int_bit_position (field)
2069 + bit_offset) % 256);
2072 for (i = 0; i < num; i++)
2075 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2077 merge_classes (subclasses[i], classes[i + pos]);
2083 /* Arrays are handled as small records. */
2084 else if (TREE_CODE (type) == ARRAY_TYPE)
2087 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2088 TREE_TYPE (type), subclasses, bit_offset);
2092 /* The partial classes are now full classes. */
2093 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2094 subclasses[0] = X86_64_SSE_CLASS;
2095 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2096 subclasses[0] = X86_64_INTEGER_CLASS;
2098 for (i = 0; i < words; i++)
2099 classes[i] = subclasses[i % num];
2101 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2102 else if (TREE_CODE (type) == UNION_TYPE
2103 || TREE_CODE (type) == QUAL_UNION_TYPE)
2105 /* For classes, first merge in the fields of the subclasses. */
2106 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2108 tree bases = TYPE_BINFO_BASETYPES (type);
2109 int n_bases = TREE_VEC_LENGTH (bases);
2112 for (i = 0; i < n_bases; ++i)
2114 tree binfo = TREE_VEC_ELT (bases, i);
2116 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2117 tree type = BINFO_TYPE (binfo);
2119 num = classify_argument (TYPE_MODE (type),
2121 (offset + (bit_offset % 64)) % 256);
2124 for (i = 0; i < num; i++)
2126 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2128 merge_classes (subclasses[i], classes[i + pos]);
2132 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2134 if (TREE_CODE (field) == FIELD_DECL)
2137 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2138 TREE_TYPE (field), subclasses,
2142 for (i = 0; i < num; i++)
2143 classes[i] = merge_classes (subclasses[i], classes[i]);
2147 else if (TREE_CODE (type) == SET_TYPE)
2151 classes[0] = X86_64_INTEGERSI_CLASS;
2154 else if (bytes <= 8)
2156 classes[0] = X86_64_INTEGER_CLASS;
2159 else if (bytes <= 12)
2161 classes[0] = X86_64_INTEGER_CLASS;
2162 classes[1] = X86_64_INTEGERSI_CLASS;
2167 classes[0] = X86_64_INTEGER_CLASS;
2168 classes[1] = X86_64_INTEGER_CLASS;
2175 /* Final merger cleanup. */
2176 for (i = 0; i < words; i++)
2178 /* If one class is MEMORY, everything should be passed in memory. */
2180 if (classes[i] == X86_64_MEMORY_CLASS)
2183 /* The X86_64_SSEUP_CLASS should always be preceded by
2184 X86_64_SSE_CLASS. */
2185 if (classes[i] == X86_64_SSEUP_CLASS
2186 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2187 classes[i] = X86_64_SSE_CLASS;
2189 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2190 if (classes[i] == X86_64_X87UP_CLASS
2191 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2192 classes[i] = X86_64_SSE_CLASS;
2197 /* Compute the alignment needed. We align all types to their natural boundaries,
2198 with the exception of XFmode, which is aligned to 64 bits. */
2199 if (mode != VOIDmode && mode != BLKmode)
2201 int mode_alignment = GET_MODE_BITSIZE (mode);
2204 mode_alignment = 128;
2205 else if (mode == XCmode)
2206 mode_alignment = 256;
2207 if (COMPLEX_MODE_P (mode))
2208 mode_alignment /= 2;
2209 /* Misaligned fields are always returned in memory. */
2210 if (bit_offset % mode_alignment)
2214 /* Classification of atomic types. */
2224 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2225 classes[0] = X86_64_INTEGERSI_CLASS;
2227 classes[0] = X86_64_INTEGER_CLASS;
2231 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2234 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2235 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2238 if (!(bit_offset % 64))
2239 classes[0] = X86_64_SSESF_CLASS;
2241 classes[0] = X86_64_SSE_CLASS;
2244 classes[0] = X86_64_SSEDF_CLASS;
2247 classes[0] = X86_64_X87_CLASS;
2248 classes[1] = X86_64_X87UP_CLASS;
2254 classes[0] = X86_64_X87_CLASS;
2255 classes[1] = X86_64_X87UP_CLASS;
2256 classes[2] = X86_64_X87_CLASS;
2257 classes[3] = X86_64_X87UP_CLASS;
2260 classes[0] = X86_64_SSEDF_CLASS;
2261 classes[1] = X86_64_SSEDF_CLASS;
2264 classes[0] = X86_64_SSE_CLASS;
2272 classes[0] = X86_64_SSE_CLASS;
2273 classes[1] = X86_64_SSEUP_CLASS;
2288 /* Examine the argument and set the number of registers required in each
2289 class. Return 0 iff the parameter should be passed in memory. */
2291 examine_argument (enum machine_mode mode, tree type, int in_return,
2292 int *int_nregs, int *sse_nregs)
2294 enum x86_64_reg_class class[MAX_CLASSES];
2295 int n = classify_argument (mode, type, class, 0);
2301 for (n--; n >= 0; n--)
2304 case X86_64_INTEGER_CLASS:
2305 case X86_64_INTEGERSI_CLASS:
2308 case X86_64_SSE_CLASS:
2309 case X86_64_SSESF_CLASS:
2310 case X86_64_SSEDF_CLASS:
2313 case X86_64_NO_CLASS:
2314 case X86_64_SSEUP_CLASS:
2316 case X86_64_X87_CLASS:
2317 case X86_64_X87UP_CLASS:
2321 case X86_64_MEMORY_CLASS:
2326 /* Construct a container for the argument as used by the GCC interface. See
2327 FUNCTION_ARG for a detailed description. */
2329 construct_container (enum machine_mode mode, tree type, int in_return,
2330 int nintregs, int nsseregs, const int * intreg,
2333 enum machine_mode tmpmode;
2335 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2336 enum x86_64_reg_class class[MAX_CLASSES];
2340 int needed_sseregs, needed_intregs;
2341 rtx exp[MAX_CLASSES];
2344 n = classify_argument (mode, type, class, 0);
2345 if (TARGET_DEBUG_ARG)
2348 fprintf (stderr, "Memory class\n");
2351 fprintf (stderr, "Classes:");
2352 for (i = 0; i < n; i++)
2354 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2356 fprintf (stderr, "\n");
2361 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2363 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2366 /* First construct the simple cases. Avoid SCmode, since we want to use a
2367 single register to pass this type. */
2368 if (n == 1 && mode != SCmode)
2371 case X86_64_INTEGER_CLASS:
2372 case X86_64_INTEGERSI_CLASS:
2373 return gen_rtx_REG (mode, intreg[0]);
2374 case X86_64_SSE_CLASS:
2375 case X86_64_SSESF_CLASS:
2376 case X86_64_SSEDF_CLASS:
2377 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2378 case X86_64_X87_CLASS:
2379 return gen_rtx_REG (mode, FIRST_STACK_REG);
2380 case X86_64_NO_CLASS:
2381 /* Zero sized array, struct or class. */
2386 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2388 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2390 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2391 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2392 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2393 && class[1] == X86_64_INTEGER_CLASS
2394 && (mode == CDImode || mode == TImode || mode == TFmode)
2395 && intreg[0] + 1 == intreg[1])
2396 return gen_rtx_REG (mode, intreg[0]);
2398 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2399 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2401 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2403 /* Otherwise figure out the entries of the PARALLEL. */
2404 for (i = 0; i < n; i++)
2408 case X86_64_NO_CLASS:
2410 case X86_64_INTEGER_CLASS:
2411 case X86_64_INTEGERSI_CLASS:
2412 /* Merge TImodes on aligned occasions here too. */
2413 if (i * 8 + 8 > bytes)
2414 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2415 else if (class[i] == X86_64_INTEGERSI_CLASS)
2419 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2420 if (tmpmode == BLKmode)
2422 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2423 gen_rtx_REG (tmpmode, *intreg),
2427 case X86_64_SSESF_CLASS:
2428 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2429 gen_rtx_REG (SFmode,
2430 SSE_REGNO (sse_regno)),
2434 case X86_64_SSEDF_CLASS:
2435 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2436 gen_rtx_REG (DFmode,
2437 SSE_REGNO (sse_regno)),
2441 case X86_64_SSE_CLASS:
2442 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2446 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2447 gen_rtx_REG (tmpmode,
2448 SSE_REGNO (sse_regno)),
2450 if (tmpmode == TImode)
2458 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2459 for (i = 0; i < nexps; i++)
2460 XVECEXP (ret, 0, i) = exp [i];
2464 /* Update the data in CUM to advance over an argument
2465 of mode MODE and data type TYPE.
2466 (TYPE is null for libcalls where that information may not be available.) */
2469 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2470 enum machine_mode mode, /* current arg mode */
2471 tree type, /* type of the argument or 0 if lib support */
2472 int named) /* whether or not the argument was named */
2475 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2476 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2478 if (TARGET_DEBUG_ARG)
2480 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2481 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2484 int int_nregs, sse_nregs;
2485 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2486 cum->words += words;
2487 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2489 cum->nregs -= int_nregs;
2490 cum->sse_nregs -= sse_nregs;
2491 cum->regno += int_nregs;
2492 cum->sse_regno += sse_nregs;
2495 cum->words += words;
2499 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2500 && (!type || !AGGREGATE_TYPE_P (type)))
2502 cum->sse_words += words;
2503 cum->sse_nregs -= 1;
2504 cum->sse_regno += 1;
2505 if (cum->sse_nregs <= 0)
2511 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2512 && (!type || !AGGREGATE_TYPE_P (type)))
2514 cum->mmx_words += words;
2515 cum->mmx_nregs -= 1;
2516 cum->mmx_regno += 1;
2517 if (cum->mmx_nregs <= 0)
2525 cum->words += words;
2526 cum->nregs -= words;
2527 cum->regno += words;
2529 if (cum->nregs <= 0)
2539 /* Define where to put the arguments to a function.
2540 Value is zero to push the argument on the stack,
2541 or a hard register in which to store the argument.
2543 MODE is the argument's machine mode.
2544 TYPE is the data type of the argument (as a tree).
2545 This is null for libcalls where that information may
2547 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2548 the preceding args and about the function being called.
2549 NAMED is nonzero if this argument is a named parameter
2550 (otherwise it is an extra parameter matching an ellipsis). */
2553 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2554 enum machine_mode mode, /* current arg mode */
2555 tree type, /* type of the argument or 0 if lib support */
2556 int named) /* != 0 for normal args, == 0 for ... args */
2560 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2561 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2562 static bool warnedsse, warnedmmx;
2564 /* Handle a hidden AL argument containing the number of registers for varargs
2565 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid any AL settings. */
2567 if (mode == VOIDmode)
2570 return GEN_INT (cum->maybe_vaarg
2571 ? (cum->sse_nregs < 0
2579 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2580 &x86_64_int_parameter_registers [cum->regno],
2585 /* For now, pass fp/complex values on the stack. */
2597 if (words <= cum->nregs)
2599 int regno = cum->regno;
2601 /* Fastcall allocates the first two DWORD (SImode) or
2602 smaller arguments to ECX and EDX. */
2605 if (mode == BLKmode || mode == DImode)
2608 /* ECX, not EAX, is the first allocated register. */
2612 ret = gen_rtx_REG (mode, regno);
2622 if (!type || !AGGREGATE_TYPE_P (type))
2624 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2627 warning ("SSE vector argument without SSE enabled "
2631 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2638 if (!type || !AGGREGATE_TYPE_P (type))
2640 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2643 warning ("MMX vector argument without MMX enabled "
2647 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2652 if (TARGET_DEBUG_ARG)
2655 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2656 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2659 print_simple_rtl (stderr, ret);
2661 fprintf (stderr, ", stack");
2663 fprintf (stderr, " )\n");
2669 /* A C expression that indicates when an argument must be passed by
2670 reference. If nonzero for an argument, a copy of that argument is
2671 made in memory and a pointer to the argument is passed instead of
2672 the argument itself. The pointer is passed in whatever way is
2673 appropriate for passing a pointer to that type. */
2676 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2677 enum machine_mode mode ATTRIBUTE_UNUSED,
2678 tree type, int named ATTRIBUTE_UNUSED)
2683 if (type && int_size_in_bytes (type) == -1)
2685 if (TARGET_DEBUG_ARG)
2686 fprintf (stderr, "function_arg_pass_by_reference\n");
2693 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing ABI. */
2696 contains_128bit_aligned_vector_p (tree type)
2698 enum machine_mode mode = TYPE_MODE (type);
2699 if (SSE_REG_MODE_P (mode)
2700 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2702 if (TYPE_ALIGN (type) < 128)
2705 if (AGGREGATE_TYPE_P (type))
2707 /* Walk the aggregates recursively. */
2708 if (TREE_CODE (type) == RECORD_TYPE
2709 || TREE_CODE (type) == UNION_TYPE
2710 || TREE_CODE (type) == QUAL_UNION_TYPE)
2714 if (TYPE_BINFO (type) != NULL
2715 && TYPE_BINFO_BASETYPES (type) != NULL)
2717 tree bases = TYPE_BINFO_BASETYPES (type);
2718 int n_bases = TREE_VEC_LENGTH (bases);
2721 for (i = 0; i < n_bases; ++i)
2723 tree binfo = TREE_VEC_ELT (bases, i);
2724 tree type = BINFO_TYPE (binfo);
2726 if (contains_128bit_aligned_vector_p (type))
2730 /* And now merge the fields of the structure. */
2731 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2733 if (TREE_CODE (field) == FIELD_DECL
2734 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2738 /* Just for use in case some language passes arrays by value. */
2739 else if (TREE_CODE (type) == ARRAY_TYPE)
2741 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2750 /* Gives the alignment boundary, in bits, of an argument with the
2751 specified mode and type. */
2754 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2758 align = TYPE_ALIGN (type);
2760 align = GET_MODE_ALIGNMENT (mode);
2761 if (align < PARM_BOUNDARY)
2762 align = PARM_BOUNDARY;
2765 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
2766 make an exception for SSE modes, since these require 128bit alignment.
2769 The handling here differs from field_alignment. ICC aligns MMX
2770 arguments to 4 byte boundaries, while structure fields are aligned
2771 to 8 byte boundaries. */
2774 if (!SSE_REG_MODE_P (mode))
2775 align = PARM_BOUNDARY;
2779 if (!contains_128bit_aligned_vector_p (type))
2780 align = PARM_BOUNDARY;
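/* Editor's illustration: a plain int, or even a double, therefore stays
   at PARM_BOUNDARY (32 bits) on the stack, while an SSE vector such as
   a V4SFmode value, or a structure containing one, is placed at a
   128 bit boundary. */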
2788 /* Return true if N is a possible register number of a function value. */
2790 ix86_function_value_regno_p (int regno)
2794 return ((regno) == 0
2795 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2796 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2798 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2799 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2800 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2803 /* Define how to find the value returned by a function.
2804 VALTYPE is the data type of the value (as a tree).
2805 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2806 otherwise, FUNC is 0. */
2808 ix86_function_value (tree valtype)
2812 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2813 REGPARM_MAX, SSE_REGPARM_MAX,
2814 x86_64_int_return_registers, 0);
2815 /* For zero-sized structures, construct_container returns NULL, but we need
2816 to keep the rest of the compiler happy by returning a meaningful value. */
2818 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2822 return gen_rtx_REG (TYPE_MODE (valtype),
2823 ix86_value_regno (TYPE_MODE (valtype)));
2826 /* Return true iff TYPE is returned in memory. */
2828 ix86_return_in_memory (tree type)
2830 int needed_intregs, needed_sseregs, size;
2831 enum machine_mode mode = TYPE_MODE (type);
2834 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2836 if (mode == BLKmode)
2839 size = int_size_in_bytes (type);
2841 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2844 if (VECTOR_MODE_P (mode) || mode == TImode)
2846 /* User-created vectors small enough to fit in EAX. */
2850 /* MMX/3dNow values are returned on the stack, since we've
2851 got to EMMS/FEMMS before returning. */
2855 /* SSE values are returned in XMM0. */
2856 /* ??? Except when it doesn't exist? We have a choice of
2857 either (1) being abi incompatible with a -march switch,
2858 or (2) generating an error here. Given no good solution,
2859 I think the safest thing is one warning. The user won't
2860 be able to use -Werror, but.... */
2871 warning ("SSE vector return without SSE enabled "
2886 /* Define how to find the value returned by a library function
2887 assuming the value has mode MODE. */
2889 ix86_libcall_value (enum machine_mode mode)
2899 return gen_rtx_REG (mode, FIRST_SSE_REG);
2902 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2907 return gen_rtx_REG (mode, 0);
2911 return gen_rtx_REG (mode, ix86_value_regno (mode));
2914 /* Given a mode, return the register to use for a return value. */
2917 ix86_value_regno (enum machine_mode mode)
2919 /* Floating point return values in %st(0). */
2920 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2921 return FIRST_FLOAT_REG;
2922 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2923 we prevent this case when SSE is not available. */
2924 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2925 return FIRST_SSE_REG;
2926 /* Everything else in %eax. */
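/* E.g. (editor's note): with TARGET_FLOAT_RETURNS_IN_80387 a double
   comes back in %st(0), a 16-byte vector value in %xmm0, and an int
   in %eax. */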
2930 /* Create the va_list data type. */
2933 ix86_build_builtin_va_list (void)
2935 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2937 /* For i386 we use a plain pointer to the argument area. */
2939 return build_pointer_type (char_type_node);
2941 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2942 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2944 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2945 unsigned_type_node);
2946 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2947 unsigned_type_node);
2948 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2950 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2953 DECL_FIELD_CONTEXT (f_gpr) = record;
2954 DECL_FIELD_CONTEXT (f_fpr) = record;
2955 DECL_FIELD_CONTEXT (f_ovf) = record;
2956 DECL_FIELD_CONTEXT (f_sav) = record;
2958 TREE_CHAIN (record) = type_decl;
2959 TYPE_NAME (record) = type_decl;
2960 TYPE_FIELDS (record) = f_gpr;
2961 TREE_CHAIN (f_gpr) = f_fpr;
2962 TREE_CHAIN (f_fpr) = f_ovf;
2963 TREE_CHAIN (f_ovf) = f_sav;
2965 layout_type (record);
2967 /* The correct type is an array type of one element. */
2968 return build_array_type (record, build_index_type (size_zero_node));
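/* For reference (editor's note), the record built above corresponds to
   the C-level type required by the x86-64 psABI:

       struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       };
       typedef struct __va_list_tag va_list[1];  */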
2971 /* Perform any actions needed for a function that is receiving a
2972 variable number of arguments.
2976 MODE and TYPE are the mode and type of the current parameter.
2978 PRETEND_SIZE is a variable that should be set to the amount of stack
2979 that must be pushed by the prologue to pretend that our caller pushed it.
2982 Normally, this macro will push all remaining incoming registers on the
2983 stack and set PRETEND_SIZE to the length of the registers pushed. */
2986 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2987 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2990 CUMULATIVE_ARGS next_cum;
2991 rtx save_area = NULL_RTX, mem;
3004 /* Indicate that space should be allocated on the stack for the varargs save area. */
3005 ix86_save_varrargs_registers = 1;
3007 cfun->stack_alignment_needed = 128;
3009 fntype = TREE_TYPE (current_function_decl);
3010 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3011 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3012 != void_type_node));
3014 /* For varargs, we do not want to skip the dummy va_dcl argument.
3015 For stdargs, we do want to skip the last named argument. */
3018 function_arg_advance (&next_cum, mode, type, 1);
3021 save_area = frame_pointer_rtx;
3023 set = get_varargs_alias_set ();
3025 for (i = next_cum.regno; i < ix86_regparm; i++)
3027 mem = gen_rtx_MEM (Pmode,
3028 plus_constant (save_area, i * UNITS_PER_WORD));
3029 set_mem_alias_set (mem, set);
3030 emit_move_insn (mem, gen_rtx_REG (Pmode,
3031 x86_64_int_parameter_registers[i]));
3034 if (next_cum.sse_nregs)
3036 /* Now emit code to save SSE registers. The AX parameter contains the number
3037 of SSE parameter registers used to call this function. We use the
3038 sse_prologue_save insn template, which produces a computed jump across
3039 the SSE saves. We need some preparation work to get this working. */
3041 label = gen_label_rtx ();
3042 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3044 /* Compute the address to jump to:
3045 label - 5*eax + nnamed_sse_arguments*5 */
3046 tmp_reg = gen_reg_rtx (Pmode);
3047 nsse_reg = gen_reg_rtx (Pmode);
3048 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3049 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3050 gen_rtx_MULT (Pmode, nsse_reg,
3052 if (next_cum.sse_regno)
3055 gen_rtx_CONST (DImode,
3056 gen_rtx_PLUS (DImode,
3058 GEN_INT (next_cum.sse_regno * 4))));
3060 emit_move_insn (nsse_reg, label_ref);
3061 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3063 /* Compute the address of the memory block we save into. We always use a
3064 pointer pointing 127 bytes after the first byte to store; this is needed
3065 to keep the instruction size limited to 4 bytes. */
3066 tmp_reg = gen_reg_rtx (Pmode);
3067 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3068 plus_constant (save_area,
3069 8 * REGPARM_MAX + 127)));
3070 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3071 set_mem_alias_set (mem, set);
3072 set_mem_align (mem, BITS_PER_WORD);
3074 /* And finally do the dirty job! */
3075 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3076 GEN_INT (next_cum.sse_regno), label));
3081 /* Implement va_start. */
3084 ix86_va_start (tree valist, rtx nextarg)
3086 HOST_WIDE_INT words, n_gpr, n_fpr;
3087 tree f_gpr, f_fpr, f_ovf, f_sav;
3088 tree gpr, fpr, ovf, sav, t;
3090 /* Only the 64bit target needs something special. */
3093 std_expand_builtin_va_start (valist, nextarg);
3097 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3098 f_fpr = TREE_CHAIN (f_gpr);
3099 f_ovf = TREE_CHAIN (f_fpr);
3100 f_sav = TREE_CHAIN (f_ovf);
3102 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3103 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3104 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3105 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3106 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3108 /* Count number of gp and fp argument registers used. */
3109 words = current_function_args_info.words;
3110 n_gpr = current_function_args_info.regno;
3111 n_fpr = current_function_args_info.sse_regno;
3113 if (TARGET_DEBUG_ARG)
3114 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3115 (int) words, (int) n_gpr, (int) n_fpr);
3117 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3118 build_int_2 (n_gpr * 8, 0));
3119 TREE_SIDE_EFFECTS (t) = 1;
3120 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3122 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3123 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3124 TREE_SIDE_EFFECTS (t) = 1;
3125 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3127 /* Find the overflow area. */
3128 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3130 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3131 build_int_2 (words * UNITS_PER_WORD, 0));
3132 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3133 TREE_SIDE_EFFECTS (t) = 1;
3134 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3136 /* Find the register save area.
3137 The prologue of the function saves it right above the stack frame. */
3138 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3139 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3140 TREE_SIDE_EFFECTS (t) = 1;
3141 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
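/* Worked example (editor's sketch): for

       int f (int a, ...);

   a consumes one GP register, so va_start stores gp_offset = 8 and,
   with no SSE registers used by named arguments,
   fp_offset = 8 * REGPARM_MAX = 48; overflow_arg_area points past any
   stack-passed words and reg_save_area at the block saved by the
   prologue above the stack frame. */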
3144 /* Implement va_arg. */
3146 ix86_va_arg (tree valist, tree type)
3148 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3149 tree f_gpr, f_fpr, f_ovf, f_sav;
3150 tree gpr, fpr, ovf, sav, t;
3152 rtx lab_false, lab_over = NULL_RTX;
3157 /* Only the 64bit target needs something special. */
3160 return std_expand_builtin_va_arg (valist, type);
3163 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3164 f_fpr = TREE_CHAIN (f_gpr);
3165 f_ovf = TREE_CHAIN (f_fpr);
3166 f_sav = TREE_CHAIN (f_ovf);
3168 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3169 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3170 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3171 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3172 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3174 size = int_size_in_bytes (type);
3177 /* Passed by reference. */
3179 type = build_pointer_type (type);
3180 size = int_size_in_bytes (type);
3182 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3184 container = construct_container (TYPE_MODE (type), type, 0,
3185 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3187 * Pull the value out of the saved registers ...
3190 addr_rtx = gen_reg_rtx (Pmode);
3194 rtx int_addr_rtx, sse_addr_rtx;
3195 int needed_intregs, needed_sseregs;
3198 lab_over = gen_label_rtx ();
3199 lab_false = gen_label_rtx ();
3201 examine_argument (TYPE_MODE (type), type, 0,
3202 &needed_intregs, &needed_sseregs);
3205 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3206 || TYPE_ALIGN (type) > 128);
3208 /* If we are passing a structure, verify that it is a consecutive block
3209 in the register save area. If not, we need to do moves. */
3210 if (!need_temp && !REG_P (container))
3212 /* Verify that all registers are strictly consecutive. */
3213 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3217 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3219 rtx slot = XVECEXP (container, 0, i);
3220 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3221 || INTVAL (XEXP (slot, 1)) != i * 16)
3229 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3231 rtx slot = XVECEXP (container, 0, i);
3232 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3233 || INTVAL (XEXP (slot, 1)) != i * 8)
3240 int_addr_rtx = addr_rtx;
3241 sse_addr_rtx = addr_rtx;
3245 int_addr_rtx = gen_reg_rtx (Pmode);
3246 sse_addr_rtx = gen_reg_rtx (Pmode);
3248 /* First ensure that we fit completely in registers. */
3251 emit_cmp_and_jump_insns (expand_expr
3252 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3253 GEN_INT ((REGPARM_MAX - needed_intregs +
3254 1) * 8), GE, const1_rtx, SImode,
3259 emit_cmp_and_jump_insns (expand_expr
3260 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3261 GEN_INT ((SSE_REGPARM_MAX -
3262 needed_sseregs + 1) * 16 +
3263 REGPARM_MAX * 8), GE, const1_rtx,
3264 SImode, 1, lab_false);
3267 /* Compute index to start of area used for integer regs. */
3270 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3271 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3272 if (r != int_addr_rtx)
3273 emit_move_insn (int_addr_rtx, r);
3277 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3278 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3279 if (r != sse_addr_rtx)
3280 emit_move_insn (sse_addr_rtx, r);
3288 /* Never use the memory itself, as it has the alias set. */
3289 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3290 mem = gen_rtx_MEM (BLKmode, x);
3291 force_operand (x, addr_rtx);
3292 set_mem_alias_set (mem, get_varargs_alias_set ());
3293 set_mem_align (mem, BITS_PER_UNIT);
3295 for (i = 0; i < XVECLEN (container, 0); i++)
3297 rtx slot = XVECEXP (container, 0, i);
3298 rtx reg = XEXP (slot, 0);
3299 enum machine_mode mode = GET_MODE (reg);
3305 if (SSE_REGNO_P (REGNO (reg)))
3307 src_addr = sse_addr_rtx;
3308 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3312 src_addr = int_addr_rtx;
3313 src_offset = REGNO (reg) * 8;
3315 src_mem = gen_rtx_MEM (mode, src_addr);
3316 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3317 src_mem = adjust_address (src_mem, mode, src_offset);
3318 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3319 emit_move_insn (dest_mem, src_mem);
3326 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3327 build_int_2 (needed_intregs * 8, 0));
3328 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3329 TREE_SIDE_EFFECTS (t) = 1;
3330 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3335 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3336 build_int_2 (needed_sseregs * 16, 0));
3337 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3338 TREE_SIDE_EFFECTS (t) = 1;
3339 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3342 emit_jump_insn (gen_jump (lab_over));
3344 emit_label (lab_false);
3347 /* ... otherwise out of the overflow area. */
3349 /* Care for on-stack alignment if needed. */
3350 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3354 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3355 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3356 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3360 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3362 emit_move_insn (addr_rtx, r);
3365 build (PLUS_EXPR, TREE_TYPE (t), t,
3366 build_int_2 (rsize * UNITS_PER_WORD, 0));
3367 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3368 TREE_SIDE_EFFECTS (t) = 1;
3369 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3372 emit_label (lab_over);
3376 r = gen_rtx_MEM (Pmode, addr_rtx);
3377 set_mem_alias_set (r, get_varargs_alias_set ());
3378 emit_move_insn (addr_rtx, r);
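/* Net effect of the expansion above (editor's summary): fetching, say,
   an int with va_arg first compares gp_offset with 8 * REGPARM_MAX (48);
   below that limit the value is loaded from reg_save_area + gp_offset
   and gp_offset is advanced by 8, otherwise it is loaded from
   overflow_arg_area, which is then advanced by a word. */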
3384 /* Return nonzero if OP is either an i387 or an SSE fp register. */
3386 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3388 return ANY_FP_REG_P (op);
3391 /* Return nonzero if OP is an i387 fp register. */
3393 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3395 return FP_REG_P (op);
3398 /* Return nonzero if OP is a non-fp register_operand. */
3400 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3402 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3405 /* Return nonzero if OP is a register operand other than an
3406 i387 fp register. */
3408 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3410 return register_operand (op, mode) && !FP_REG_P (op);
3413 /* Return nonzero if OP is a general operand representable on x86_64. */
3416 x86_64_general_operand (rtx op, enum machine_mode mode)
3419 return general_operand (op, mode);
3420 if (nonimmediate_operand (op, mode))
3422 return x86_64_sign_extended_value (op);
3425 /* Return nonzero if OP is a general operand representable on x86_64
3426 as either a sign extended or a zero extended constant. */
3429 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3432 return general_operand (op, mode);
3433 if (nonimmediate_operand (op, mode))
3435 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3438 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3441 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3444 return nonmemory_operand (op, mode);
3445 if (register_operand (op, mode))
3447 return x86_64_sign_extended_value (op);
3450 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3453 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3455 if (!TARGET_64BIT || !flag_pic)
3456 return nonmemory_operand (op, mode);
3457 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3459 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3464 /* Return nonzero if OPNUM's MEM should be matched
3465 in movabs* patterns. */
3468 ix86_check_movabs (rtx insn, int opnum)
3472 set = PATTERN (insn);
3473 if (GET_CODE (set) == PARALLEL)
3474 set = XVECEXP (set, 0, 0);
3475 if (GET_CODE (set) != SET)
3477 mem = XEXP (set, opnum);
3478 while (GET_CODE (mem) == SUBREG)
3479 mem = SUBREG_REG (mem);
3480 if (GET_CODE (mem) != MEM)
3482 return (volatile_ok || !MEM_VOLATILE_P (mem));
3485 /* Return nonzero if OP is a nonmemory operand representable on x86_64
as either a sign extended or a zero extended constant. */
3488 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3491 return nonmemory_operand (op, mode);
3492 if (register_operand (op, mode))
3494 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3497 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3500 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3503 return immediate_operand (op, mode);
3504 return x86_64_sign_extended_value (op);
3507 /* Return nonzero if OP is an immediate operand representable on x86_64
as a zero extended constant. */
3510 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3512 return x86_64_zero_extended_value (op);
3515 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3516 for shift & compare patterns, as shifting by 0 does not change flags),
3517 else return zero. */
3520 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3522 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3525 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3526 reference and a constant. */
3529 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3531 switch (GET_CODE (op))
3539 if (GET_CODE (op) == SYMBOL_REF
3540 || GET_CODE (op) == LABEL_REF
3541 || (GET_CODE (op) == UNSPEC
3542 && (XINT (op, 1) == UNSPEC_GOT
3543 || XINT (op, 1) == UNSPEC_GOTOFF
3544 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3546 if (GET_CODE (op) != PLUS
3547 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3551 if (GET_CODE (op) == SYMBOL_REF
3552 || GET_CODE (op) == LABEL_REF)
3554 /* Only @GOTOFF gets offsets. */
3555 if (GET_CODE (op) != UNSPEC
3556 || XINT (op, 1) != UNSPEC_GOTOFF)
3559 op = XVECEXP (op, 0, 0);
3560 if (GET_CODE (op) == SYMBOL_REF
3561 || GET_CODE (op) == LABEL_REF)
3570 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3573 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3575 if (GET_CODE (op) != CONST)
3580 if (GET_CODE (op) == UNSPEC
3581 && XINT (op, 1) == UNSPEC_GOTPCREL)
3583 if (GET_CODE (op) == PLUS
3584 && GET_CODE (XEXP (op, 0)) == UNSPEC
3585 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3590 if (GET_CODE (op) == UNSPEC)
3592 if (GET_CODE (op) != PLUS
3593 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3596 if (GET_CODE (op) == UNSPEC)
3602 /* Return true if OP is a symbolic operand that resolves locally. */
3605 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3607 if (GET_CODE (op) == CONST
3608 && GET_CODE (XEXP (op, 0)) == PLUS
3609 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3610 op = XEXP (XEXP (op, 0), 0);
3612 if (GET_CODE (op) == LABEL_REF)
3615 if (GET_CODE (op) != SYMBOL_REF)
3618 if (SYMBOL_REF_LOCAL_P (op))
3621 /* There is, however, a not insubstantial body of code in the rest of
3622 the compiler that assumes it can just stick the results of
3623 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3624 /* ??? This is a hack. Should update the body of the compiler to
3625 always create a DECL and invoke targetm.encode_section_info. */
3626 if (strncmp (XSTR (op, 0), internal_label_prefix,
3627 internal_label_prefix_len) == 0)
3633 /* Test for various thread-local symbols. */
3636 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3638 if (GET_CODE (op) != SYMBOL_REF)
3640 return SYMBOL_REF_TLS_MODEL (op);
3644 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3646 if (GET_CODE (op) != SYMBOL_REF)
3648 return SYMBOL_REF_TLS_MODEL (op) == kind;
3652 global_dynamic_symbolic_operand (rtx op,
3653 enum machine_mode mode ATTRIBUTE_UNUSED)
3655 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3659 local_dynamic_symbolic_operand (rtx op,
3660 enum machine_mode mode ATTRIBUTE_UNUSED)
3662 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3666 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3668 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3672 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3674 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3677 /* Test for a valid operand for a call instruction. Don't allow the
3678 arg pointer register or virtual regs since they may decay into
3679 reg + const, which the patterns can't handle. */
3682 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3684 /* Disallow indirect through a virtual register. This leads to
3685 compiler aborts when trying to eliminate them. */
3686 if (GET_CODE (op) == REG
3687 && (op == arg_pointer_rtx
3688 || op == frame_pointer_rtx
3689 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3690 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3693 /* Disallow `call 1234'. Due to varying assembler lameness this
3694 gets either rejected or translated to `call .+1234'. */
3695 if (GET_CODE (op) == CONST_INT)
3698 /* Explicitly allow SYMBOL_REF even if pic. */
3699 if (GET_CODE (op) == SYMBOL_REF)
3702 /* Otherwise we can allow any general_operand in the address. */
3703 return general_operand (op, Pmode);
3706 /* Test for a valid operand for a call instruction. Don't allow the
3707 arg pointer register or virtual regs since they may decay into
3708 reg + const, which the patterns can't handle. */
3711 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3713 /* Disallow indirect through a virtual register. This leads to
3714 compiler aborts when trying to eliminate them. */
3715 if (GET_CODE (op) == REG
3716 && (op == arg_pointer_rtx
3717 || op == frame_pointer_rtx
3718 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3719 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3722 /* Explicitly allow SYMBOL_REF even if pic. */
3723 if (GET_CODE (op) == SYMBOL_REF)
3726 /* Otherwise we can only allow register operands. */
3727 return register_operand (op, Pmode);
3731 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3733 if (GET_CODE (op) == CONST
3734 && GET_CODE (XEXP (op, 0)) == PLUS
3735 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3736 op = XEXP (XEXP (op, 0), 0);
3737 return GET_CODE (op) == SYMBOL_REF;
3740 /* Match exactly zero and one. */
3743 const0_operand (rtx op, enum machine_mode mode)
3745 return op == CONST0_RTX (mode);
3749 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3751 return op == const1_rtx;
3754 /* Match 2, 4, or 8. Used for leal multiplicands. */
3757 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3759 return (GET_CODE (op) == CONST_INT
3760 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3764 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3766 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3770 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3776 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3778 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3782 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3784 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3788 /* True if this is a constant appropriate for an increment or decrement. */
3791 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3793 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3794 flags register, since the carry flag is not set. */
3795 if (TARGET_PENTIUM4 && !optimize_size)
3797 return op == const1_rtx || op == constm1_rtx;
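/* Editor's note: the practical effect is that on the Pentium 4, when
   not optimizing for size, an expression like x + 1 is expanded as
   `addl $1, %eax' rather than `incl %eax', because INC and DEC write
   only part of EFLAGS and so depend on the previous flags value. */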
3800 /* Return nonzero if OP is acceptable as an operand of a DImode shift expander. */
3804 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3807 return nonimmediate_operand (op, mode);
3809 return register_operand (op, mode);
3812 /* Return false if this is the stack pointer, or any other fake
3813 register eliminable to the stack pointer. Otherwise, this is a register operand.
3816 This is used to prevent esp from being used as an index reg,
3817 which would only happen in pathological cases. */
3820 reg_no_sp_operand (rtx op, enum machine_mode mode)
3823 if (GET_CODE (t) == SUBREG)
3825 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3828 return register_operand (op, mode);
3832 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3834 return MMX_REG_P (op);
3837 /* Return false if this is any eliminable register. Otherwise general_operand. */
3841 general_no_elim_operand (rtx op, enum machine_mode mode)
3844 if (GET_CODE (t) == SUBREG)
3846 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3847 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3848 || t == virtual_stack_dynamic_rtx)
3851 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3852 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3855 return general_operand (op, mode);
3858 /* Return false if this is any eliminable register. Otherwise
3859 register_operand or const_int. */
3862 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3865 if (GET_CODE (t) == SUBREG)
3867 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3868 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3869 || t == virtual_stack_dynamic_rtx)
3872 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3875 /* Return false if this is any eliminable register or stack register,
3876 otherwise work like register_operand. */
3879 index_register_operand (rtx op, enum machine_mode mode)
3882 if (GET_CODE (t) == SUBREG)
3886 if (t == arg_pointer_rtx
3887 || t == frame_pointer_rtx
3888 || t == virtual_incoming_args_rtx
3889 || t == virtual_stack_vars_rtx
3890 || t == virtual_stack_dynamic_rtx
3891 || REGNO (t) == STACK_POINTER_REGNUM)
3894 return general_operand (op, mode);
3897 /* Return true if op is a Q_REGS class register. */
3900 q_regs_operand (rtx op, enum machine_mode mode)
3902 if (mode != VOIDmode && GET_MODE (op) != mode)
3904 if (GET_CODE (op) == SUBREG)
3905 op = SUBREG_REG (op);
3906 return ANY_QI_REG_P (op);
3909 /* Return true if op is a flags register. */
3912 flags_reg_operand (rtx op, enum machine_mode mode)
3914 if (mode != VOIDmode && GET_MODE (op) != mode)
3916 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3919 /* Return true if op is a NON_Q_REGS class register. */
3922 non_q_regs_operand (rtx op, enum machine_mode mode)
3924 if (mode != VOIDmode && GET_MODE (op) != mode)
3926 if (GET_CODE (op) == SUBREG)
3927 op = SUBREG_REG (op);
3928 return NON_QI_REG_P (op);
3932 zero_extended_scalar_load_operand (rtx op,
3933 enum machine_mode mode ATTRIBUTE_UNUSED)
3936 if (GET_CODE (op) != MEM)
3938 op = maybe_get_pool_constant (op);
3941 if (GET_CODE (op) != CONST_VECTOR)
3944 (GET_MODE_SIZE (GET_MODE (op)) /
3945 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3946 for (n_elts--; n_elts > 0; n_elts--)
3948 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3949 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3955 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3957 vector_move_operand (rtx op, enum machine_mode mode)
3959 if (nonimmediate_operand (op, mode))
3961 if (GET_MODE (op) != mode && mode != VOIDmode)
3963 return (op == CONST0_RTX (GET_MODE (op)));
3966 /* Return true if op is a valid address and does not contain
3967 a segment override. */
3970 no_seg_address_operand (rtx op, enum machine_mode mode)
3972 struct ix86_address parts;
3974 if (! address_operand (op, mode))
3977 if (! ix86_decompose_address (op, &parts))
3980 return parts.seg == SEG_DEFAULT;
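/* For illustration (a sketch, not from the original sources): a plain
   address such as 16(%eax) decomposes with parts.seg == SEG_DEFAULT and
   is accepted above, while a segment-prefixed reference such as
   %gs:16(%eax) decomposes with parts.seg == SEG_GS and is rejected. */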
3983 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3986 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3988 enum rtx_code code = GET_CODE (op);
3991 /* Operations supported directly. */
4001 /* These are equivalent to ones above in non-IEEE comparisons. */
4008 return !TARGET_IEEE_FP;
4013 /* Return 1 if OP is a valid comparison operator in valid mode. */
4015 ix86_comparison_operator (rtx op, enum machine_mode mode)
4017 enum machine_mode inmode;
4018 enum rtx_code code = GET_CODE (op);
4019 if (mode != VOIDmode && GET_MODE (op) != mode)
4021 if (GET_RTX_CLASS (code) != '<')
4023 inmode = GET_MODE (XEXP (op, 0));
4025 if (inmode == CCFPmode || inmode == CCFPUmode)
4027 enum rtx_code second_code, bypass_code;
4028 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4029 return (bypass_code == NIL && second_code == NIL);
4036 if (inmode == CCmode || inmode == CCGCmode
4037 || inmode == CCGOCmode || inmode == CCNOmode)
4040 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4041 if (inmode == CCmode)
4045 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4053 /* Return 1 if OP is a valid comparison operator testing the carry flag to be set. */
4056 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4058 enum machine_mode inmode;
4059 enum rtx_code code = GET_CODE (op);
4061 if (mode != VOIDmode && GET_MODE (op) != mode)
4063 if (GET_RTX_CLASS (code) != '<')
4065 inmode = GET_MODE (XEXP (op, 0));
4066 if (GET_CODE (XEXP (op, 0)) != REG
4067 || REGNO (XEXP (op, 0)) != 17
4068 || XEXP (op, 1) != const0_rtx)
4071 if (inmode == CCFPmode || inmode == CCFPUmode)
4073 enum rtx_code second_code, bypass_code;
4075 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4076 if (bypass_code != NIL || second_code != NIL)
4078 code = ix86_fp_compare_code_to_integer (code);
4080 else if (inmode != CCmode)
4085 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4088 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4090 enum machine_mode inmode;
4091 enum rtx_code code = GET_CODE (op);
4093 if (mode != VOIDmode && GET_MODE (op) != mode)
4095 if (GET_RTX_CLASS (code) != '<')
4097 inmode = GET_MODE (XEXP (op, 0));
4098 if (inmode == CCFPmode || inmode == CCFPUmode)
4100 enum rtx_code second_code, bypass_code;
4102 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4103 if (bypass_code != NIL || second_code != NIL)
4105 code = ix86_fp_compare_code_to_integer (code);
4107 /* The i387 supports only a limited set of condition codes. */
4110 case LTU: case GTU: case LEU: case GEU:
4111 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4114 case ORDERED: case UNORDERED:
4122 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4125 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4127 switch (GET_CODE (op))
4130 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4131 but the 386 and 486 do HImode multiplies faster. */
4132 return ix86_tune > PROCESSOR_I486;
4144 /* Nearly general operand, but accept any const_double, since we wish
4145 to be able to drop them into memory rather than have them get pulled into a register. */
4149 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4151 if (mode != VOIDmode && mode != GET_MODE (op))
4153 if (GET_CODE (op) == CONST_DOUBLE)
4155 return general_operand (op, mode);
4158 /* Match an SI or HImode register for a zero_extract. */
4161 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4164 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4165 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4168 if (!register_operand (op, VOIDmode))
4171 /* Be careful to accept only registers having upper parts. */
4172 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4173 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
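/* An illustration of the regno < 4 test above (an assumption about the
   intent, not from the original sources): in this port the first four
   integer registers are %eax, %edx, %ecx and %ebx, the only ones with
   addressable high-byte parts (%ah, %dh, %ch, %bh) that a zero_extract
   of bits 8..15 can name. Pseudos (regno > LAST_VIRTUAL_REGISTER) are
   accepted on the assumption that reload assigns them a suitable hard
   register. */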
4176 /* Return 1 if this is a valid binary floating-point operation.
4177 OP is the expression matched, and MODE is its mode. */
4180 binary_fp_operator (rtx op, enum machine_mode mode)
4182 if (mode != VOIDmode && mode != GET_MODE (op))
4185 switch (GET_CODE (op))
4191 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4199 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4201 return GET_CODE (op) == MULT;
4205 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4207 return GET_CODE (op) == DIV;
4211 arith_or_logical_operator (rtx op, enum machine_mode mode)
4213 return ((mode == VOIDmode || GET_MODE (op) == mode)
4214 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4215 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4218 /* Returns 1 if OP is a memory operand with a displacement. */
4221 memory_displacement_operand (rtx op, enum machine_mode mode)
4223 struct ix86_address parts;
4225 if (! memory_operand (op, mode))
4228 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4231 return parts.disp != NULL_RTX;
4234 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4235 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4237 ??? It seems likely that this will only work because cmpsi is an
4238 expander, and no actual insns use this. */
4241 cmpsi_operand (rtx op, enum machine_mode mode)
4243 if (nonimmediate_operand (op, mode))
4246 if (GET_CODE (op) == AND
4247 && GET_MODE (op) == SImode
4248 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4249 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4250 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4251 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4252 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4253 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4259 /* Returns 1 if OP is a memory operand that cannot be represented by the modRM array. */
4263 long_memory_operand (rtx op, enum machine_mode mode)
4265 if (! memory_operand (op, mode))
4268 return memory_address_length (op) != 0;
4271 /* Return nonzero if the rtx is known to be aligned. */
4274 aligned_operand (rtx op, enum machine_mode mode)
4276 struct ix86_address parts;
4278 if (!general_operand (op, mode))
4281 /* Registers and immediate operands are always "aligned". */
4282 if (GET_CODE (op) != MEM)
4285 /* Don't even try to do any aligned optimizations with volatiles. */
4286 if (MEM_VOLATILE_P (op))
4291 /* Pushes and pops are only valid on the stack pointer. */
4292 if (GET_CODE (op) == PRE_DEC
4293 || GET_CODE (op) == POST_INC)
4296 /* Decode the address. */
4297 if (! ix86_decompose_address (op, &parts))
4300 /* Look for some component that isn't known to be aligned. */
4304 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4309 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4314 if (GET_CODE (parts.disp) != CONST_INT
4315 || (INTVAL (parts.disp) & 3) != 0)
4319 /* Didn't find one -- this must be an aligned address. */
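/* Worked example (an illustration, not from the original sources):
   assuming %ebp is known to be at least 32-bit aligned,

	(mem:SI (plus (reg:SI %ebp) (const_int -8)))   aligned, disp % 4 == 0
	(mem:SI (plus (reg:SI %ebp) (const_int -6)))   rejected, disp % 4 != 0
	(mem:SI (reg:SI %eax))                         rejected unless %eax is
	                                               known 32-bit aligned. */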
4323 /* Initialize the table of extra 80387 mathematical constants. */
4326 init_ext_80387_constants (void)
4328 static const char * cst[5] =
4330 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4331 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4332 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4333 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4334 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4338 for (i = 0; i < 5; i++)
4340 real_from_string (&ext_80387_constants_table[i], cst[i]);
4341 /* Ensure each constant is rounded to XFmode precision. */
4342 real_convert (&ext_80387_constants_table[i],
4343 XFmode, &ext_80387_constants_table[i]);
4346 ext_80387_constants_init = 1;
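/* For reference (a sketch, not from the original sources): the table
   above pairs with the x87 constant-load opcodes

	fldlg2  log10(2)	fldln2  ln(2)
	fldl2e  log2(e)		fldl2t  log2(10)
	fldpi   pi

   while standard_80387_constant_p below additionally recognizes 0.0 and
   1.0, loadable with fldz and fld1. */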
4349 /* Return true if the constant is something that can be loaded with
4350 a special instruction. */
4353 standard_80387_constant_p (rtx x)
4355 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4358 if (x == CONST0_RTX (GET_MODE (x)))
4360 if (x == CONST1_RTX (GET_MODE (x)))
4363 /* For XFmode constants, try to find a special 80387 instruction on
4364 those CPUs that benefit from them. */
4365 if (GET_MODE (x) == XFmode
4366 && x86_ext_80387_constants & TUNEMASK)
4371 if (! ext_80387_constants_init)
4372 init_ext_80387_constants ();
4374 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4375 for (i = 0; i < 5; i++)
4376 if (real_identical (&r, &ext_80387_constants_table[i]))
4383 /* Return the opcode of the special instruction to be used to load the constant X. */
4387 standard_80387_constant_opcode (rtx x)
4389 switch (standard_80387_constant_p (x))
4409 /* Return the CONST_DOUBLE representing the 80387 constant that is
4410 loaded by the specified special instruction. The argument IDX
4411 matches the return value from standard_80387_constant_p. */
4414 standard_80387_constant_rtx (int idx)
4418 if (! ext_80387_constants_init)
4419 init_ext_80387_constants ();
4435 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4439 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4442 standard_sse_constant_p (rtx x)
4444 if (x == const0_rtx)
4446 return (x == CONST0_RTX (GET_MODE (x)));
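/* For illustration (an assumption about the rationale, not from the
   original sources): all-zero values are special because the move
   patterns can materialize them with a register-clearing idiom such as

	xorps	%xmm0, %xmm0

   instead of loading the constant from memory. */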
4449 /* Returns 1 if OP contains a symbol reference */
4452 symbolic_reference_mentioned_p (rtx op)
4457 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4460 fmt = GET_RTX_FORMAT (GET_CODE (op));
4461 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4467 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4468 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4472 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4479 /* Return 1 if it is appropriate to emit `ret' instructions in the
4480 body of a function. Do this only if the epilogue is simple, needing a
4481 couple of insns. Prior to reloading, we can't tell how many registers
4482 must be saved, so return 0 then. Return 0 if there is no frame
4483 marker to de-allocate.
4485 If NON_SAVING_SETJMP is defined and true, then it is not possible
4486 for the epilogue to be simple, so return 0. This is a special case
4487 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4488 until final, but jump_optimize may need to know sooner if a `return' is OK. */
4492 ix86_can_use_return_insn_p (void)
4494 struct ix86_frame frame;
4496 #ifdef NON_SAVING_SETJMP
4497 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4501 if (! reload_completed || frame_pointer_needed)
4504 /* Don't allow more than 32k of pop, since that's all we can do
4505 with one instruction. */
4506 if (current_function_pops_args
4507 && current_function_args_size >= 32768)
4510 ix86_compute_frame_layout (&frame);
4511 return frame.to_allocate == 0 && frame.nregs == 0;
4514 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4516 x86_64_sign_extended_value (rtx value)
4518 switch (GET_CODE (value))
4520 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4521 to be at least 32, so all acceptable constants are
4522 represented as CONST_INTs. */
4524 if (HOST_BITS_PER_WIDE_INT == 32)
4528 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4529 return trunc_int_for_mode (val, SImode) == val;
4533 /* For certain code models, the symbolic references are known to fit.
4534 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4535 library. Don't count TLS SYMBOL_REFs here, since they should fit
4536 only if inside of UNSPEC handled below. */
4538 /* TLS symbols are not constant. */
4539 if (tls_symbolic_operand (value, Pmode))
4541 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4543 /* For certain code models, the code is near as well. */
4545 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4546 || ix86_cmodel == CM_KERNEL);
4548 /* We also may accept the offsetted memory references in certain special cases. */
4551 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4552 switch (XINT (XEXP (value, 0), 1))
4554 case UNSPEC_GOTPCREL:
4556 case UNSPEC_GOTNTPOFF:
4562 if (GET_CODE (XEXP (value, 0)) == PLUS)
4564 rtx op1 = XEXP (XEXP (value, 0), 0);
4565 rtx op2 = XEXP (XEXP (value, 0), 1);
4566 HOST_WIDE_INT offset;
4568 if (ix86_cmodel == CM_LARGE)
4570 if (GET_CODE (op2) != CONST_INT)
4572 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4573 switch (GET_CODE (op1))
4576 /* For CM_SMALL assume that the latest object is 16MB before the
4577 end of the 31-bit boundary. We may also accept pretty
4578 large negative constants, knowing that all objects are
4579 in the positive half of the address space. */
4580 if (ix86_cmodel == CM_SMALL
4581 && offset < 16*1024*1024
4582 && trunc_int_for_mode (offset, SImode) == offset)
4584 /* For CM_KERNEL we know that all objects reside in the
4585 negative half of the 32-bit address space. We may not
4586 accept negative offsets, since they may be just off the end,
4587 but we may accept pretty large positive ones. */
4588 if (ix86_cmodel == CM_KERNEL
4590 && trunc_int_for_mode (offset, SImode) == offset)
4594 /* These conditions are similar to SYMBOL_REF ones, just the
4595 constraints for code models differ. */
4596 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4597 && offset < 16*1024*1024
4598 && trunc_int_for_mode (offset, SImode) == offset)
4600 if (ix86_cmodel == CM_KERNEL
4602 && trunc_int_for_mode (offset, SImode) == offset)
4606 switch (XINT (op1, 1))
4611 && trunc_int_for_mode (offset, SImode) == offset)
4625 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4627 x86_64_zero_extended_value (rtx value)
4629 switch (GET_CODE (value))
4632 if (HOST_BITS_PER_WIDE_INT == 32)
4633 return (GET_MODE (value) == VOIDmode
4634 && !CONST_DOUBLE_HIGH (value));
4638 if (HOST_BITS_PER_WIDE_INT == 32)
4639 return INTVAL (value) >= 0;
4641 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4644 /* For certain code models, the symbolic references are known to fit. */
4646 /* TLS symbols are not constant. */
4647 if (tls_symbolic_operand (value, Pmode))
4649 return ix86_cmodel == CM_SMALL;
4651 /* For certain code models, the code is near as well. */
4653 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4655 /* We also may accept the offsetted memory references in certain special cases. */
4658 if (GET_CODE (XEXP (value, 0)) == PLUS)
4660 rtx op1 = XEXP (XEXP (value, 0), 0);
4661 rtx op2 = XEXP (XEXP (value, 0), 1);
4663 if (ix86_cmodel == CM_LARGE)
4665 switch (GET_CODE (op1))
4669 /* For the small code model we may accept pretty large positive
4670 offsets, since one bit is available for free. Negative
4671 offsets are limited by the size of the NULL pointer area
4672 specified by the ABI. */
4673 if (ix86_cmodel == CM_SMALL
4674 && GET_CODE (op2) == CONST_INT
4675 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4676 && (trunc_int_for_mode (INTVAL (op2), SImode)
4679 /* ??? For the kernel, we may accept adjustment of
4680 -0x10000000, since we know that it will just convert
4681 negative address space to positive, but perhaps this
4682 is not worthwhile. */
4685 /* These conditions are similar to SYMBOL_REF ones, just the
4686 constraints for code models differ. */
4687 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4688 && GET_CODE (op2) == CONST_INT
4689 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4690 && (trunc_int_for_mode (INTVAL (op2), SImode)
4704 /* Value should be nonzero if functions must have frame pointers.
4705 Zero means the frame pointer need not be set up (and parms may
4706 be accessed via the stack pointer) in functions that seem suitable. */
4709 ix86_frame_pointer_required (void)
4711 /* If we accessed previous frames, then the generated code expects
4712 to be able to access the saved ebp value in our frame. */
4713 if (cfun->machine->accesses_prev_frame)
4716 /* Several x86 OSes need a frame pointer for other reasons,
4717 usually pertaining to setjmp. */
4718 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4721 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4722 the frame pointer by default. Turn it back on now if we've not
4723 got a leaf function. */
4724 if (TARGET_OMIT_LEAF_FRAME_POINTER
4725 && (!current_function_is_leaf))
4728 if (current_function_profile)
4734 /* Record that the current function accesses previous call frames. */
4737 ix86_setup_frame_addresses (void)
4739 cfun->machine->accesses_prev_frame = 1;
4742 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4743 # define USE_HIDDEN_LINKONCE 1
4745 # define USE_HIDDEN_LINKONCE 0
4748 static int pic_labels_used;
4750 /* Fills in the label name that should be used for a pc thunk for
4751 the given register. */
4754 get_pc_thunk_name (char name[32], unsigned int regno)
4756 if (USE_HIDDEN_LINKONCE)
4757 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4759 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4763 /* This function generates code for -fpic that loads %ebx with
4764 the return address of the caller and then returns. */
4767 ix86_file_end (void)
4772 for (regno = 0; regno < 8; ++regno)
4776 if (! ((pic_labels_used >> regno) & 1))
4779 get_pc_thunk_name (name, regno);
4781 if (USE_HIDDEN_LINKONCE)
4785 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4787 TREE_PUBLIC (decl) = 1;
4788 TREE_STATIC (decl) = 1;
4789 DECL_ONE_ONLY (decl) = 1;
4791 (*targetm.asm_out.unique_section) (decl, 0);
4792 named_section (decl, NULL, 0);
4794 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4795 fputs ("\t.hidden\t", asm_out_file);
4796 assemble_name (asm_out_file, name);
4797 fputc ('\n', asm_out_file);
4798 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4803 ASM_OUTPUT_LABEL (asm_out_file, name);
4806 xops[0] = gen_rtx_REG (SImode, regno);
4807 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4808 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4809 output_asm_insn ("ret", xops);
4812 if (NEED_INDICATE_EXEC_STACK)
4813 file_end_indicate_exec_stack ();
4816 /* Emit code for the SET_GOT patterns. */
4819 output_set_got (rtx dest)
4824 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4826 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4828 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4831 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4833 output_asm_insn ("call\t%a2", xops);
4836 /* Output the "canonical" label name ("Lxx$pb") here too. This
4837 is what will be referred to by the Mach-O PIC subsystem. */
4838 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4840 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4841 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4844 output_asm_insn ("pop{l}\t%0", xops);
4849 get_pc_thunk_name (name, REGNO (dest));
4850 pic_labels_used |= 1 << REGNO (dest);
4852 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4853 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4854 output_asm_insn ("call\t%X2", xops);
4857 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4858 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4859 else if (!TARGET_MACHO)
4860 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
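/* For illustration (a sketch, not from the original sources), the two
   sequences emitted above look roughly like:

	call	.L1				; without deep branch prediction
   .L1:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx

	call	__i686.get_pc_thunk.bx		; with deep branch prediction,
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx	; keeping the return stack balanced. */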
4865 /* Generate a "push" pattern for input ARG. */
4870 return gen_rtx_SET (VOIDmode,
4872 gen_rtx_PRE_DEC (Pmode,
4873 stack_pointer_rtx)),
4877 /* Return >= 0 if there is an unused call-clobbered register available
4878 for the entire function. */
4881 ix86_select_alt_pic_regnum (void)
4883 if (current_function_is_leaf && !current_function_profile)
4886 for (i = 2; i >= 0; --i)
4887 if (!regs_ever_live[i])
4891 return INVALID_REGNUM;
4894 /* Return 1 if we need to save REGNO. */
4896 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4898 if (pic_offset_table_rtx
4899 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4900 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4901 || current_function_profile
4902 || current_function_calls_eh_return
4903 || current_function_uses_const_pool))
4905 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4910 if (current_function_calls_eh_return && maybe_eh_return)
4915 unsigned test = EH_RETURN_DATA_REGNO (i);
4916 if (test == INVALID_REGNUM)
4923 return (regs_ever_live[regno]
4924 && !call_used_regs[regno]
4925 && !fixed_regs[regno]
4926 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4929 /* Return the number of registers to be saved on the stack. */
4932 ix86_nsaved_regs (void)
4937 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4938 if (ix86_save_reg (regno, true))
4943 /* Return the offset between two registers, one to be eliminated, and the other
4944 its replacement, at the start of a routine. */
4947 ix86_initial_elimination_offset (int from, int to)
4949 struct ix86_frame frame;
4950 ix86_compute_frame_layout (&frame);
4952 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4953 return frame.hard_frame_pointer_offset;
4954 else if (from == FRAME_POINTER_REGNUM
4955 && to == HARD_FRAME_POINTER_REGNUM)
4956 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4959 if (to != STACK_POINTER_REGNUM)
4961 else if (from == ARG_POINTER_REGNUM)
4962 return frame.stack_pointer_offset;
4963 else if (from != FRAME_POINTER_REGNUM)
4966 return frame.stack_pointer_offset - frame.frame_pointer_offset;
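/* Worked example (an illustration, not from the original sources): with
   hard_frame_pointer_offset == 8 and frame_pointer_offset == 24,
   eliminating the soft frame pointer to the hard frame pointer yields
   8 - 24 = -16, i.e. soft-frame-pointer-relative references become
   %ebp-relative references 16 bytes lower. */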
4970 /* Fill the ix86_frame structure with information about the frame of the current function. */
4973 ix86_compute_frame_layout (struct ix86_frame *frame)
4975 HOST_WIDE_INT total_size;
4976 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4977 HOST_WIDE_INT offset;
4978 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4979 HOST_WIDE_INT size = get_frame_size ();
4981 frame->nregs = ix86_nsaved_regs ();
4984 /* During reload iterations the number of registers saved can change.
4985 Recompute the value as needed. Do not recompute when the number of registers
4986 didn't change, as reload makes multiple calls to the function and does not
4987 expect the decision to change within a single iteration. */
4989 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4991 int count = frame->nregs;
4993 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4994 /* The fast prologue uses move instead of push to save registers. This
4995 is significantly longer, but also executes faster, as modern hardware
4996 can execute the moves in parallel but can't do that for push/pop.
4998 Be careful about choosing what prologue to emit: when the function takes
4999 many instructions to execute, we may use the slow version, as well as when
5000 the function is known to be outside a hot spot (this is known with profile
5001 feedback only). Weight the size of the function by the number of registers
5002 to save, since it is cheap to use one or two push instructions but very
5003 slow to use many of them. */
5005 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5006 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5007 || (flag_branch_probabilities
5008 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5009 cfun->machine->use_fast_prologue_epilogue = false;
5011 cfun->machine->use_fast_prologue_epilogue
5012 = !expensive_function_p (count);
5014 if (TARGET_PROLOGUE_USING_MOVE
5015 && cfun->machine->use_fast_prologue_epilogue)
5016 frame->save_regs_using_mov = true;
5018 frame->save_regs_using_mov = false;
5021 /* Skip return address and saved base pointer. */
5022 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5024 frame->hard_frame_pointer_offset = offset;
5026 /* Do some sanity checking of stack_alignment_needed and
5027 preferred_alignment, since the i386 port is the only one using these
5028 features, which may break easily. */
5030 if (size && !stack_alignment_needed)
5032 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5034 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5036 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5039 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5040 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5042 /* Register save area */
5043 offset += frame->nregs * UNITS_PER_WORD;
5046 if (ix86_save_varrargs_registers)
5048 offset += X86_64_VARARGS_SIZE;
5049 frame->va_arg_size = X86_64_VARARGS_SIZE;
5052 frame->va_arg_size = 0;
5054 /* Align start of frame for local function. */
5055 frame->padding1 = ((offset + stack_alignment_needed - 1)
5056 & -stack_alignment_needed) - offset;
5058 offset += frame->padding1;
5060 /* Frame pointer points here. */
5061 frame->frame_pointer_offset = offset;
5065 /* Add the outgoing arguments area. This can be skipped if we eliminated
5066 all the function calls as dead code.
5067 Skipping is, however, impossible when the function calls alloca: the alloca
5068 expander assumes that the last current_function_outgoing_args_size bytes
5069 of the stack frame are unused. */
5070 if (ACCUMULATE_OUTGOING_ARGS
5071 && (!current_function_is_leaf || current_function_calls_alloca))
5073 offset += current_function_outgoing_args_size;
5074 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5077 frame->outgoing_arguments_size = 0;
5079 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
5081 if (!current_function_is_leaf || current_function_calls_alloca)
5082 frame->padding2 = ((offset + preferred_alignment - 1)
5083 & -preferred_alignment) - offset;
5085 frame->padding2 = 0;
5087 offset += frame->padding2;
5089 /* We've reached end of stack frame. */
5090 frame->stack_pointer_offset = offset;
5092 /* The size the prologue needs to allocate. */
5093 frame->to_allocate =
5094 (size + frame->padding1 + frame->padding2
5095 + frame->outgoing_arguments_size + frame->va_arg_size);
5097 if ((!frame->to_allocate && frame->nregs <= 1)
5098 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5099 frame->save_regs_using_mov = false;
5101 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5102 && current_function_is_leaf)
5104 frame->red_zone_size = frame->to_allocate;
5105 if (frame->save_regs_using_mov)
5106 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5107 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5108 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5111 frame->red_zone_size = 0;
5112 frame->to_allocate -= frame->red_zone_size;
5113 frame->stack_pointer_offset -= frame->red_zone_size;
5115 fprintf (stderr, "nregs: %i\n", frame->nregs);
5116 fprintf (stderr, "size: %i\n", size);
5117 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5118 fprintf (stderr, "padding1: %i\n", frame->padding1);
5119 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5120 fprintf (stderr, "padding2: %i\n", frame->padding2);
5121 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5122 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5123 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5124 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5125 frame->hard_frame_pointer_offset);
5126 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
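/* A sketch of the layout computed above, growing downward from the
   incoming stack pointer (an illustration, not from the original sources):

	return address			<- entry %esp
	saved %ebp (if frame pointer)	<- hard_frame_pointer_offset
	saved registers (nregs words)
	varargs save area (va_arg_size)
	padding1			<- frame_pointer_offset
	local variables (size)
	outgoing arguments
	padding2			<- stack_pointer_offset

   to_allocate is what the prologue must subtract from the stack pointer,
   minus whatever fits into the x86-64 red zone. */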
5130 /* Emit code to save registers in the prologue. */
5133 ix86_emit_save_regs (void)
5138 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5139 if (ix86_save_reg (regno, true))
5141 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5142 RTX_FRAME_RELATED_P (insn) = 1;
5146 /* Emit code to save registers using MOV insns. The first register
5147 is stored at POINTER + OFFSET. */
5149 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5154 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5155 if (ix86_save_reg (regno, true))
5157 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5159 gen_rtx_REG (Pmode, regno));
5160 RTX_FRAME_RELATED_P (insn) = 1;
5161 offset += UNITS_PER_WORD;
5165 /* Expand prologue or epilogue stack adjustment.
5166 The pattern exists to put a dependency on all ebp-based memory accesses.
5167 STYLE should be negative if instructions should be marked as frame related,
5168 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
5172 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5177 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5178 else if (x86_64_immediate_operand (offset, DImode))
5179 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5183 /* r11 is used by indirect sibcall return as well, set before the
5184 epilogue and used after the epilogue. ATM indirect sibcall
5185 shouldn't be used together with huge frame sizes in one
5186 function because of the frame_size check in sibcall.c. */
5189 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5190 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5192 RTX_FRAME_RELATED_P (insn) = 1;
5193 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5197 RTX_FRAME_RELATED_P (insn) = 1;
5200 /* Expand the prologue into a bunch of separate insns. */
5203 ix86_expand_prologue (void)
5207 struct ix86_frame frame;
5208 HOST_WIDE_INT allocate;
5210 ix86_compute_frame_layout (&frame);
5212 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5213 slower on all targets. Also sdb doesn't like it. */
5215 if (frame_pointer_needed)
5217 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5218 RTX_FRAME_RELATED_P (insn) = 1;
5220 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5221 RTX_FRAME_RELATED_P (insn) = 1;
5224 allocate = frame.to_allocate;
5226 if (!frame.save_regs_using_mov)
5227 ix86_emit_save_regs ();
5229 allocate += frame.nregs * UNITS_PER_WORD;
5231 /* When using the red zone we may start register saving before allocating
5232 the stack frame, saving one cycle of the prologue. */
5233 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5234 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5235 : stack_pointer_rtx,
5236 -frame.nregs * UNITS_PER_WORD);
5240 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5241 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5242 GEN_INT (-allocate), -1);
5245 /* Only valid for Win32. */
5246 rtx eax = gen_rtx_REG (SImode, 0);
5247 bool eax_live = ix86_eax_live_at_start_p ();
5254 emit_insn (gen_push (eax));
5258 insn = emit_move_insn (eax, GEN_INT (allocate));
5259 RTX_FRAME_RELATED_P (insn) = 1;
5261 insn = emit_insn (gen_allocate_stack_worker (eax));
5262 RTX_FRAME_RELATED_P (insn) = 1;
5266 rtx t = plus_constant (stack_pointer_rtx, allocate);
5267 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5271 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5273 if (!frame_pointer_needed || !frame.to_allocate)
5274 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5276 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5277 -frame.nregs * UNITS_PER_WORD);
5280 pic_reg_used = false;
5281 if (pic_offset_table_rtx
5282 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5283 || current_function_profile))
5285 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5287 if (alt_pic_reg_used != INVALID_REGNUM)
5288 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5290 pic_reg_used = true;
5295 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5297 /* Even with accurate pre-reload life analysis, we can wind up
5298 deleting all references to the pic register after reload.
5299 Consider if cross-jumping unifies two sides of a branch
5300 controlled by a comparison vs the only read from a global.
5301 In that case, allow the set_got to be deleted, though we're
5302 too late to do anything about the ebx save in the prologue. */
5303 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5306 /* Prevent function calls from being scheduled before the call to mcount.
5307 In the pic_reg_used case, make sure that the got load isn't deleted. */
5308 if (current_function_profile)
5309 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
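/* For illustration (a sketch, not from the original sources), a typical
   -m32 prologue produced above for a function that needs a frame pointer,
   saves %ebx with push, and allocates N bytes:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$N, %esp

   With save_regs_using_mov the push becomes a mov into the frame that the
   sub has already allocated. */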
5312 /* Emit code to restore saved registers using MOV insns. First register
5313 is restored from POINTER + OFFSET. */
5315 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5316 int maybe_eh_return)
5319 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5321 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5322 if (ix86_save_reg (regno, maybe_eh_return))
5324 /* Ensure that adjust_address won't be forced to produce a pointer
5325 out of the range allowed by the x86-64 instruction set. */
5326 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5330 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5331 emit_move_insn (r11, GEN_INT (offset));
5332 emit_insn (gen_adddi3 (r11, r11, pointer));
5333 base_address = gen_rtx_MEM (Pmode, r11);
5336 emit_move_insn (gen_rtx_REG (Pmode, regno),
5337 adjust_address (base_address, Pmode, offset));
5338 offset += UNITS_PER_WORD;
5342 /* Restore function stack, frame, and registers. */
5345 ix86_expand_epilogue (int style)
5348 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5349 struct ix86_frame frame;
5350 HOST_WIDE_INT offset;
5352 ix86_compute_frame_layout (&frame);
5354 /* Calculate start of saved registers relative to ebp. Special care
5355 must be taken for the normal return case of a function using
5356 eh_return: the eax and edx registers are marked as saved, but not
5357 restored along this path. */
5358 offset = frame.nregs;
5359 if (current_function_calls_eh_return && style != 2)
5361 offset *= -UNITS_PER_WORD;
5363 /* If we're only restoring one register and sp is not valid, then
5364 use a move instruction to restore the register, since it's
5365 less work than reloading sp and popping the register.
5367 The default code results in a stack adjustment using an add/lea instruction,
5368 while this code results in a LEAVE instruction (or discrete equivalent),
5369 so it is profitable in some other cases as well, especially when there
5370 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5371 and there is exactly one register to pop. This heuristic may need some
5372 tuning in the future. */
5373 if ((!sp_valid && frame.nregs <= 1)
5374 || (TARGET_EPILOGUE_USING_MOVE
5375 && cfun->machine->use_fast_prologue_epilogue
5376 && (frame.nregs > 1 || frame.to_allocate))
5377 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5378 || (frame_pointer_needed && TARGET_USE_LEAVE
5379 && cfun->machine->use_fast_prologue_epilogue
5380 && frame.nregs == 1)
5381 || current_function_calls_eh_return)
5383 /* Restore registers. We can use ebp or esp to address the memory
5384 locations. If both are available, default to ebp, since offsets
5385 are known to be small. The only exception is esp pointing directly to the
5386 end of the block of saved registers, where we may simplify the addressing mode. */
5389 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5390 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5391 frame.to_allocate, style == 2);
5393 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5394 offset, style == 2);
5396 /* eh_return epilogues need %ecx added to the stack pointer. */
5399 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5401 if (frame_pointer_needed)
5403 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5404 tmp = plus_constant (tmp, UNITS_PER_WORD);
5405 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5407 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5408 emit_move_insn (hard_frame_pointer_rtx, tmp);
5410 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5415 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5416 tmp = plus_constant (tmp, (frame.to_allocate
5417 + frame.nregs * UNITS_PER_WORD));
5418 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5421 else if (!frame_pointer_needed)
5422 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5423 GEN_INT (frame.to_allocate
5424 + frame.nregs * UNITS_PER_WORD),
5426 /* If not an i386, mov & pop is faster than "leave". */
5427 else if (TARGET_USE_LEAVE || optimize_size
5428 || !cfun->machine->use_fast_prologue_epilogue)
5429 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5432 pro_epilogue_adjust_stack (stack_pointer_rtx,
5433 hard_frame_pointer_rtx,
5436 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5438 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5443 /* First step is to deallocate the stack frame so that we can
5444 pop the registers. */
5447 if (!frame_pointer_needed)
5449 pro_epilogue_adjust_stack (stack_pointer_rtx,
5450 hard_frame_pointer_rtx,
5451 GEN_INT (offset), style);
5453 else if (frame.to_allocate)
5454 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5455 GEN_INT (frame.to_allocate), style);
5457 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5458 if (ix86_save_reg (regno, false))
5461 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5463 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5465 if (frame_pointer_needed)
5467 /* Leave results in shorter dependency chains on CPUs that are
5468 able to grok it fast. */
5469 if (TARGET_USE_LEAVE)
5470 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5471 else if (TARGET_64BIT)
5472 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5474 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5478 /* Sibcall epilogues don't want a return instruction. */
5482 if (current_function_pops_args && current_function_args_size)
5484 rtx popc = GEN_INT (current_function_pops_args);
5486 /* i386 can only pop 64K bytes. If asked to pop more, pop
5487 return address, do explicit add, and jump indirectly to the caller. */
5490 if (current_function_pops_args >= 65536)
5492 rtx ecx = gen_rtx_REG (SImode, 2);
5494 /* There is no "pascal" calling convention in the 64-bit ABI. */
5498 emit_insn (gen_popsi1 (ecx));
5499 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5500 emit_jump_insn (gen_return_indirect_internal (ecx));
5503 emit_jump_insn (gen_return_pop_internal (popc));
5506 emit_jump_insn (gen_return_internal ());
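/* For illustration (a sketch, not from the original sources), the
   matching -m32 epilogues: either discrete pops,

	addl	$N, %esp
	popl	%ebx
	popl	%ebp
	ret

   or, where TARGET_USE_LEAVE holds,

	leal	-4(%ebp), %esp
	popl	%ebx
	leave
	ret

   with "ret $M" substituted when the callee pops its own arguments. */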
5509 /* Reset from the function's potential modifications. */
5512 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5513 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5515 if (pic_offset_table_rtx)
5516 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5519 /* Extract the parts of an RTL expression that is a valid memory address
5520 for an instruction. Return 0 if the structure of the address is
5521 grossly off. Return -1 if the address contains ASHIFT, so it is not
5522 strictly valid, but is still used for computing the length of an lea instruction. */
5525 ix86_decompose_address (rtx addr, struct ix86_address *out)
5527 rtx base = NULL_RTX;
5528 rtx index = NULL_RTX;
5529 rtx disp = NULL_RTX;
5530 HOST_WIDE_INT scale = 1;
5531 rtx scale_rtx = NULL_RTX;
5533 enum ix86_address_seg seg = SEG_DEFAULT;
5535 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5537 else if (GET_CODE (addr) == PLUS)
5547 addends[n++] = XEXP (op, 1);
5550 while (GET_CODE (op) == PLUS);
5555 for (i = n; i >= 0; --i)
5558 switch (GET_CODE (op))
5563 index = XEXP (op, 0);
5564 scale_rtx = XEXP (op, 1);
5568 if (XINT (op, 1) == UNSPEC_TP
5569 && TARGET_TLS_DIRECT_SEG_REFS
5570 && seg == SEG_DEFAULT)
5571 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5600 else if (GET_CODE (addr) == MULT)
5602 index = XEXP (addr, 0); /* index*scale */
5603 scale_rtx = XEXP (addr, 1);
5605 else if (GET_CODE (addr) == ASHIFT)
5609 /* We're called for lea too, which implements ashift on occasion. */
5610 index = XEXP (addr, 0);
5611 tmp = XEXP (addr, 1);
5612 if (GET_CODE (tmp) != CONST_INT)
5614 scale = INTVAL (tmp);
5615 if ((unsigned HOST_WIDE_INT) scale > 3)
5621 disp = addr; /* displacement */
5623 /* Extract the integral value of scale. */
5626 if (GET_CODE (scale_rtx) != CONST_INT)
5628 scale = INTVAL (scale_rtx);
5631 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
5632 if (base && index && scale == 1
5633 && (index == arg_pointer_rtx
5634 || index == frame_pointer_rtx
5635 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5642 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5643 if ((base == hard_frame_pointer_rtx
5644 || base == frame_pointer_rtx
5645 || base == arg_pointer_rtx) && !disp)
5648 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5649 Avoid this by transforming to [%esi+0]. */
5650 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5651 && base && !index && !disp
5653 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5656 /* Special case: encode reg+reg instead of reg*2. */
5657 if (!base && index && scale && scale == 2)
5658 base = index, scale = 1;
5660 /* Special case: scaling cannot be encoded without base or displacement. */
5661 if (!base && !disp && index && scale != 1)
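/* Worked example (an illustration, not from the original sources): the
   address 12(%ebx,%eax,4), i.e.

	(plus (plus (mult (reg %eax) (const_int 4))
		    (reg %ebx))
	      (const_int 12))

   decomposes into out->base == %ebx, out->index == %eax, out->scale == 4
   and out->disp == (const_int 12). */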
5673 /* Return cost of the memory address x.
5674 For i386, it is better to use a complex address than let gcc copy
5675 the address into a reg and make a new pseudo. But not if the address
5676 requires two regs - that would mean more pseudos with longer lifetimes. */
5679 ix86_address_cost (rtx x)
5681 struct ix86_address parts;
5684 if (!ix86_decompose_address (x, &parts))
5687 /* More complex memory references are better. */
5688 if (parts.disp && parts.disp != const0_rtx)
5690 if (parts.seg != SEG_DEFAULT)
5693 /* Attempt to minimize number of registers in the address. */
5695 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5697 && (!REG_P (parts.index)
5698 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5702 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5704 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5705 && parts.base != parts.index)
5708 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5709 since its predecode logic can't detect the length of instructions
5710 and decoding degenerates to vector decoded. Increase the cost of such
5711 addresses here. The penalty is a minimum of 2 cycles. It may be worthwhile
5712 to split such addresses or even refuse them entirely.
5714 The following addressing modes are affected:
5719 The first and last case may be avoidable by explicitly coding the zero into
5720 the memory address, but I don't have an AMD-K6 machine handy to check this theory. */
5724 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5725 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5726 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5732 /* If X is a machine specific address (i.e. a symbol or label being
5733 referenced as a displacement from the GOT implemented using an
5734 UNSPEC), then return the base term. Otherwise return X. */
5737 ix86_find_base_term (rtx x)
5743 if (GET_CODE (x) != CONST)
5746 if (GET_CODE (term) == PLUS
5747 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5748 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5749 term = XEXP (term, 0);
5750 if (GET_CODE (term) != UNSPEC
5751 || XINT (term, 1) != UNSPEC_GOTPCREL)
5754 term = XVECEXP (term, 0, 0);
5756 if (GET_CODE (term) != SYMBOL_REF
5757 && GET_CODE (term) != LABEL_REF)
5763 term = ix86_delegitimize_address (x);
5765 if (GET_CODE (term) != SYMBOL_REF
5766 && GET_CODE (term) != LABEL_REF)
5772 /* Determine if a given RTX is a valid constant. We already know this
5773 satisfies CONSTANT_P. */
5776 legitimate_constant_p (rtx x)
5778 switch (GET_CODE (x))
5783 if (GET_CODE (x) == PLUS)
5785 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5790 /* Only some unspecs are valid as "constants". */
5791 if (GET_CODE (x) == UNSPEC)
5792 switch (XINT (x, 1))
5796 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5798 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5803 /* We must have drilled down to a symbol. */
5804 if (!symbolic_operand (x, Pmode))
5809 /* TLS symbols are never valid. */
5810 if (tls_symbolic_operand (x, Pmode))
5818 /* Otherwise we handle everything else in the move patterns. */
5822 /* Determine if it's legal to put X into the constant pool. This
5823 is not possible for the address of thread-local symbols, which
5824 is checked above. */
5827 ix86_cannot_force_const_mem (rtx x)
5829 return !legitimate_constant_p (x);
5832 /* Determine if a given RTX is a valid constant address. */
5835 constant_address_p (rtx x)
5837 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5840 /* Nonzero if the constant value X is a legitimate general operand
5841 when generating PIC code. It is given that flag_pic is on and
5842 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5845 legitimate_pic_operand_p (rtx x)
5849 switch (GET_CODE (x))
5852 inner = XEXP (x, 0);
5854 /* Only some unspecs are valid as "constants". */
5855 if (GET_CODE (inner) == UNSPEC)
5856 switch (XINT (inner, 1))
5859 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5867 return legitimate_pic_address_disp_p (x);
5874 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
5878 legitimate_pic_address_disp_p (rtx disp)
5882 /* In 64bit mode we can allow direct addresses of symbols and labels
5883 when they are not dynamic symbols. */
5886 /* TLS references should always be enclosed in UNSPEC. */
5887 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5889 if (GET_CODE (disp) == SYMBOL_REF
5890 && ix86_cmodel == CM_SMALL_PIC
5891 && SYMBOL_REF_LOCAL_P (disp))
5893 if (GET_CODE (disp) == LABEL_REF)
5895 if (GET_CODE (disp) == CONST
5896 && GET_CODE (XEXP (disp, 0)) == PLUS)
5898 rtx op0 = XEXP (XEXP (disp, 0), 0);
5899 rtx op1 = XEXP (XEXP (disp, 0), 1);
5901 /* TLS references should always be enclosed in UNSPEC. */
5902 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5904 if (((GET_CODE (op0) == SYMBOL_REF
5905 && ix86_cmodel == CM_SMALL_PIC
5906 && SYMBOL_REF_LOCAL_P (op0))
5907 || GET_CODE (op0) == LABEL_REF)
5908 && GET_CODE (op1) == CONST_INT
5909 && INTVAL (op1) < 16*1024*1024
5910 && INTVAL (op1) >= -16*1024*1024)
5914 if (GET_CODE (disp) != CONST)
5916 disp = XEXP (disp, 0);
5920 /* It is unsafe to allow PLUS expressions; this restriction limits the
5921 allowed distance of GOT table references. We should not need these anyway. */
5922 if (GET_CODE (disp) != UNSPEC
5923 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5926 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5927 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5933 if (GET_CODE (disp) == PLUS)
5935 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5937 disp = XEXP (disp, 0);
5941 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5942 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5944 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5945 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5946 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5948 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5949 if (! strcmp (sym_name, "<pic base>"))
5954 if (GET_CODE (disp) != UNSPEC)
5957 switch (XINT (disp, 1))
5962 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5964 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5965 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5966 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5968 case UNSPEC_GOTTPOFF:
5969 case UNSPEC_GOTNTPOFF:
5970 case UNSPEC_INDNTPOFF:
5973 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5975 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5977 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5983 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5984 memory address for an instruction. The MODE argument is the machine mode
5985 for the MEM expression that wants to use this address.
5987 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5988 convert common non-canonical forms to canonical form so that they will be recognized. */
5992 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5994 struct ix86_address parts;
5995 rtx base, index, disp;
5996 HOST_WIDE_INT scale;
5997 const char *reason = NULL;
5998 rtx reason_rtx = NULL_RTX;
6000 if (TARGET_DEBUG_ADDR)
6003 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6004 GET_MODE_NAME (mode), strict);
6008 if (ix86_decompose_address (addr, &parts) <= 0)
6010 reason = "decomposition failed";
6015 index = parts.index;
6017 scale = parts.scale;
6019 /* Validate base register.
6021 Don't allow SUBREGs here; they can lead to spill failures when the base
6022 is one word out of a two-word structure, which is represented internally as a DImode int.
6029 if (GET_CODE (base) != REG)
6031 reason = "base is not a register";
6035 if (GET_MODE (base) != Pmode)
6037 reason = "base is not in Pmode";
6041 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6042 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6044 reason = "base is not valid";
6049 /* Validate index register.
6051 Don't allow SUBREGs here; they can lead to spill failures when the index
6052 is one word out of a two-word structure, which is represented internally as a DImode int.
6059 if (GET_CODE (index) != REG)
6061 reason = "index is not a register";
6065 if (GET_MODE (index) != Pmode)
6067 reason = "index is not in Pmode";
6071 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6072 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6074 reason = "index is not valid";
6079 /* Validate scale factor. */
6082 reason_rtx = GEN_INT (scale);
6085 reason = "scale without index";
6089 if (scale != 2 && scale != 4 && scale != 8)
6091 reason = "scale is not a valid multiplier";
6096 /* Validate displacement. */
6101 if (GET_CODE (disp) == CONST
6102 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6103 switch (XINT (XEXP (disp, 0), 1))
6107 case UNSPEC_GOTPCREL:
6110 goto is_legitimate_pic;
6112 case UNSPEC_GOTTPOFF:
6113 case UNSPEC_GOTNTPOFF:
6114 case UNSPEC_INDNTPOFF:
6120 reason = "invalid address unspec";
6124 else if (flag_pic && (SYMBOLIC_CONST (disp)
6126 && !machopic_operand_p (disp)
6131 if (TARGET_64BIT && (index || base))
6133 /* foo@dtpoff(%rX) is ok. */
6134 if (GET_CODE (disp) != CONST
6135 || GET_CODE (XEXP (disp, 0)) != PLUS
6136 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6137 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6138 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6139 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6141 reason = "non-constant pic memory reference";
6145 else if (! legitimate_pic_address_disp_p (disp))
6147 reason = "displacement is an invalid pic construct";
6151 /* This code used to verify that a symbolic pic displacement
6152 includes the pic_offset_table_rtx register.
6154 While this is a good idea, unfortunately these constructs may
6155 be created by "adds using lea" optimization for incorrect
6164 This code is nonsensical, but results in addressing the
6165 GOT table with a pic_offset_table_rtx base. We can't
6166 just reject it easily, since it gets matched by the
6167 "addsi3" pattern, which later gets split to an lea when the
6168 output register differs from the input. While this
6169 could be handled by a separate addsi pattern for this case
6170 that never results in an lea, disabling this test seems to be
6171 the easier and correct fix for the crash. */
6173 else if (GET_CODE (disp) != LABEL_REF
6174 && GET_CODE (disp) != CONST_INT
6175 && (GET_CODE (disp) != CONST
6176 || !legitimate_constant_p (disp))
6177 && (GET_CODE (disp) != SYMBOL_REF
6178 || !legitimate_constant_p (disp)))
6180 reason = "displacement is not constant";
6183 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6185 reason = "displacement is out of range";
6190 /* Everything looks valid. */
6191 if (TARGET_DEBUG_ADDR)
6192 fprintf (stderr, "Success.\n");
6196 if (TARGET_DEBUG_ADDR)
6198 fprintf (stderr, "Error: %s\n", reason);
6199 debug_rtx (reason_rtx);
6204 /* Return a unique alias set for the GOT. */
6206 static HOST_WIDE_INT
6207 ix86_GOT_alias_set (void)
6209 static HOST_WIDE_INT set = -1;
6211 set = new_alias_set ();
6215 /* Return a legitimate reference for ORIG (an address) using the
6216 register REG. If REG is 0, a new pseudo is generated.
6218 There are two types of references that must be handled:
6220 1. Global data references must load the address from the GOT, via
6221 the PIC reg. An insn is emitted to do this load, and the reg is returned.
6224 2. Static data references, constant pool addresses, and code labels
6225 compute the address as an offset from the GOT, whose base is in
6226 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6227 differentiate them from global data objects. The returned
6228 address is the PIC reg + an unspec constant.
6230 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6231 reg also appears in the address. */
6234 legitimize_pic_address (rtx orig, rtx reg)
6242 reg = gen_reg_rtx (Pmode);
6243 /* Use the generic Mach-O PIC machinery. */
6244 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6247 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6249 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6251 /* This symbol may be referenced via a displacement from the PIC
6252 base address (@GOTOFF). */
6254 if (reload_in_progress)
6255 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6256 if (GET_CODE (addr) == CONST)
6257 addr = XEXP (addr, 0);
6258 if (GET_CODE (addr) == PLUS)
6260 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6261 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6264 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6265 new = gen_rtx_CONST (Pmode, new);
6266 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6270 emit_move_insn (reg, new);
6274 else if (GET_CODE (addr) == SYMBOL_REF)
6278 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6279 new = gen_rtx_CONST (Pmode, new);
6280 new = gen_rtx_MEM (Pmode, new);
6281 RTX_UNCHANGING_P (new) = 1;
6282 set_mem_alias_set (new, ix86_GOT_alias_set ());
6285 reg = gen_reg_rtx (Pmode);
6286 /* Use gen_movsi directly; otherwise the address is loaded
6287 into a register for CSE. We don't want to CSE these addresses;
6288 instead we CSE addresses from the GOT table, so skip this. */
6289 emit_insn (gen_movsi (reg, new));
6294 /* This symbol must be referenced via a load from the
6295 Global Offset Table (@GOT). */
6297 if (reload_in_progress)
6298 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6299 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6300 new = gen_rtx_CONST (Pmode, new);
6301 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6302 new = gen_rtx_MEM (Pmode, new);
6303 RTX_UNCHANGING_P (new) = 1;
6304 set_mem_alias_set (new, ix86_GOT_alias_set ());
6307 reg = gen_reg_rtx (Pmode);
6308 emit_move_insn (reg, new);
6314 if (GET_CODE (addr) == CONST)
6316 addr = XEXP (addr, 0);
6318 /* We must match stuff we generate before. Assume the only
6319 unspecs that can get here are ours. Not that we could do
6320 anything with them anyway.... */
6321 if (GET_CODE (addr) == UNSPEC
6322 || (GET_CODE (addr) == PLUS
6323 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6325 if (GET_CODE (addr) != PLUS)
6328 if (GET_CODE (addr) == PLUS)
6330 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6332 /* Check first to see if this is a constant offset from a @GOTOFF
6333 symbol reference. */
6334 if (local_symbolic_operand (op0, Pmode)
6335 && GET_CODE (op1) == CONST_INT)
6339 if (reload_in_progress)
6340 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6341 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6343 new = gen_rtx_PLUS (Pmode, new, op1);
6344 new = gen_rtx_CONST (Pmode, new);
6345 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6349 emit_move_insn (reg, new);
6355 if (INTVAL (op1) < -16*1024*1024
6356 || INTVAL (op1) >= 16*1024*1024)
6357 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6362 base = legitimize_pic_address (XEXP (addr, 0), reg);
6363 new = legitimize_pic_address (XEXP (addr, 1),
6364 base == reg ? NULL_RTX : reg);
6366 if (GET_CODE (new) == CONST_INT)
6367 new = plus_constant (base, INTVAL (new));
6370 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6372 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6373 new = XEXP (new, 1);
6375 new = gen_rtx_PLUS (Pmode, base, new);
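/* For illustration (a sketch, not from the original sources) of the two
   -m32 forms built above: a global symbol becomes a load from the GOT,

	(mem (plus pic_offset_table_rtx
		   (const (unspec [sym] UNSPEC_GOT))))	; sym@GOT(%ebx)

   while a local symbol becomes a GOT-relative offset computed directly,

	(plus pic_offset_table_rtx
	      (const (unspec [sym] UNSPEC_GOTOFF)))	; sym@GOTOFF(%ebx) */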
6383 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6386 get_thread_pointer (int to_reg)
6390 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6394 reg = gen_reg_rtx (Pmode);
6395 insn = gen_rtx_SET (VOIDmode, reg, tp);
6396 insn = emit_insn (insn);
6401 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6402 false if we expect this to be used for a memory address and true if
6403 we expect to load the address into a register. */
6406 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6408 rtx dest, base, off, pic;
6413 case TLS_MODEL_GLOBAL_DYNAMIC:
6414 dest = gen_reg_rtx (Pmode);
6417 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6420 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6421 insns = get_insns ();
6424 emit_libcall_block (insns, dest, rax, x);
6427 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6430 case TLS_MODEL_LOCAL_DYNAMIC:
6431 base = gen_reg_rtx (Pmode);
6434 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6437 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6438 insns = get_insns ();
6441 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6442 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6443 emit_libcall_block (insns, base, rax, note);
6446 emit_insn (gen_tls_local_dynamic_base_32 (base));
6448 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6449 off = gen_rtx_CONST (Pmode, off);
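      /* The value returned is (plus BASE (const (unspec [X] DTPOFF))),
         i.e. the module base handed back by the tls_get_addr call plus
         the link-time X@DTPOFF offset -- a sketch of the usual GNU TLS
         local-dynamic lowering.  */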
6451 return gen_rtx_PLUS (Pmode, base, off);
6453 case TLS_MODEL_INITIAL_EXEC:
6457 type = UNSPEC_GOTNTPOFF;
6461 if (reload_in_progress)
6462 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6463 pic = pic_offset_table_rtx;
6464 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6466 else if (!TARGET_GNU_TLS)
6468 pic = gen_reg_rtx (Pmode);
6469 emit_insn (gen_set_got (pic));
6470 type = UNSPEC_GOTTPOFF;
6475 type = UNSPEC_INDNTPOFF;
6478 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6479 off = gen_rtx_CONST (Pmode, off);
6481 off = gen_rtx_PLUS (Pmode, pic, off);
6482 off = gen_rtx_MEM (Pmode, off);
6483 RTX_UNCHANGING_P (off) = 1;
6484 set_mem_alias_set (off, ix86_GOT_alias_set ());
6486 if (TARGET_64BIT || TARGET_GNU_TLS)
6488 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6489 off = force_reg (Pmode, off);
6490 return gen_rtx_PLUS (Pmode, base, off);
6494 base = get_thread_pointer (true);
6495 dest = gen_reg_rtx (Pmode);
6496 emit_insn (gen_subsi3 (dest, base, off));
6500 case TLS_MODEL_LOCAL_EXEC:
6501 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6502 (TARGET_64BIT || TARGET_GNU_TLS)
6503 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6504 off = gen_rtx_CONST (Pmode, off);
6506 if (TARGET_64BIT || TARGET_GNU_TLS)
6508 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6509 return gen_rtx_PLUS (Pmode, base, off);
6513 base = get_thread_pointer (true);
6514 dest = gen_reg_rtx (Pmode);
6515 emit_insn (gen_subsi3 (dest, base, off));
6526 /* Try machine-dependent ways of modifying an illegitimate address
6527 to be legitimate. If we find one, return the new, valid address.
6528 This macro is used in only one place: `memory_address' in explow.c.
6530 OLDX is the address as it was before break_out_memory_refs was called.
6531 In some cases it is useful to look at this to decide what needs to be done.
6533 MODE and WIN are passed so that this macro can use
6534 GO_IF_LEGITIMATE_ADDRESS.
6536 It is always safe for this macro to do nothing. It exists to recognize
6537 opportunities to optimize the output.
6539 For the 80386, we handle X+REG by loading X into a register R and
6540 using R+REG. R will go in a general reg and indexing will be used.
6541 However, if REG is a broken-out memory address or multiplication,
6542 nothing needs to be done because REG can certainly go in a general reg.
6544 When -fpic is used, special handling is needed for symbolic references.
6545 See comments by legitimize_pic_address in i386.c for details. */
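/* For example (a hypothetical sketch), an address like
   (plus:SI (symbol_ref:SI ("x")) (reg:SI 58)) can be rewritten by
   loading the symbol into a fresh register R and returning
   (plus:SI R (reg:SI 58)), which fits the machine's base+index
   addressing forms.  */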
6548 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6553 if (TARGET_DEBUG_ADDR)
6555 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6556 GET_MODE_NAME (mode));
6560 log = tls_symbolic_operand (x, mode);
6562 return legitimize_tls_address (x, log, false);
6564 if (flag_pic && SYMBOLIC_CONST (x))
6565 return legitimize_pic_address (x, 0);
6567 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6568 if (GET_CODE (x) == ASHIFT
6569 && GET_CODE (XEXP (x, 1)) == CONST_INT
6570 && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
6573 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6574 GEN_INT (1 << log));
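  /* E.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
     which the base + index*scale address forms can match directly
     (an illustrative sketch).  */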
6577 if (GET_CODE (x) == PLUS)
6579 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6581 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6582 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6583 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
6586 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6587 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6588 GEN_INT (1 << log));
6591 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6592 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6593 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
6596 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6597 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6598 GEN_INT (1 << log));
6601 /* Put multiply first if it isn't already. */
6602 if (GET_CODE (XEXP (x, 1)) == MULT)
6604 rtx tmp = XEXP (x, 0);
6605 XEXP (x, 0) = XEXP (x, 1);
6610 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6611 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6612 created by virtual register instantiation, register elimination, and
6613 similar optimizations. */
6614 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6617 x = gen_rtx_PLUS (Pmode,
6618 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6619 XEXP (XEXP (x, 1), 0)),
6620 XEXP (XEXP (x, 1), 1));
6624 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6625 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6626 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6627 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6628 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6629 && CONSTANT_P (XEXP (x, 1)))
6632 rtx other = NULL_RTX;
6634 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6636 constant = XEXP (x, 1);
6637 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6639 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6641 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6642 other = XEXP (x, 1);
6650 x = gen_rtx_PLUS (Pmode,
6651 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6652 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6653 plus_constant (other, INTVAL (constant)));
6657 if (changed && legitimate_address_p (mode, x, FALSE))
6660 if (GET_CODE (XEXP (x, 0)) == MULT)
6663 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6666 if (GET_CODE (XEXP (x, 1)) == MULT)
6669 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6673 && GET_CODE (XEXP (x, 1)) == REG
6674 && GET_CODE (XEXP (x, 0)) == REG)
6677 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6680 x = legitimize_pic_address (x, 0);
6683 if (changed && legitimate_address_p (mode, x, FALSE))
6686 if (GET_CODE (XEXP (x, 0)) == REG)
6688 rtx temp = gen_reg_rtx (Pmode);
6689 rtx val = force_operand (XEXP (x, 1), temp);
6691 emit_move_insn (temp, val);
6697 else if (GET_CODE (XEXP (x, 1)) == REG)
6699 rtx temp = gen_reg_rtx (Pmode);
6700 rtx val = force_operand (XEXP (x, 0), temp);
6702 emit_move_insn (temp, val);
6712 /* Print an integer constant expression in assembler syntax. Addition
6713 and subtraction are the only arithmetic that may appear in these
6714 expressions. FILE is the stdio stream to write to, X is the rtx, and
6715 CODE is the operand print code from the output string. */
6718 output_pic_addr_const (FILE *file, rtx x, int code)
6722 switch (GET_CODE (x))
6732 assemble_name (file, XSTR (x, 0));
6733 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6734 fputs ("@PLT", file);
6741 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6742 assemble_name (asm_out_file, buf);
6746 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6750 /* This used to output parentheses around the expression,
6751 but that does not work on the 386 (either ATT or BSD assembler). */
6752 output_pic_addr_const (file, XEXP (x, 0), code);
6756 if (GET_MODE (x) == VOIDmode)
6758 /* We can use %d if the number is <32 bits and positive. */
6759 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6760 fprintf (file, "0x%lx%08lx",
6761 (unsigned long) CONST_DOUBLE_HIGH (x),
6762 (unsigned long) CONST_DOUBLE_LOW (x));
6764 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6767 /* We can't handle floating point constants;
6768 PRINT_OPERAND must handle them. */
6769 output_operand_lossage ("floating constant misused");
6773 /* Some assemblers need integer constants to appear first. */
6774 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6776 output_pic_addr_const (file, XEXP (x, 0), code);
6778 output_pic_addr_const (file, XEXP (x, 1), code);
6780 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6782 output_pic_addr_const (file, XEXP (x, 1), code);
6784 output_pic_addr_const (file, XEXP (x, 0), code);
6792 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6793 output_pic_addr_const (file, XEXP (x, 0), code);
6795 output_pic_addr_const (file, XEXP (x, 1), code);
6797 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6801 if (XVECLEN (x, 0) != 1)
6803 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6804 switch (XINT (x, 1))
6807 fputs ("@GOT", file);
6810 fputs ("@GOTOFF", file);
6812 case UNSPEC_GOTPCREL:
6813 fputs ("@GOTPCREL(%rip)", file);
6815 case UNSPEC_GOTTPOFF:
6816 /* FIXME: This might be @TPOFF in Sun ld too. */
6817 fputs ("@GOTTPOFF", file);
6820 fputs ("@TPOFF", file);
6824 fputs ("@TPOFF", file);
6826 fputs ("@NTPOFF", file);
6829 fputs ("@DTPOFF", file);
6831 case UNSPEC_GOTNTPOFF:
6833 fputs ("@GOTTPOFF(%rip)", file);
6835 fputs ("@GOTNTPOFF", file);
6837 case UNSPEC_INDNTPOFF:
6838 fputs ("@INDNTPOFF", file);
6841 output_operand_lossage ("invalid UNSPEC as operand");
6847 output_operand_lossage ("invalid expression as operand");
6851 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6852 We need to handle our special PIC relocations. */
6855 i386_dwarf_output_addr_const (FILE *file, rtx x)
6858 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6862 fprintf (file, "%s", ASM_LONG);
6865 output_pic_addr_const (file, x, '\0');
6867 output_addr_const (file, x);
6871 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6872 We need to emit DTP-relative relocations. */
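/* Illustratively, for SIZE == 4 this prints ".long x@DTPOFF"; the
   SIZE == 8 case appends ", 0", since only a 32-bit DTPOFF relocation
   is emitted.  */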
6875 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6877 fputs (ASM_LONG, file);
6878 output_addr_const (file, x);
6879 fputs ("@DTPOFF", file);
6885 fputs (", 0", file);
6892 /* In the name of slightly smaller debug output, and to cater to
6893 general assembler lossage, recognize PIC+GOTOFF and turn it back
6894 into a direct symbol reference. */
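/* E.g. (a sketch) an address of the form
   (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into the bare (symbol_ref "x") for debug output.  */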
6897 ix86_delegitimize_address (rtx orig_x)
6901 if (GET_CODE (x) == MEM)
6906 if (GET_CODE (x) != CONST
6907 || GET_CODE (XEXP (x, 0)) != UNSPEC
6908 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6909 || GET_CODE (orig_x) != MEM)
6911 return XVECEXP (XEXP (x, 0), 0, 0);
6914 if (GET_CODE (x) != PLUS
6915 || GET_CODE (XEXP (x, 1)) != CONST)
6918 if (GET_CODE (XEXP (x, 0)) == REG
6919 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6920 /* %ebx + GOT/GOTOFF */
6922 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6924 /* %ebx + %reg * scale + GOT/GOTOFF */
6926 if (GET_CODE (XEXP (y, 0)) == REG
6927 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6929 else if (GET_CODE (XEXP (y, 1)) == REG
6930 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6934 if (GET_CODE (y) != REG
6935 && GET_CODE (y) != MULT
6936 && GET_CODE (y) != ASHIFT)
6942 x = XEXP (XEXP (x, 1), 0);
6943 if (GET_CODE (x) == UNSPEC
6944 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6945 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6948 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6949 return XVECEXP (x, 0, 0);
6952 if (GET_CODE (x) == PLUS
6953 && GET_CODE (XEXP (x, 0)) == UNSPEC
6954 && GET_CODE (XEXP (x, 1)) == CONST_INT
6955 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6956 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6957 && GET_CODE (orig_x) != MEM)))
6959 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6961 return gen_rtx_PLUS (Pmode, y, x);
6969 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6974 if (mode == CCFPmode || mode == CCFPUmode)
6976 enum rtx_code second_code, bypass_code;
6977 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6978 if (bypass_code != NIL || second_code != NIL)
6980 code = ix86_fp_compare_code_to_integer (code);
6984 code = reverse_condition (code);
6995 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7000 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7001 Those same assemblers have the same but opposite lossage on cmov. */
7004 suffix = fp ? "nbe" : "a";
7007 if (mode == CCNOmode || mode == CCGOCmode)
7009 else if (mode == CCmode || mode == CCGCmode)
7020 if (mode == CCNOmode || mode == CCGOCmode)
7022 else if (mode == CCmode || mode == CCGCmode)
7031 suffix = fp ? "nb" : "ae";
7034 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7044 suffix = fp ? "u" : "p";
7047 suffix = fp ? "nu" : "np";
7052 fputs (suffix, file);
7055 /* Print the name of register X to FILE based on its machine mode and number.
7056 If CODE is 'w', pretend the mode is HImode.
7057 If CODE is 'b', pretend the mode is QImode.
7058 If CODE is 'k', pretend the mode is SImode.
7059 If CODE is 'q', pretend the mode is DImode.
7060 If CODE is 'h', pretend the reg is the `high' byte register.
7061 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7064 print_reg (rtx x, int code, FILE *file)
7066 if (REGNO (x) == ARG_POINTER_REGNUM
7067 || REGNO (x) == FRAME_POINTER_REGNUM
7068 || REGNO (x) == FLAGS_REG
7069 || REGNO (x) == FPSR_REG)
7072 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7075 if (code == 'w' || MMX_REG_P (x))
7077 else if (code == 'b')
7079 else if (code == 'k')
7081 else if (code == 'q')
7083 else if (code == 'y')
7085 else if (code == 'h')
7088 code = GET_MODE_SIZE (GET_MODE (x));
7090 /* Irritatingly, AMD extended registers use a different naming convention
7091 from the normal registers. */
7092 if (REX_INT_REG_P (x))
7099 error ("extended registers have no high halves");
7102 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7105 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7108 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7111 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7114 error ("unsupported operand size for extended register");
7122 if (STACK_TOP_P (x))
7124 fputs ("st(0)", file);
7131 if (! ANY_FP_REG_P (x))
7132 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7137 fputs (hi_reg_name[REGNO (x)], file);
7140 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7142 fputs (qi_reg_name[REGNO (x)], file);
7145 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7147 fputs (qi_high_reg_name[REGNO (x)], file);
7154 /* Locate some local-dynamic symbol still in use by this function
7155 so that we can print its name in some tls_local_dynamic_base pattern. */
7159 get_some_local_dynamic_name (void)
7163 if (cfun->machine->some_ld_name)
7164 return cfun->machine->some_ld_name;
7166 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7168 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7169 return cfun->machine->some_ld_name;
7175 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7179 if (GET_CODE (x) == SYMBOL_REF
7180 && local_dynamic_symbolic_operand (x, Pmode))
7182 cfun->machine->some_ld_name = XSTR (x, 0);
7190 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7191 C -- print opcode suffix for set/cmov insn.
7192 c -- like C, but print reversed condition
7193 F,f -- likewise, but for floating-point.
7194 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7196 R -- print the prefix for register names.
7197 z -- print the opcode suffix for the size of the current operand.
7198 * -- print a star (in certain assembler syntax)
7199 A -- print an absolute memory reference.
7200 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7201 s -- print a shift double count, followed by the assembler's argument
7203 b -- print the QImode name of the register for the indicated operand.
7204 %b0 would print %al if operands[0] is reg 0.
7205 w -- likewise, print the HImode name of the register.
7206 k -- likewise, print the SImode name of the register.
7207 q -- likewise, print the DImode name of the register.
7208 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7209 y -- print "st(0)" instead of "st" as a register.
7210 D -- print condition for SSE cmp instruction.
7211 P -- if PIC, print an @PLT suffix.
7212 X -- don't print any sort of PIC '@' suffix for a symbol.
7213 & -- print some in-use local-dynamic symbol name.
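/* As a hypothetical example of the codes above in an insn template,
   "cmov%C1\t{%2, %0|%0, %2}" prints the condition suffix for operand 1,
   with the {att|intel} braces selecting the operand order for the
   current assembler dialect.  */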
7217 print_operand (FILE *file, rtx x, int code)
7224 if (ASSEMBLER_DIALECT == ASM_ATT)
7229 assemble_name (file, get_some_local_dynamic_name ());
7233 if (ASSEMBLER_DIALECT == ASM_ATT)
7235 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7237 /* Intel syntax. For absolute addresses, registers should not
7238 be surrounded by brackets. */
7239 if (GET_CODE (x) != REG)
7242 PRINT_OPERAND (file, x, 0);
7250 PRINT_OPERAND (file, x, 0);
7255 if (ASSEMBLER_DIALECT == ASM_ATT)
7260 if (ASSEMBLER_DIALECT == ASM_ATT)
7265 if (ASSEMBLER_DIALECT == ASM_ATT)
7270 if (ASSEMBLER_DIALECT == ASM_ATT)
7275 if (ASSEMBLER_DIALECT == ASM_ATT)
7280 if (ASSEMBLER_DIALECT == ASM_ATT)
7285 /* 387 opcodes don't get size suffixes if the operands are registers.
7287 if (STACK_REG_P (x))
7290 /* Likewise if using Intel opcodes. */
7291 if (ASSEMBLER_DIALECT == ASM_INTEL)
7294 /* This is the size of op from size of operand. */
7295 switch (GET_MODE_SIZE (GET_MODE (x)))
7298 #ifdef HAVE_GAS_FILDS_FISTS
7304 if (GET_MODE (x) == SFmode)
7319 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7321 #ifdef GAS_MNEMONICS
7347 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7349 PRINT_OPERAND (file, x, 0);
7355 /* Little bit of braindamage here. The SSE compare instructions
7356 do use completely different names for the comparisons than the
7357 fp conditional moves. */
7358 switch (GET_CODE (x))
7373 fputs ("unord", file);
7377 fputs ("neq", file);
7381 fputs ("nlt", file);
7385 fputs ("nle", file);
7388 fputs ("ord", file);
7396 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7397 if (ASSEMBLER_DIALECT == ASM_ATT)
7399 switch (GET_MODE (x))
7401 case HImode: putc ('w', file); break;
7403 case SFmode: putc ('l', file); break;
7405 case DFmode: putc ('q', file); break;
7413 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7416 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7417 if (ASSEMBLER_DIALECT == ASM_ATT)
7420 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7423 /* Like above, but reverse condition */
7425 /* Check to see if argument to %c is really a constant
7426 and not a condition code which needs to be reversed. */
7427 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7429 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7432 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7435 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7436 if (ASSEMBLER_DIALECT == ASM_ATT)
7439 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7445 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7448 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7451 int pred_val = INTVAL (XEXP (x, 0));
7453 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7454 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7456 int taken = pred_val > REG_BR_PROB_BASE / 2;
7457 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7459 /* Emit hints only in the case the default branch prediction
7460 heuristics would fail. */
7461 if (taken != cputaken)
7463 /* We use 3e (DS) prefix for taken branches and
7464 2e (CS) prefix for not taken branches. */
7466 fputs ("ds ; ", file);
7468 fputs ("cs ; ", file);
7475 output_operand_lossage ("invalid operand code `%c'", code);
7479 if (GET_CODE (x) == REG)
7480 print_reg (x, code, file);
7482 else if (GET_CODE (x) == MEM)
7484 /* No `byte ptr' prefix for call instructions. */
7485 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7488 switch (GET_MODE_SIZE (GET_MODE (x)))
7490 case 1: size = "BYTE"; break;
7491 case 2: size = "WORD"; break;
7492 case 4: size = "DWORD"; break;
7493 case 8: size = "QWORD"; break;
7494 case 12: size = "XWORD"; break;
7495 case 16: size = "XMMWORD"; break;
7500 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7503 else if (code == 'w')
7505 else if (code == 'k')
7509 fputs (" PTR ", file);
7513 /* Avoid (%rip) for call operands. */
7514 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7515 && GET_CODE (x) != CONST_INT)
7516 output_addr_const (file, x);
7517 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7518 output_operand_lossage ("invalid constraints for operand");
7523 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7528 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7529 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7531 if (ASSEMBLER_DIALECT == ASM_ATT)
7533 fprintf (file, "0x%08lx", l);
7536 /* These float cases don't actually occur as immediate operands. */
7537 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7541 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7542 fprintf (file, "%s", dstr);
7545 else if (GET_CODE (x) == CONST_DOUBLE
7546 && GET_MODE (x) == XFmode)
7550 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7551 fprintf (file, "%s", dstr);
7558 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7560 if (ASSEMBLER_DIALECT == ASM_ATT)
7563 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7564 || GET_CODE (x) == LABEL_REF)
7566 if (ASSEMBLER_DIALECT == ASM_ATT)
7569 fputs ("OFFSET FLAT:", file);
7572 if (GET_CODE (x) == CONST_INT)
7573 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7575 output_pic_addr_const (file, x, code);
7577 output_addr_const (file, x);
7581 /* Print a memory operand whose address is ADDR. */
7584 print_operand_address (FILE *file, rtx addr)
7586 struct ix86_address parts;
7587 rtx base, index, disp;
7590 if (! ix86_decompose_address (addr, &parts))
7594 index = parts.index;
7596 scale = parts.scale;
7604 if (USER_LABEL_PREFIX[0] == 0)
7606 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7612 if (!base && !index)
7614 /* Displacement only requires special attention. */
7616 if (GET_CODE (disp) == CONST_INT)
7618 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7620 if (USER_LABEL_PREFIX[0] == 0)
7622 fputs ("ds:", file);
7624 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7627 output_pic_addr_const (file, disp, 0);
7629 output_addr_const (file, disp);
7631 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7633 && ((GET_CODE (disp) == SYMBOL_REF
7634 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7635 || GET_CODE (disp) == LABEL_REF
7636 || (GET_CODE (disp) == CONST
7637 && GET_CODE (XEXP (disp, 0)) == PLUS
7638 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7639 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7640 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7641 fputs ("(%rip)", file);
7645 if (ASSEMBLER_DIALECT == ASM_ATT)
7650 output_pic_addr_const (file, disp, 0);
7651 else if (GET_CODE (disp) == LABEL_REF)
7652 output_asm_label (disp);
7654 output_addr_const (file, disp);
7659 print_reg (base, 0, file);
7663 print_reg (index, 0, file);
7665 fprintf (file, ",%d", scale);
7671 rtx offset = NULL_RTX;
7675 /* Pull out the offset of a symbol; print any symbol itself. */
7676 if (GET_CODE (disp) == CONST
7677 && GET_CODE (XEXP (disp, 0)) == PLUS
7678 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7680 offset = XEXP (XEXP (disp, 0), 1);
7681 disp = gen_rtx_CONST (VOIDmode,
7682 XEXP (XEXP (disp, 0), 0));
7686 output_pic_addr_const (file, disp, 0);
7687 else if (GET_CODE (disp) == LABEL_REF)
7688 output_asm_label (disp);
7689 else if (GET_CODE (disp) == CONST_INT)
7692 output_addr_const (file, disp);
7698 print_reg (base, 0, file);
7701 if (INTVAL (offset) >= 0)
7703 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7707 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7714 print_reg (index, 0, file);
7716 fprintf (file, "*%d", scale);
7724 output_addr_const_extra (FILE *file, rtx x)
7728 if (GET_CODE (x) != UNSPEC)
7731 op = XVECEXP (x, 0, 0);
7732 switch (XINT (x, 1))
7734 case UNSPEC_GOTTPOFF:
7735 output_addr_const (file, op);
7736 /* FIXME: This might be @TPOFF in Sun ld. */
7737 fputs ("@GOTTPOFF", file);
7740 output_addr_const (file, op);
7741 fputs ("@TPOFF", file);
7744 output_addr_const (file, op);
7746 fputs ("@TPOFF", file);
7748 fputs ("@NTPOFF", file);
7751 output_addr_const (file, op);
7752 fputs ("@DTPOFF", file);
7754 case UNSPEC_GOTNTPOFF:
7755 output_addr_const (file, op);
7757 fputs ("@GOTTPOFF(%rip)", file);
7759 fputs ("@GOTNTPOFF", file);
7761 case UNSPEC_INDNTPOFF:
7762 output_addr_const (file, op);
7763 fputs ("@INDNTPOFF", file);
7773 /* Split one or more DImode RTL references into pairs of SImode
7774 references. The RTL can be REG, offsettable MEM, integer constant, or
7775 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7776 split and "num" is its length. lo_half and hi_half are output arrays
7777 that parallel "operands". */
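/* For instance (a sketch), a (mem:DI addr) operand yields
   lo = (mem:SI addr) and hi = (mem:SI addr+4), while a register or
   constant is split via simplify_gen_subreg into its SImode halves.  */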
7780 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7784 rtx op = operands[num];
7786 /* simplify_subreg refuses to split volatile memory addresses,
7787 but we still have to handle them. */
7788 if (GET_CODE (op) == MEM)
7790 lo_half[num] = adjust_address (op, SImode, 0);
7791 hi_half[num] = adjust_address (op, SImode, 4);
7795 lo_half[num] = simplify_gen_subreg (SImode, op,
7796 GET_MODE (op) == VOIDmode
7797 ? DImode : GET_MODE (op), 0);
7798 hi_half[num] = simplify_gen_subreg (SImode, op,
7799 GET_MODE (op) == VOIDmode
7800 ? DImode : GET_MODE (op), 4);
7804 /* Split one or more TImode RTL references into pairs of DImode
7805 references. The RTL can be REG, offsettable MEM, integer constant, or
7806 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7807 split and "num" is its length. lo_half and hi_half are output arrays
7808 that parallel "operands". */
7811 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7815 rtx op = operands[num];
7817 /* simplify_subreg refuses to split volatile memory addresses, but we
7818 still have to handle them. */
7819 if (GET_CODE (op) == MEM)
7821 lo_half[num] = adjust_address (op, DImode, 0);
7822 hi_half[num] = adjust_address (op, DImode, 8);
7826 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7827 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7832 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7833 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7834 is the expression of the binary operation. The output may either be
7835 emitted here, or returned to the caller, like all output_* functions.
7837 There is no guarantee that the operands are the same mode, as they
7838 might be within FLOAT or FLOAT_EXTEND expressions. */
7840 #ifndef SYSV386_COMPAT
7841 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7842 wants to fix the assemblers because that causes incompatibility
7843 with gcc. No-one wants to fix gcc because that causes
7844 incompatibility with assemblers... You can use the option of
7845 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7846 #define SYSV386_COMPAT 1
7850 output_387_binary_op (rtx insn, rtx *operands)
7852 static char buf[30];
7855 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7857 #ifdef ENABLE_CHECKING
7858 /* Even if we do not want to check the inputs, this documents input
7859 constraints, which helps in understanding the following code. */
7860 if (STACK_REG_P (operands[0])
7861 && ((REG_P (operands[1])
7862 && REGNO (operands[0]) == REGNO (operands[1])
7863 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7864 || (REG_P (operands[2])
7865 && REGNO (operands[0]) == REGNO (operands[2])
7866 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7867 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7873 switch (GET_CODE (operands[3]))
7876 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7877 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7885 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7886 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7894 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7895 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7903 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7904 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7918 if (GET_MODE (operands[0]) == SFmode)
7919 strcat (buf, "ss\t{%2, %0|%0, %2}");
7921 strcat (buf, "sd\t{%2, %0|%0, %2}");
7926 switch (GET_CODE (operands[3]))
7930 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7932 rtx temp = operands[2];
7933 operands[2] = operands[1];
7937 /* We now know operands[0] == operands[1]. */
7939 if (GET_CODE (operands[2]) == MEM)
7945 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7947 if (STACK_TOP_P (operands[0]))
7948 /* How is it that we are storing to a dead operand[2]?
7949 Well, presumably operands[1] is dead too. We can't
7950 store the result to st(0) as st(0) gets popped on this
7951 instruction. Instead store to operands[2] (which I
7952 think has to be st(1)). st(1) will be popped later.
7953 gcc <= 2.8.1 didn't have this check and generated
7954 assembly code that the Unixware assembler rejected. */
7955 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7957 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7961 if (STACK_TOP_P (operands[0]))
7962 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7964 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7969 if (GET_CODE (operands[1]) == MEM)
7975 if (GET_CODE (operands[2]) == MEM)
7981 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7984 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7985 derived assemblers, confusingly reverse the direction of
7986 the operation for fsub{r} and fdiv{r} when the
7987 destination register is not st(0). The Intel assembler
7988 doesn't have this brain damage. Read !SYSV386_COMPAT to
7989 figure out what the hardware really does. */
7990 if (STACK_TOP_P (operands[0]))
7991 p = "{p\t%0, %2|rp\t%2, %0}";
7993 p = "{rp\t%2, %0|p\t%0, %2}";
7995 if (STACK_TOP_P (operands[0]))
7996 /* As above for fmul/fadd, we can't store to st(0). */
7997 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7999 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8004 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8007 if (STACK_TOP_P (operands[0]))
8008 p = "{rp\t%0, %1|p\t%1, %0}";
8010 p = "{p\t%1, %0|rp\t%0, %1}";
8012 if (STACK_TOP_P (operands[0]))
8013 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8015 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8020 if (STACK_TOP_P (operands[0]))
8022 if (STACK_TOP_P (operands[1]))
8023 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8025 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8028 else if (STACK_TOP_P (operands[1]))
8031 p = "{\t%1, %0|r\t%0, %1}";
8033 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8039 p = "{r\t%2, %0|\t%0, %2}";
8041 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8054 /* Output code to initialize control word copies used by
8055 trunc?f?i patterns. NORMAL is set to the current control word, while
8056 ROUND_DOWN is set to a control word that rounds toward zero (as truncation requires). */
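/* Bits 10-11 of the x87 control word form the rounding-control field;
   setting both bits (the 0xc / 0xc00 values used below) selects
   round-toward-zero, which is what C's float-to-integer truncation
   requires.  */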
8058 emit_i387_cw_initialization (rtx normal, rtx round_down)
8060 rtx reg = gen_reg_rtx (HImode);
8062 emit_insn (gen_x86_fnstcw_1 (normal));
8063 emit_move_insn (reg, normal);
8064 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8066 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8068 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8069 emit_move_insn (round_down, reg);
8072 /* Output code for INSN to convert a float to a signed int. OPERANDS
8073 are the insn operands. The output may be [HSD]Imode and the input
8074 operand may be [SDX]Fmode. */
8077 output_fix_trunc (rtx insn, rtx *operands)
8079 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8080 int dimode_p = GET_MODE (operands[0]) == DImode;
8082 /* Jump through a hoop or two for DImode, since the hardware has no
8083 non-popping instruction. We used to do this a different way, but
8084 that was somewhat fragile and broke with post-reload splitters. */
8085 if (dimode_p && !stack_top_dies)
8086 output_asm_insn ("fld\t%y1", operands);
8088 if (!STACK_TOP_P (operands[1]))
8091 if (GET_CODE (operands[0]) != MEM)
8094 output_asm_insn ("fldcw\t%3", operands);
8095 if (stack_top_dies || dimode_p)
8096 output_asm_insn ("fistp%z0\t%0", operands);
8098 output_asm_insn ("fist%z0\t%0", operands);
8099 output_asm_insn ("fldcw\t%2", operands);
8104 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8105 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8106 when fucom should be used. */
8109 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8112 rtx cmp_op0 = operands[0];
8113 rtx cmp_op1 = operands[1];
8114 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8119 cmp_op1 = operands[2];
8123 if (GET_MODE (operands[0]) == SFmode)
8125 return "ucomiss\t{%1, %0|%0, %1}";
8127 return "comiss\t{%1, %0|%0, %1}";
8130 return "ucomisd\t{%1, %0|%0, %1}";
8132 return "comisd\t{%1, %0|%0, %1}";
8135 if (! STACK_TOP_P (cmp_op0))
8138 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8140 if (STACK_REG_P (cmp_op1)
8142 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8143 && REGNO (cmp_op1) != FIRST_STACK_REG)
8145 /* If the top of the 387 stack dies, and the other operand
8146 is also a stack register that dies, then this must be a
8147 `fcompp' float compare */
8151 /* There is no double popping fcomi variant. Fortunately,
8152 eflags is immune from the fstp's cc clobbering. */
8154 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8156 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8164 return "fucompp\n\tfnstsw\t%0";
8166 return "fcompp\n\tfnstsw\t%0";
8179 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8181 static const char * const alt[24] =
8193 "fcomi\t{%y1, %0|%0, %y1}",
8194 "fcomip\t{%y1, %0|%0, %y1}",
8195 "fucomi\t{%y1, %0|%0, %y1}",
8196 "fucomip\t{%y1, %0|%0, %y1}",
8203 "fcom%z2\t%y2\n\tfnstsw\t%0",
8204 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8205 "fucom%z2\t%y2\n\tfnstsw\t%0",
8206 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8208 "ficom%z2\t%y2\n\tfnstsw\t%0",
8209 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8217 mask = eflags_p << 3;
8218 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8219 mask |= unordered_p << 1;
8220 mask |= stack_top_dies;
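  /* A worked example of the encoding (a sketch): fcomi requested
     (eflags_p == 1), FP operand, ordered compare, top-of-stack dies
     gives mask = 8 + 0 + 0 + 1 = 9, which with the table above selects
     the popping "fcomip" alternative.  */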
8233 ix86_output_addr_vec_elt (FILE *file, int value)
8235 const char *directive = ASM_LONG;
8240 directive = ASM_QUAD;
8246 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8250 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8253 fprintf (file, "%s%s%d-%s%d\n",
8254 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8255 else if (HAVE_AS_GOTOFF_IN_DATA)
8256 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8258 else if (TARGET_MACHO)
8260 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8261 machopic_output_function_base_name (file);
8262 fprintf(file, "\n");
8266 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8267 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8270 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8274 ix86_expand_clear (rtx dest)
8278 /* We play register width games, which are only valid after reload. */
8279 if (!reload_completed)
8282 /* Avoid HImode and its attendant prefix byte. */
8283 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8284 dest = gen_rtx_REG (SImode, REGNO (dest));
8286 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8288 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8289 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8291 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8292 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8298 /* X is an unchanging MEM. If it is a constant pool reference, return
8299 the constant pool rtx, else NULL. */
8302 maybe_get_pool_constant (rtx x)
8304 x = ix86_delegitimize_address (XEXP (x, 0));
8306 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8307 return get_pool_constant (x);
8313 ix86_expand_move (enum machine_mode mode, rtx operands[])
8315 int strict = (reload_in_progress || reload_completed);
8317 enum tls_model model;
8322 model = tls_symbolic_operand (op1, Pmode);
8325 op1 = legitimize_tls_address (op1, model, true);
8326 op1 = force_operand (op1, op0);
8331 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8336 rtx temp = ((reload_in_progress
8337 || ((op0 && GET_CODE (op0) == REG)
8339 ? op0 : gen_reg_rtx (Pmode));
8340 op1 = machopic_indirect_data_reference (op1, temp);
8341 op1 = machopic_legitimize_pic_address (op1, mode,
8342 temp == op1 ? 0 : temp);
8344 else if (MACHOPIC_INDIRECT)
8345 op1 = machopic_indirect_data_reference (op1, 0);
8349 if (GET_CODE (op0) == MEM)
8350 op1 = force_reg (Pmode, op1);
8354 if (GET_CODE (temp) != REG)
8355 temp = gen_reg_rtx (Pmode);
8356 temp = legitimize_pic_address (op1, temp);
8361 #endif /* TARGET_MACHO */
8365 if (GET_CODE (op0) == MEM
8366 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8367 || !push_operand (op0, mode))
8368 && GET_CODE (op1) == MEM)
8369 op1 = force_reg (mode, op1);
8371 if (push_operand (op0, mode)
8372 && ! general_no_elim_operand (op1, mode))
8373 op1 = copy_to_mode_reg (mode, op1);
8375 /* Force large constants in 64bit compilation into a register
8376 to get them CSEd. */
8377 if (TARGET_64BIT && mode == DImode
8378 && immediate_operand (op1, mode)
8379 && !x86_64_zero_extended_value (op1)
8380 && !register_operand (op0, mode)
8381 && optimize && !reload_completed && !reload_in_progress)
8382 op1 = copy_to_mode_reg (mode, op1);
8384 if (FLOAT_MODE_P (mode))
8386 /* If we are loading a floating point constant to a register,
8387 force the value to memory now, since we'll get better code
8388 out the back end. */
8392 else if (GET_CODE (op1) == CONST_DOUBLE)
8394 op1 = validize_mem (force_const_mem (mode, op1));
8395 if (!register_operand (op0, mode))
8397 rtx temp = gen_reg_rtx (mode);
8398 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8399 emit_move_insn (op0, temp);
8406 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8410 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8412 /* Force constants other than zero into memory. We do not know how
8413 the instructions used to build constants modify the upper 64 bits
8414 of the register; once we have that information we may be able
8415 to handle some of them more efficiently. */
8416 if ((reload_in_progress | reload_completed) == 0
8417 && register_operand (operands[0], mode)
8418 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8419 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8421 /* Make operand1 a register if it isn't already. */
8423 && !register_operand (operands[0], mode)
8424 && !register_operand (operands[1], mode))
8426 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8427 emit_move_insn (operands[0], temp);
8431 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8434 /* Attempt to expand a binary operator. Make the expansion closer to the
8435 actual machine than just general_operand, which would allow 3 separate
8436 memory references (one output, two input) in a single insn. */
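/* For instance (a sketch), expanding a PLUS whose two source operands
   are both MEMs first copies one of them into a register, since the
   machine allows at most one memory reference per insn here.  */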
8439 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8442 int matching_memory;
8443 rtx src1, src2, dst, op, clob;
8449 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8450 if (GET_RTX_CLASS (code) == 'c'
8451 && (rtx_equal_p (dst, src2)
8452 || immediate_operand (src1, mode)))
8459 /* If the destination is memory, and we do not have matching source
8460 operands, do things in registers. */
8461 matching_memory = 0;
8462 if (GET_CODE (dst) == MEM)
8464 if (rtx_equal_p (dst, src1))
8465 matching_memory = 1;
8466 else if (GET_RTX_CLASS (code) == 'c'
8467 && rtx_equal_p (dst, src2))
8468 matching_memory = 2;
8470 dst = gen_reg_rtx (mode);
8473 /* Both source operands cannot be in memory. */
8474 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8476 if (matching_memory != 2)
8477 src2 = force_reg (mode, src2);
8479 src1 = force_reg (mode, src1);
8482 /* If the operation is not commutable, source 1 cannot be a constant
8483 or non-matching memory. */
8484 if ((CONSTANT_P (src1)
8485 || (!matching_memory && GET_CODE (src1) == MEM))
8486 && GET_RTX_CLASS (code) != 'c')
8487 src1 = force_reg (mode, src1);
8489 /* If optimizing, copy to regs to improve CSE */
8490 if (optimize && ! no_new_pseudos)
8492 if (GET_CODE (dst) == MEM)
8493 dst = gen_reg_rtx (mode);
8494 if (GET_CODE (src1) == MEM)
8495 src1 = force_reg (mode, src1);
8496 if (GET_CODE (src2) == MEM)
8497 src2 = force_reg (mode, src2);
8500 /* Emit the instruction. */
8502 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8503 if (reload_in_progress)
8505 /* Reload doesn't know about the flags register, and doesn't know that
8506 it doesn't want to clobber it. We can only do this with PLUS. */
8513 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8514 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8517 /* Fix up the destination if needed. */
8518 if (dst != operands[0])
8519 emit_move_insn (operands[0], dst);
8522 /* Return TRUE or FALSE depending on whether the binary operator meets the
8523 appropriate constraints. */
8526 ix86_binary_operator_ok (enum rtx_code code,
8527 enum machine_mode mode ATTRIBUTE_UNUSED,
8530 /* Both source operands cannot be in memory. */
8531 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8533 /* If the operation is not commutable, source 1 cannot be a constant. */
8534 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8536 /* If the destination is memory, we must have a matching source operand. */
8537 if (GET_CODE (operands[0]) == MEM
8538 && ! (rtx_equal_p (operands[0], operands[1])
8539 || (GET_RTX_CLASS (code) == 'c'
8540 && rtx_equal_p (operands[0], operands[2]))))
8542 /* If the operation is not commutable and the source 1 is memory, we must
8543 have a matching destination. */
8544 if (GET_CODE (operands[1]) == MEM
8545 && GET_RTX_CLASS (code) != 'c'
8546 && ! rtx_equal_p (operands[0], operands[1]))
8551 /* Attempt to expand a unary operator. Make the expansion closer to the
8552 actual machine than just general_operand, which would allow 2 separate
8553 memory references (one output, one input) in a single insn. */
8556 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8559 int matching_memory;
8560 rtx src, dst, op, clob;
8565 /* If the destination is memory, and we do not have matching source
8566 operands, do things in registers. */
8567 matching_memory = 0;
8568 if (GET_CODE (dst) == MEM)
8570 if (rtx_equal_p (dst, src))
8571 matching_memory = 1;
8573 dst = gen_reg_rtx (mode);
8576 /* When source operand is memory, destination must match. */
8577 if (!matching_memory && GET_CODE (src) == MEM)
8578 src = force_reg (mode, src);
8580 /* If optimizing, copy to regs to improve CSE */
8581 if (optimize && ! no_new_pseudos)
8583 if (GET_CODE (dst) == MEM)
8584 dst = gen_reg_rtx (mode);
8585 if (GET_CODE (src) == MEM)
8586 src = force_reg (mode, src);
8589 /* Emit the instruction. */
8591 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8592 if (reload_in_progress || code == NOT)
8594 /* Reload doesn't know about the flags register, and doesn't know that
8595 it doesn't want to clobber it. */
8602 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8603 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8606 /* Fix up the destination if needed. */
8607 if (dst != operands[0])
8608 emit_move_insn (operands[0], dst);
8611 /* Return TRUE or FALSE depending on whether the unary operator meets the
8612 appropriate constraints. */
8615 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8616 enum machine_mode mode ATTRIBUTE_UNUSED,
8617 rtx operands[2] ATTRIBUTE_UNUSED)
8619 /* If one of operands is memory, source and destination must match. */
8620 if ((GET_CODE (operands[0]) == MEM
8621 || GET_CODE (operands[1]) == MEM)
8622 && ! rtx_equal_p (operands[0], operands[1]))
8627 /* Return TRUE or FALSE depending on whether the first SET in INSN
8628 has source and destination with matching CC modes, and that the
8629 CC mode is at least as constrained as REQ_MODE. */
8632 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8635 enum machine_mode set_mode;
8637 set = PATTERN (insn);
8638 if (GET_CODE (set) == PARALLEL)
8639 set = XVECEXP (set, 0, 0);
8640 if (GET_CODE (set) != SET)
8642 if (GET_CODE (SET_SRC (set)) != COMPARE)
8645 set_mode = GET_MODE (SET_DEST (set));
8649 if (req_mode != CCNOmode
8650 && (req_mode != CCmode
8651 || XEXP (SET_SRC (set), 1) != const0_rtx))
8655 if (req_mode == CCGCmode)
8659 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8663 if (req_mode == CCZmode)
8673 return (GET_MODE (SET_SRC (set)) == set_mode);
8676 /* Generate insn patterns to do an integer compare of OPERANDS. */
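/* E.g. (a sketch) for a signed "a < b" this emits
   (set (reg flags) (compare a b)) in the mode chosen by SELECT_CC_MODE
   and hands back (lt (reg flags) (const_int 0)) for the branch, set or
   cmov expander to consume.  */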
8679 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8681 enum machine_mode cmpmode;
8684 cmpmode = SELECT_CC_MODE (code, op0, op1);
8685 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8687 /* This is very simple, but making the interface the same as in the
8688 FP case makes the rest of the code easier. */
8689 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8690 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8692 /* Return the test that should be put into the flags user, i.e.
8693 the bcc, scc, or cmov instruction. */
8694 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8697 /* Figure out whether to use ordered or unordered fp comparisons.
8698 Return the appropriate mode to use. */
8701 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8703 /* ??? In order to make all comparisons reversible, we do all comparisons
8704 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8705 all forms of trapping and nontrapping comparisons, we can make inequality
8706 comparisons trapping again, since it results in better code when using
8707 FCOM based compares. */
8708 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8712 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8714 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8715 return ix86_fp_compare_mode (code);
8718 /* Only zero flag is needed. */
8720 case NE: /* ZF!=0 */
8722 /* Codes needing carry flag. */
8723 case GEU: /* CF=0 */
8724 case GTU: /* CF=0 & ZF=0 */
8725 case LTU: /* CF=1 */
8726 case LEU: /* CF=1 | ZF=1 */
8728 /* Codes possibly doable only with sign flag when
8729 comparing against zero. */
8730 case GE: /* SF=OF or SF=0 */
8731 case LT: /* SF<>OF or SF=1 */
8732 if (op1 == const0_rtx)
8735 /* For other cases Carry flag is not required. */
8737 /* Codes doable only with sign flag when comparing
8738 against zero, but we miss the jump instruction for it,
8739 so we need to use relational tests against overflow,
8740 which thus needs to be zero. */
8741 case GT: /* ZF=0 & SF=OF */
8742 case LE: /* ZF=1 | SF<>OF */
8743 if (op1 == const0_rtx)
8747 /* The strcmp pattern does (use flags), and combine may ask us for a proper comparison. */
8756 /* Return the fixed registers used for condition codes. */
8759 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8766 /* If two condition code modes are compatible, return a condition code
8767 mode which is compatible with both. Otherwise, return
8770 static enum machine_mode
8771 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8776 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8779 if ((m1 == CCGCmode && m2 == CCGOCmode)
8780 || (m1 == CCGOCmode && m2 == CCGCmode))
8808 /* These are only compatible with themselves, which we already know. */
8814 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8817 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8819 enum rtx_code swapped_code = swap_condition (code);
8820 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8821 || (ix86_fp_comparison_cost (swapped_code)
8822 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8825 /* Swap, force into registers, or otherwise massage the two operands
8826 to a fp comparison. The operands are updated in place; the new
8827 comparison code is returned. */
8829 static enum rtx_code
8830 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8832 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8833 rtx op0 = *pop0, op1 = *pop1;
8834 enum machine_mode op_mode = GET_MODE (op0);
8835 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8837 /* All of the unordered compare instructions only work on registers.
8838 The same is true of the XFmode compare instructions. The same is
8839 true of the fcomi compare instructions. */
8842 && (fpcmp_mode == CCFPUmode
8843 || op_mode == XFmode
8844 || ix86_use_fcomi_compare (code)))
8846 op0 = force_reg (op_mode, op0);
8847 op1 = force_reg (op_mode, op1);
8851 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8852 things around if they appear profitable, otherwise force op0 into a register. */
8855 if (standard_80387_constant_p (op0) == 0
8856 || (GET_CODE (op0) == MEM
8857 && ! (standard_80387_constant_p (op1) == 0
8858 || GET_CODE (op1) == MEM)))
8861 tmp = op0, op0 = op1, op1 = tmp;
8862 code = swap_condition (code);
8865 if (GET_CODE (op0) != REG)
8866 op0 = force_reg (op_mode, op0);
8868 if (CONSTANT_P (op1))
8870 if (standard_80387_constant_p (op1))
8871 op1 = force_reg (op_mode, op1);
8873 op1 = validize_mem (force_const_mem (op_mode, op1));
8877 /* Try to rearrange the comparison to make it cheaper. */
8878 if (ix86_fp_comparison_cost (code)
8879 > ix86_fp_comparison_cost (swap_condition (code))
8880 && (GET_CODE (op1) == REG || !no_new_pseudos))
8883 tmp = op0, op0 = op1, op1 = tmp;
8884 code = swap_condition (code);
8885 if (GET_CODE (op0) != REG)
8886 op0 = force_reg (op_mode, op0);
8894 /* Convert comparison codes we use to represent FP comparison to integer
8895 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
8897 static enum rtx_code
8898 ix86_fp_compare_code_to_integer (enum rtx_code code)
8927 /* Split comparison code CODE into comparisons we can do using branch
8928 instructions. BYPASS_CODE is the comparison code for the branch that will
8929 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8930 is not required, its value is set to NIL.
8931 We never require more than two branches. */
8933 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8934 enum rtx_code *first_code,
8935 enum rtx_code *second_code)
8941 /* The fcomi comparison sets flags as follows:
8951 case GT: /* GTU - CF=0 & ZF=0 */
8952 case GE: /* GEU - CF=0 */
8953 case ORDERED: /* PF=0 */
8954 case UNORDERED: /* PF=1 */
8955 case UNEQ: /* EQ - ZF=1 */
8956 case UNLT: /* LTU - CF=1 */
8957 case UNLE: /* LEU - CF=1 | ZF=1 */
8958 case LTGT: /* EQ - ZF=0 */
8960 case LT: /* LTU - CF=1 - fails on unordered */
8962 *bypass_code = UNORDERED;
8964 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8966 *bypass_code = UNORDERED;
8968 case EQ: /* EQ - ZF=1 - fails on unordered */
8970 *bypass_code = UNORDERED;
8972 case NE: /* NE - ZF=0 - fails on unordered */
8974 *second_code = UNORDERED;
8976 case UNGE: /* GEU - CF=0 - fails on unordered */
8978 *second_code = UNORDERED;
8980 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8982 *second_code = UNORDERED;
8987 if (!TARGET_IEEE_FP)
8994 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8995 All following functions use the number of instructions as a cost metric.
8996 In the future this should be tweaked to compute bytes for optimize_size and
8997 take into account performance of various instructions on various CPUs. */
8999 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9001 if (!TARGET_IEEE_FP)
9003 /* The cost of code output by ix86_expand_fp_compare. */
9031 /* Return cost of comparison done using fcomi operation.
9032 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9034 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9036 enum rtx_code bypass_code, first_code, second_code;
9037 /* Return an arbitrarily high cost when the instruction is not supported - this
9038 prevents gcc from using it. */
9041 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9042 return (bypass_code != NIL || second_code != NIL) + 2;
9045 /* Return cost of comparison done using sahf operation.
9046 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9048 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9050 enum rtx_code bypass_code, first_code, second_code;
9051 /* Return an arbitrarily high cost when the instruction is not preferred - this
9052 keeps gcc from using it. */
9053 if (!TARGET_USE_SAHF && !optimize_size)
9055 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9056 return (bypass_code != NIL || second_code != NIL) + 3;
9059 /* Compute cost of the comparison done using any method.
9060 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9062 ix86_fp_comparison_cost (enum rtx_code code)
9064 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9067 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9068 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9070 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9071 if (min > sahf_cost)
9073 if (min > fcomi_cost)
9078 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9081 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9082 rtx *second_test, rtx *bypass_test)
9084 enum machine_mode fpcmp_mode, intcmp_mode;
9086 int cost = ix86_fp_comparison_cost (code);
9087 enum rtx_code bypass_code, first_code, second_code;
9089 fpcmp_mode = ix86_fp_compare_mode (code);
9090 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9093 *second_test = NULL_RTX;
9095 *bypass_test = NULL_RTX;
9097 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9099 /* Do fcomi/sahf based test when profitable. */
9100 if ((bypass_code == NIL || bypass_test)
9101 && (second_code == NIL || second_test)
9102 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9106 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9107 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9113 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9114 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9116 scratch = gen_reg_rtx (HImode);
9117 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9118 emit_insn (gen_x86_sahf_1 (scratch));
9121 /* The FP codes work out to act like unsigned. */
9122 intcmp_mode = fpcmp_mode;
9124 if (bypass_code != NIL)
9125 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9126 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9128 if (second_code != NIL)
9129 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9130 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9135 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9136 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9137 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9139 scratch = gen_reg_rtx (HImode);
9140 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9142 /* In the unordered case, we have to check C2 for NaNs, which
9143 doesn't happen to work out to anything nice combination-wise.
9144 So do some bit twiddling on the value we've got in AH to come
9145 up with an appropriate set of condition codes. */
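      /* After the fnstsw, AH holds the x87 condition bits: C0 is 0x01,
         C2 is 0x04 and C3 is 0x40, so the 0x45 masks below select
         C3|C2|C0.  (A factual note on the magic constants.)  */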
9147 intcmp_mode = CCNOmode;
9152 if (code == GT || !TARGET_IEEE_FP)
9154 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9159 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9160 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9161 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9162 intcmp_mode = CCmode;
9168 if (code == LT && TARGET_IEEE_FP)
9170 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9171 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9172 intcmp_mode = CCmode;
9177 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9183 if (code == GE || !TARGET_IEEE_FP)
9185 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9190 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9191 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9198 if (code == LE && TARGET_IEEE_FP)
9200 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9201 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9202 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9203 intcmp_mode = CCmode;
9208 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9214 if (code == EQ && TARGET_IEEE_FP)
9216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9218 intcmp_mode = CCmode;
9223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9230 if (code == NE && TARGET_IEEE_FP)
9232 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9233 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9249 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9258 /* Return the test that should be put into the flags user, i.e.
9259 the bcc, scc, or cmov instruction. */
9260 return gen_rtx_fmt_ee (code, VOIDmode,
9261 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9266 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9269 op0 = ix86_compare_op0;
9270 op1 = ix86_compare_op1;
9273 *second_test = NULL_RTX;
9275 *bypass_test = NULL_RTX;
9277 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9278 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9279 second_test, bypass_test);
9281 ret = ix86_expand_int_compare (code, op0, op1);
9286 /* Return true if CODE will result in a nontrivial jump sequence. */
9288 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9290 enum rtx_code bypass_code, first_code, second_code;
9293 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9294 return bypass_code != NIL || second_code != NIL;
9298 ix86_expand_branch (enum rtx_code code, rtx label)
9302 switch (GET_MODE (ix86_compare_op0))
9308 tmp = ix86_expand_compare (code, NULL, NULL);
9309 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9310 gen_rtx_LABEL_REF (VOIDmode, label),
9312 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9321 enum rtx_code bypass_code, first_code, second_code;
9323 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9326 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9328 /* Check whether we will use the natural sequence with one jump. If
9329 so, we can expand the jump early. Otherwise delay expansion by
9330 creating a compound insn so as not to confuse optimizers. */
9331 if (bypass_code == NIL && second_code == NIL
9334 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9335 gen_rtx_LABEL_REF (VOIDmode, label),
9340 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9341 ix86_compare_op0, ix86_compare_op1);
9342 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9343 gen_rtx_LABEL_REF (VOIDmode, label),
9345 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9347 use_fcomi = ix86_use_fcomi_compare (code);
9348 vec = rtvec_alloc (3 + !use_fcomi);
9349 RTVEC_ELT (vec, 0) = tmp;
9351 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9353 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9356 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9358 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9366 /* Expand DImode branch into multiple compare+branch. */
9368 rtx lo[2], hi[2], label2;
9369 enum rtx_code code1, code2, code3;
9371 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9373 tmp = ix86_compare_op0;
9374 ix86_compare_op0 = ix86_compare_op1;
9375 ix86_compare_op1 = tmp;
9376 code = swap_condition (code);
9378 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9379 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9381 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9382 avoid two branches. This costs one extra insn, so disable when
9383 optimizing for size. */
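/* An illustrative sketch (not emitted verbatim) of the sequence this
   produces for a 32-bit DImode a == b test:
     xorl hi(b), hi(a)
     xorl lo(b), lo(a)
     orl  lo(a), hi(a)
     jz   label
   so only a single conditional branch remains.  */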
9385 if ((code == EQ || code == NE)
9387 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9392 if (hi[1] != const0_rtx)
9393 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9394 NULL_RTX, 0, OPTAB_WIDEN);
9397 if (lo[1] != const0_rtx)
9398 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9399 NULL_RTX, 0, OPTAB_WIDEN);
9401 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9402 NULL_RTX, 0, OPTAB_WIDEN);
9404 ix86_compare_op0 = tmp;
9405 ix86_compare_op1 = const0_rtx;
9406 ix86_expand_branch (code, label);
9410 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
9411 op1 is a constant, and the low word is zero, then we can just
9412 examine the high word. */
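/* E.g. an unsigned a < 0x300000000 test has a zero low word, so it
   reduces to the single SImode test hi(a) < 3.  */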
9414 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9417 case LT: case LTU: case GE: case GEU:
9418 ix86_compare_op0 = hi[0];
9419 ix86_compare_op1 = hi[1];
9420 ix86_expand_branch (code, label);
9426 /* Otherwise, we need two or three jumps. */
9428 label2 = gen_label_rtx ();
9431 code2 = swap_condition (code);
9432 code3 = unsigned_condition (code);
9436 case LT: case GT: case LTU: case GTU:
9439 case LE: code1 = LT; code2 = GT; break;
9440 case GE: code1 = GT; code2 = LT; break;
9441 case LEU: code1 = LTU; code2 = GTU; break;
9442 case GEU: code1 = GTU; code2 = LTU; break;
9444 case EQ: code1 = NIL; code2 = NE; break;
9445 case NE: code2 = NIL; break;
9453 * if (hi(a) < hi(b)) goto true;
9454 * if (hi(a) > hi(b)) goto false;
9455 * if (lo(a) < lo(b)) goto true;
9459 ix86_compare_op0 = hi[0];
9460 ix86_compare_op1 = hi[1];
9463 ix86_expand_branch (code1, label);
9465 ix86_expand_branch (code2, label2);
9467 ix86_compare_op0 = lo[0];
9468 ix86_compare_op1 = lo[1];
9469 ix86_expand_branch (code3, label);
9472 emit_label (label2);
9481 /* Split branch based on floating point condition. */
9483 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9484 rtx target1, rtx target2, rtx tmp)
9487 rtx label = NULL_RTX;
9489 int bypass_probability = -1, second_probability = -1, probability = -1;
9492 if (target2 != pc_rtx)
9495 code = reverse_condition_maybe_unordered (code);
9500 condition = ix86_expand_fp_compare (code, op1, op2,
9501 tmp, &second, &bypass);
9503 if (split_branch_probability >= 0)
9505 /* Distribute the probabilities across the jumps.
9506 Assume the BYPASS and SECOND jumps always test for UNORDERED. */
9508 probability = split_branch_probability;
9510 /* A value of 1 is low enough that the probability does not need
9511 to be updated. Later we may run some experiments and see
9512 whether unordered values are more frequent in practice. */
9514 bypass_probability = 1;
9516 second_probability = 1;
9518 if (bypass != NULL_RTX)
9520 label = gen_label_rtx ();
9521 i = emit_jump_insn (gen_rtx_SET
9523 gen_rtx_IF_THEN_ELSE (VOIDmode,
9525 gen_rtx_LABEL_REF (VOIDmode,
9528 if (bypass_probability >= 0)
9530 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9531 GEN_INT (bypass_probability),
9534 i = emit_jump_insn (gen_rtx_SET
9536 gen_rtx_IF_THEN_ELSE (VOIDmode,
9537 condition, target1, target2)));
9538 if (probability >= 0)
9540 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9541 GEN_INT (probability),
9543 if (second != NULL_RTX)
9545 i = emit_jump_insn (gen_rtx_SET
9547 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9549 if (second_probability >= 0)
9551 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9552 GEN_INT (second_probability),
9555 if (label != NULL_RTX)
9560 ix86_expand_setcc (enum rtx_code code, rtx dest)
9562 rtx ret, tmp, tmpreg, equiv;
9563 rtx second_test, bypass_test;
9565 if (GET_MODE (ix86_compare_op0) == DImode
9567 return 0; /* FAIL */
9569 if (GET_MODE (dest) != QImode)
9572 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9573 PUT_MODE (ret, QImode);
9578 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9579 if (bypass_test || second_test)
9581 rtx test = second_test;
9583 rtx tmp2 = gen_reg_rtx (QImode);
9590 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9592 PUT_MODE (test, QImode);
9593 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9596 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9598 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
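/* E.g. (an illustrative sketch) an IEEE-safe a == b setcc ends up as
     fcomi              ; comparison result in the flags
     sete  %al          ; the main EQ test
     setnp %cl          ; reversed bypass test (NP = ordered)
     andb  %cl, %al     ; reject the unordered (NaN) case
   with the ior form used instead when a second test is present.  */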
9601 /* Attach a REG_EQUAL note describing the comparison result. */
9602 equiv = simplify_gen_relational (code, QImode,
9603 GET_MODE (ix86_compare_op0),
9604 ix86_compare_op0, ix86_compare_op1);
9605 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9607 return 1; /* DONE */
9610 /* Expand a comparison setting or clearing the carry flag. Return true
9611 when successful, and set *POP to the comparison operation. */
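/* For illustration (a sketch of the underlying idiom, with hypothetical
   registers): once a comparison is expressed as a carry flag test (LTU),
   a -1/0 mask needs no setcc:
     cmpl %ebx, %eax    ; CF = (eax <u ebx)
     sbbl %edx, %edx    ; edx = CF ? -1 : 0
   The movcc/addcc expanders below build on exactly this property.  */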
9613 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9615 enum machine_mode mode =
9616 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9618 /* Do not handle DImode compares that go through the special path. Also we
9619 can't deal with FP compares yet, though it would be possible to add that. */
9620 if ((mode == DImode && !TARGET_64BIT))
9622 if (FLOAT_MODE_P (mode))
9624 rtx second_test = NULL, bypass_test = NULL;
9625 rtx compare_op, compare_seq;
9627 /* Shortcut: the following common codes never translate into carry flag compares. */
9628 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9629 || code == ORDERED || code == UNORDERED)
9632 /* These comparisons require the zero flag; swap operands so they won't. */
9633 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9639 code = swap_condition (code);
9642 /* Try to expand the comparison and verify that we end up with a carry
9643 flag based comparison. This fails to be true only when we decide to
9644 expand the comparison using arithmetic, which is not a common scenario. */
9646 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9647 &second_test, &bypass_test);
9648 compare_seq = get_insns ();
9651 if (second_test || bypass_test)
9653 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9654 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9655 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9657 code = GET_CODE (compare_op);
9658 if (code != LTU && code != GEU)
9660 emit_insn (compare_seq);
9664 if (!INTEGRAL_MODE_P (mode))
9672 /* Convert a==0 into (unsigned)a<1. */
9675 if (op1 != const0_rtx)
9678 code = (code == EQ ? LTU : GEU);
9681 /* Convert a>b into b<a or a>=b-1. */
9684 if (GET_CODE (op1) == CONST_INT)
9686 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9687 /* Bail out on overflow. We could still swap the operands, but that
9688 would force loading the constant into a register. */
9689 if (op1 == const0_rtx
9690 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9692 code = (code == GTU ? GEU : LTU);
9699 code = (code == GTU ? LTU : GEU);
9703 /* Convert a>=0 into (unsigned)a<0x80000000. */
9706 if (mode == DImode || op1 != const0_rtx)
9708 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9709 code = (code == LT ? GEU : LTU);
9713 if (mode == DImode || op1 != constm1_rtx)
9715 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9716 code = (code == LE ? GEU : LTU);
9722 /* Swapping operands may cause a constant to appear as the first operand. */
9723 if (!nonimmediate_operand (op0, VOIDmode))
9727 op0 = force_reg (mode, op0);
9729 ix86_compare_op0 = op0;
9730 ix86_compare_op1 = op1;
9731 *pop = ix86_expand_compare (code, NULL, NULL);
9732 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9738 ix86_expand_int_movcc (rtx operands[])
9740 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9741 rtx compare_seq, compare_op;
9742 rtx second_test, bypass_test;
9743 enum machine_mode mode = GET_MODE (operands[0]);
9744 bool sign_bit_compare_p = false;
9747 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9748 compare_seq = get_insns ();
9751 compare_code = GET_CODE (compare_op);
9753 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9754 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9755 sign_bit_compare_p = true;
9757 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9758 HImode insns, we'd be swallowed in word prefix ops. */
9760 if ((mode != HImode || TARGET_FAST_PREFIX)
9761 && (mode != DImode || TARGET_64BIT)
9762 && GET_CODE (operands[2]) == CONST_INT
9763 && GET_CODE (operands[3]) == CONST_INT)
9765 rtx out = operands[0];
9766 HOST_WIDE_INT ct = INTVAL (operands[2]);
9767 HOST_WIDE_INT cf = INTVAL (operands[3]);
9771 /* Sign bit compares are better done using shifts than by using sbb. */
9773 if (sign_bit_compare_p
9774 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9775 ix86_compare_op1, &compare_op))
9777 /* Detect overlap between destination and compare sources. */
9780 if (!sign_bit_compare_p)
9784 compare_code = GET_CODE (compare_op);
9786 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9787 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9790 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9793 /* To simplify the rest of the code, restrict to the GEU case. */
9794 if (compare_code == LTU)
9796 HOST_WIDE_INT tmp = ct;
9799 compare_code = reverse_condition (compare_code);
9800 code = reverse_condition (code);
9805 PUT_CODE (compare_op,
9806 reverse_condition_maybe_unordered
9807 (GET_CODE (compare_op)));
9809 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9813 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9814 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9815 tmp = gen_reg_rtx (mode);
9818 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9820 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9824 if (code == GT || code == GE)
9825 code = reverse_condition (code);
9828 HOST_WIDE_INT tmp = ct;
9833 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9834 ix86_compare_op1, VOIDmode, 0, -1);
9847 tmp = expand_simple_binop (mode, PLUS,
9849 copy_rtx (tmp), 1, OPTAB_DIRECT);
9860 tmp = expand_simple_binop (mode, IOR,
9862 copy_rtx (tmp), 1, OPTAB_DIRECT);
9864 else if (diff == -1 && ct)
9874 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9876 tmp = expand_simple_binop (mode, PLUS,
9877 copy_rtx (tmp), GEN_INT (cf),
9878 copy_rtx (tmp), 1, OPTAB_DIRECT);
9886 * andl cf - ct, dest
9896 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9899 tmp = expand_simple_binop (mode, AND,
9901 gen_int_mode (cf - ct, mode),
9902 copy_rtx (tmp), 1, OPTAB_DIRECT);
9904 tmp = expand_simple_binop (mode, PLUS,
9905 copy_rtx (tmp), GEN_INT (ct),
9906 copy_rtx (tmp), 1, OPTAB_DIRECT);
9909 if (!rtx_equal_p (tmp, out))
9910 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9912 return 1; /* DONE */
9918 tmp = ct, ct = cf, cf = tmp;
9920 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9922 /* We may be reversing an unordered compare to a normal compare, which
9923 is not valid in general (we may convert a non-trapping condition
9924 to a trapping one); however, on i386 we currently emit all
9925 comparisons unordered. */
9926 compare_code = reverse_condition_maybe_unordered (compare_code);
9927 code = reverse_condition_maybe_unordered (code);
9931 compare_code = reverse_condition (compare_code);
9932 code = reverse_condition (code);
9937 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9938 && GET_CODE (ix86_compare_op1) == CONST_INT)
9940 if (ix86_compare_op1 == const0_rtx
9941 && (code == LT || code == GE))
9942 compare_code = code;
9943 else if (ix86_compare_op1 == constm1_rtx)
9947 else if (code == GT)
9952 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9953 if (compare_code != NIL
9954 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9955 && (cf == -1 || ct == -1))
9957 /* If the lea code below could be used, only optimize
9958 if it results in a 2-insn sequence. */
9960 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9961 || diff == 3 || diff == 5 || diff == 9)
9962 || (compare_code == LT && ct == -1)
9963 || (compare_code == GE && cf == -1))
9966 * notl op1 (if necessary)
9974 code = reverse_condition (code);
9977 out = emit_store_flag (out, code, ix86_compare_op0,
9978 ix86_compare_op1, VOIDmode, 0, -1);
9980 out = expand_simple_binop (mode, IOR,
9982 out, 1, OPTAB_DIRECT);
9983 if (out != operands[0])
9984 emit_move_insn (operands[0], out);
9986 return 1; /* DONE */
9991 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9992 || diff == 3 || diff == 5 || diff == 9)
9993 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9994 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10000 * lea cf(dest*(ct-cf)),dest
10004 * This also catches the degenerate setcc-only case.
10010 out = emit_store_flag (out, code, ix86_compare_op0,
10011 ix86_compare_op1, VOIDmode, 0, 1);
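/* Worked example (illustrative): ct = 5, cf = 1, so diff = 4.
   The store flag above leaves dest in {0, 1}; assuming dest is
   in %eax,
     leal 1(,%eax,4), %eax
   then maps that to {1, 5} = {cf, ct} in a single insn.  */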
10014 /* On x86_64 the lea instruction operates on Pmode, so we need
10015 the arithmetic done in the proper mode to match. */
10017 tmp = copy_rtx (out);
10021 out1 = copy_rtx (out);
10022 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10026 tmp = gen_rtx_PLUS (mode, tmp, out1);
10032 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10035 if (!rtx_equal_p (tmp, out))
10038 out = force_operand (tmp, copy_rtx (out));
10040 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10042 if (!rtx_equal_p (out, operands[0]))
10043 emit_move_insn (operands[0], copy_rtx (out));
10045 return 1; /* DONE */
10049 * General case: Jumpful:
10050 * xorl dest,dest cmpl op1, op2
10051 * cmpl op1, op2 movl ct, dest
10052 * setcc dest jcc 1f
10053 * decl dest movl cf, dest
10054 * andl (cf-ct),dest 1:
10057 * Size 20. Size 14.
10059 * This is reasonably steep, but branch mispredict costs are
10060 * high on modern CPUs, so consider failing only if optimizing for space. */
10064 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10065 && BRANCH_COST >= 2)
10071 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10072 /* We may be reversing an unordered compare to a normal compare,
10073 which is not valid in general (we may convert a non-trapping
10074 condition to a trapping one); however, on i386 we currently
10075 emit all comparisons unordered. */
10076 code = reverse_condition_maybe_unordered (code);
10079 code = reverse_condition (code);
10080 if (compare_code != NIL)
10081 compare_code = reverse_condition (compare_code);
10085 if (compare_code != NIL)
10087 /* notl op1 (if needed)
10092 For x < 0 (resp. x <= -1) there will be no notl,
10093 so if possible swap the constants to get rid of the
10095 True/false will be -1/0 while code below (store flag
10096 followed by decrement) is 0/-1, so the constants need
10097 to be exchanged once more. */
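/* A sketch of the idiom (illustrative; the code below also swaps ct/cf
   as described above): for dest = (x >= 0) ? ct : cf this becomes
     notl x             ; flip the sign bit
     sarl $31, x        ; x = (original x >= 0) ? -1 : 0
     andl $(ct - cf), x
     addl $cf, x  */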
10099 if (compare_code == GE || !cf)
10101 code = reverse_condition (code);
10106 HOST_WIDE_INT tmp = cf;
10111 out = emit_store_flag (out, code, ix86_compare_op0,
10112 ix86_compare_op1, VOIDmode, 0, -1);
10116 out = emit_store_flag (out, code, ix86_compare_op0,
10117 ix86_compare_op1, VOIDmode, 0, 1);
10119 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10120 copy_rtx (out), 1, OPTAB_DIRECT);
10123 out = expand_simple_binop (mode, AND, copy_rtx (out),
10124 gen_int_mode (cf - ct, mode),
10125 copy_rtx (out), 1, OPTAB_DIRECT);
10127 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10128 copy_rtx (out), 1, OPTAB_DIRECT);
10129 if (!rtx_equal_p (out, operands[0]))
10130 emit_move_insn (operands[0], copy_rtx (out));
10132 return 1; /* DONE */
10136 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10138 /* Try a few more things with specific constants and a variable. */
10141 rtx var, orig_out, out, tmp;
10143 if (BRANCH_COST <= 2)
10144 return 0; /* FAIL */
10146 /* If one of the two operands is an interesting constant, load a
10147 constant via the code above and mask the variable in with a logical operation. */
10149 if (GET_CODE (operands[2]) == CONST_INT)
10152 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10153 operands[3] = constm1_rtx, op = and_optab;
10154 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10155 operands[3] = const0_rtx, op = ior_optab;
10157 return 0; /* FAIL */
10159 else if (GET_CODE (operands[3]) == CONST_INT)
10162 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10163 operands[2] = constm1_rtx, op = and_optab;
10164 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10165 operands[2] = const0_rtx, op = ior_optab;
10167 return 0; /* FAIL */
10170 return 0; /* FAIL */
10172 orig_out = operands[0];
10173 tmp = gen_reg_rtx (mode);
10176 /* Recurse to get the constant loaded. */
10177 if (ix86_expand_int_movcc (operands) == 0)
10178 return 0; /* FAIL */
10180 /* Mask in the interesting variable. */
10181 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10183 if (!rtx_equal_p (out, orig_out))
10184 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10186 return 1; /* DONE */
10190 * For comparison with above,
10200 if (! nonimmediate_operand (operands[2], mode))
10201 operands[2] = force_reg (mode, operands[2]);
10202 if (! nonimmediate_operand (operands[3], mode))
10203 operands[3] = force_reg (mode, operands[3]);
10205 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10207 rtx tmp = gen_reg_rtx (mode);
10208 emit_move_insn (tmp, operands[3]);
10211 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10213 rtx tmp = gen_reg_rtx (mode);
10214 emit_move_insn (tmp, operands[2]);
10218 if (! register_operand (operands[2], VOIDmode)
10220 || ! register_operand (operands[3], VOIDmode)))
10221 operands[2] = force_reg (mode, operands[2]);
10224 && ! register_operand (operands[3], VOIDmode))
10225 operands[3] = force_reg (mode, operands[3]);
10227 emit_insn (compare_seq);
10228 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10229 gen_rtx_IF_THEN_ELSE (mode,
10230 compare_op, operands[2],
10233 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10234 gen_rtx_IF_THEN_ELSE (mode,
10236 copy_rtx (operands[3]),
10237 copy_rtx (operands[0]))));
10239 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10240 gen_rtx_IF_THEN_ELSE (mode,
10242 copy_rtx (operands[2]),
10243 copy_rtx (operands[0]))));
10245 return 1; /* DONE */
10249 ix86_expand_fp_movcc (rtx operands[])
10251 enum rtx_code code;
10253 rtx compare_op, second_test, bypass_test;
10255 /* For SF/DFmode conditional moves based on comparisons
10256 in the same mode, we may want to use SSE min/max instructions. */
10257 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10258 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10259 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10260 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10261 && (!TARGET_IEEE_FP
10262 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10263 /* We may be called from the post-reload splitter. */
10264 && (!REG_P (operands[0])
10265 || SSE_REG_P (operands[0])
10266 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10268 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10269 code = GET_CODE (operands[1]);
10271 /* See if we have a (cross) match between the comparison operands and
10272 the conditional move operands. */
10273 if (rtx_equal_p (operands[2], op1))
10278 code = reverse_condition_maybe_unordered (code);
10280 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10282 /* Check for min operation. */
10283 if (code == LT || code == UNLE)
10291 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10292 if (memory_operand (op0, VOIDmode))
10293 op0 = force_reg (GET_MODE (operands[0]), op0);
10294 if (GET_MODE (operands[0]) == SFmode)
10295 emit_insn (gen_minsf3 (operands[0], op0, op1));
10297 emit_insn (gen_mindf3 (operands[0], op0, op1));
10300 /* Check for max operation. */
10301 if (code == GT || code == UNGE)
10309 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10310 if (memory_operand (op0, VOIDmode))
10311 op0 = force_reg (GET_MODE (operands[0]), op0);
10312 if (GET_MODE (operands[0]) == SFmode)
10313 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10315 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10319 /* Arrange for the condition to be an sse_comparison_operator. In case
10320 we are in non-IEEE mode, try to canonicalize the destination operand
10321 to be first in the comparison - this helps reload to avoid extra moves. */
10323 if (!sse_comparison_operator (operands[1], VOIDmode)
10324 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10326 rtx tmp = ix86_compare_op0;
10327 ix86_compare_op0 = ix86_compare_op1;
10328 ix86_compare_op1 = tmp;
10329 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10330 VOIDmode, ix86_compare_op0,
10333 /* Similarly try to arrange for the result to be the first operand of the
10334 conditional move. We also don't support the NE comparison on SSE, so try to avoid it. */
10336 if ((rtx_equal_p (operands[0], operands[3])
10337 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10338 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10340 rtx tmp = operands[2];
10341 operands[2] = operands[3];
10343 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10344 (GET_CODE (operands[1])),
10345 VOIDmode, ix86_compare_op0,
10348 if (GET_MODE (operands[0]) == SFmode)
10349 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10350 operands[2], operands[3],
10351 ix86_compare_op0, ix86_compare_op1));
10353 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10354 operands[2], operands[3],
10355 ix86_compare_op0, ix86_compare_op1));
10362 code = GET_CODE (operands[1]);
10363 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10365 /* The floating point conditional move instructions don't directly
10366 support signed integer comparisons. */
10368 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10370 if (second_test != NULL || bypass_test != NULL)
10372 tmp = gen_reg_rtx (QImode);
10373 ix86_expand_setcc (code, tmp);
10375 ix86_compare_op0 = tmp;
10376 ix86_compare_op1 = const0_rtx;
10377 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10379 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10381 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10382 emit_move_insn (tmp, operands[3]);
10385 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10387 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10388 emit_move_insn (tmp, operands[2]);
10392 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10393 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10398 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10399 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10404 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10405 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10413 /* Expand conditional increment or decrement using adc/sbb instructions.
10414 The default case using setcc followed by the conditional move can be
10415 done by generic code. */
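/* An illustrative sketch (hypothetical operands): for
     dest = a + (b <u c)
   this emits
     cmpl %ecx, %ebx    ; CF = (b <u c)
     adcl $0, %eax      ; dest = a + CF
   and the mirrored sbb form handles the decrement case.  */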
10417 ix86_expand_int_addcc (rtx operands[])
10419 enum rtx_code code = GET_CODE (operands[1]);
10421 rtx val = const0_rtx;
10422 bool fpcmp = false;
10423 enum machine_mode mode = GET_MODE (operands[0]);
10425 if (operands[3] != const1_rtx
10426 && operands[3] != constm1_rtx)
10428 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10429 ix86_compare_op1, &compare_op))
10431 code = GET_CODE (compare_op);
10433 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10434 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10437 code = ix86_fp_compare_code_to_integer (code);
10444 PUT_CODE (compare_op,
10445 reverse_condition_maybe_unordered
10446 (GET_CODE (compare_op)));
10448 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10450 PUT_MODE (compare_op, mode);
10452 /* Construct either adc or sbb insn. */
10453 if ((code == LTU) == (operands[3] == constm1_rtx))
10455 switch (GET_MODE (operands[0]))
10458 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10461 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10464 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10467 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10475 switch (GET_MODE (operands[0]))
10478 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10481 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10484 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10487 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10493 return 1; /* DONE */
10497 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10498 works for floating point parameters and non-offsettable memories.
10499 For pushes, it returns just the stack offsets; the values will be saved
10500 in the right order. Maximally three parts are generated. */
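/* E.g. (illustrative) a DFmode operand on a 32-bit target splits into two
   SImode parts and an XFmode operand into three, while in 64-bit mode
   XFmode and TFmode split into a DImode part plus one upper part.  */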
10503 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10508 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10510 size = (GET_MODE_SIZE (mode) + 4) / 8;
10512 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10514 if (size < 2 || size > 3)
10517 /* Optimize constant pool reference to immediates. This is used by fp
10518 moves, which force all constants to memory to allow combining. */
10519 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10521 rtx tmp = maybe_get_pool_constant (operand);
10526 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10528 /* The only non-offsettable memories we handle are pushes. */
10529 if (! push_operand (operand, VOIDmode))
10532 operand = copy_rtx (operand);
10533 PUT_MODE (operand, Pmode);
10534 parts[0] = parts[1] = parts[2] = operand;
10536 else if (!TARGET_64BIT)
10538 if (mode == DImode)
10539 split_di (&operand, 1, &parts[0], &parts[1]);
10542 if (REG_P (operand))
10544 if (!reload_completed)
10546 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10547 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10549 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10551 else if (offsettable_memref_p (operand))
10553 operand = adjust_address (operand, SImode, 0);
10554 parts[0] = operand;
10555 parts[1] = adjust_address (operand, SImode, 4);
10557 parts[2] = adjust_address (operand, SImode, 8);
10559 else if (GET_CODE (operand) == CONST_DOUBLE)
10564 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10568 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10569 parts[2] = gen_int_mode (l[2], SImode);
10572 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10577 parts[1] = gen_int_mode (l[1], SImode);
10578 parts[0] = gen_int_mode (l[0], SImode);
10586 if (mode == TImode)
10587 split_ti (&operand, 1, &parts[0], &parts[1]);
10588 if (mode == XFmode || mode == TFmode)
10590 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
10591 if (REG_P (operand))
10593 if (!reload_completed)
10595 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10596 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10598 else if (offsettable_memref_p (operand))
10600 operand = adjust_address (operand, DImode, 0);
10601 parts[0] = operand;
10602 parts[1] = adjust_address (operand, upper_mode, 8);
10604 else if (GET_CODE (operand) == CONST_DOUBLE)
10609 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10610 real_to_target (l, &r, mode);
10611 /* Do not use shift by 32 to avoid warnings on 32-bit systems. */
10612 if (HOST_BITS_PER_WIDE_INT >= 64)
10615 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10616 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10619 parts[0] = immed_double_const (l[0], l[1], DImode);
10620 if (upper_mode == SImode)
10621 parts[1] = gen_int_mode (l[2], SImode);
10622 else if (HOST_BITS_PER_WIDE_INT >= 64)
10625 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10626 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10629 parts[1] = immed_double_const (l[2], l[3], DImode);
10639 /* Emit insns to perform a move or push of DI, DF, and XF values.
10640 Return false when normal moves are needed; true when all required
10641 insns have been emitted. Operands 2-4 contain the input values
10642 in the correct order; operands 5-7 contain the output values. */
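/* E.g. (illustrative) a 32-bit DImode load whose destination low word is
   also the address register is reordered so the address is not clobbered
   early:
     movl 4(%eax), %edx ; high word first
     movl (%eax), %eax  ; low word last  */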
10645 ix86_split_long_move (rtx operands[])
10650 int collisions = 0;
10651 enum machine_mode mode = GET_MODE (operands[0]);
10653 /* The DFmode expanders may ask us to move a double.
10654 For a 64-bit target this is a single move. By hiding the fact
10655 here we simplify the i386.md splitters. */
10656 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10658 /* Optimize constant pool reference to immediates. This is used by
10659 fp moves, which force all constants to memory to allow combining. */
10661 if (GET_CODE (operands[1]) == MEM
10662 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10663 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10664 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10665 if (push_operand (operands[0], VOIDmode))
10667 operands[0] = copy_rtx (operands[0]);
10668 PUT_MODE (operands[0], Pmode);
10671 operands[0] = gen_lowpart (DImode, operands[0]);
10672 operands[1] = gen_lowpart (DImode, operands[1]);
10673 emit_move_insn (operands[0], operands[1]);
10677 /* The only non-offsettable memory we handle is a push. */
10678 if (push_operand (operands[0], VOIDmode))
10680 else if (GET_CODE (operands[0]) == MEM
10681 && ! offsettable_memref_p (operands[0]))
10684 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10685 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10687 /* When emitting a push, take care of source operands on the stack. */
10688 if (push && GET_CODE (operands[1]) == MEM
10689 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10692 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10693 XEXP (part[1][2], 0));
10694 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10695 XEXP (part[1][1], 0));
10698 /* We need to do the copy in the right order in case an address register
10699 of the source overlaps the destination. */
10700 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10702 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10704 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10707 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10710 /* A collision in the middle part can be handled by reordering. */
10711 if (collisions == 1 && nparts == 3
10712 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10715 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10716 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10719 /* If there are more collisions, we can't handle them by reordering.
10720 Do an lea to the last part and use only one colliding move. */
10721 else if (collisions > 1)
10727 base = part[0][nparts - 1];
10729 /* Handle the case when the last part isn't valid for lea.
10730 Happens in 64-bit mode storing the 12-byte XFmode. */
10731 if (GET_MODE (base) != Pmode)
10732 base = gen_rtx_REG (Pmode, REGNO (base));
10734 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10735 part[1][0] = replace_equiv_address (part[1][0], base);
10736 part[1][1] = replace_equiv_address (part[1][1],
10737 plus_constant (base, UNITS_PER_WORD));
10739 part[1][2] = replace_equiv_address (part[1][2],
10740 plus_constant (base, 8));
10750 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10751 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10752 emit_move_insn (part[0][2], part[1][2]);
10757 /* In 64-bit mode we don't have a 32-bit push available. In case this is
10758 a register, that is OK - we will just use the larger counterpart. We also
10759 retype memories - these come from an attempt to avoid the REX prefix on
10760 moving the second half of a TFmode value. */
10761 if (GET_MODE (part[1][1]) == SImode)
10763 if (GET_CODE (part[1][1]) == MEM)
10764 part[1][1] = adjust_address (part[1][1], DImode, 0);
10765 else if (REG_P (part[1][1]))
10766 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10769 if (GET_MODE (part[1][0]) == SImode)
10770 part[1][0] = part[1][1];
10773 emit_move_insn (part[0][1], part[1][1]);
10774 emit_move_insn (part[0][0], part[1][0]);
10778 /* Choose the correct order so as not to overwrite the source before it is copied. */
10779 if ((REG_P (part[0][0])
10780 && REG_P (part[1][1])
10781 && (REGNO (part[0][0]) == REGNO (part[1][1])
10783 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10785 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10789 operands[2] = part[0][2];
10790 operands[3] = part[0][1];
10791 operands[4] = part[0][0];
10792 operands[5] = part[1][2];
10793 operands[6] = part[1][1];
10794 operands[7] = part[1][0];
10798 operands[2] = part[0][1];
10799 operands[3] = part[0][0];
10800 operands[5] = part[1][1];
10801 operands[6] = part[1][0];
10808 operands[2] = part[0][0];
10809 operands[3] = part[0][1];
10810 operands[4] = part[0][2];
10811 operands[5] = part[1][0];
10812 operands[6] = part[1][1];
10813 operands[7] = part[1][2];
10817 operands[2] = part[0][0];
10818 operands[3] = part[0][1];
10819 operands[5] = part[1][0];
10820 operands[6] = part[1][1];
10823 emit_move_insn (operands[2], operands[5]);
10824 emit_move_insn (operands[3], operands[6]);
10826 emit_move_insn (operands[4], operands[7]);
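/* Split a DImode left shift into SImode pieces. As an illustrative
   sketch, a constant shift by 5 becomes
     shldl $5, %eax, %edx   ; high = high<<5 | low>>27
     sall  $5, %eax         ; low <<= 5
   while a shift by 32 or more moves the low word into the high word,
   clears the low word and shifts the high word by count - 32.  */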
10832 ix86_split_ashldi (rtx *operands, rtx scratch)
10834 rtx low[2], high[2];
10837 if (GET_CODE (operands[2]) == CONST_INT)
10839 split_di (operands, 2, low, high);
10840 count = INTVAL (operands[2]) & 63;
10844 emit_move_insn (high[0], low[1]);
10845 emit_move_insn (low[0], const0_rtx);
10848 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10852 if (!rtx_equal_p (operands[0], operands[1]))
10853 emit_move_insn (operands[0], operands[1]);
10854 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10855 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10860 if (!rtx_equal_p (operands[0], operands[1]))
10861 emit_move_insn (operands[0], operands[1]);
10863 split_di (operands, 1, low, high);
10865 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10866 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10868 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10870 if (! no_new_pseudos)
10871 scratch = force_reg (SImode, const0_rtx);
10873 emit_move_insn (scratch, const0_rtx);
10875 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10879 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10884 ix86_split_ashrdi (rtx *operands, rtx scratch)
10886 rtx low[2], high[2];
10889 if (GET_CODE (operands[2]) == CONST_INT)
10891 split_di (operands, 2, low, high);
10892 count = INTVAL (operands[2]) & 63;
10896 emit_move_insn (low[0], high[1]);
10898 if (! reload_completed)
10899 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10902 emit_move_insn (high[0], low[0]);
10903 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10907 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10911 if (!rtx_equal_p (operands[0], operands[1]))
10912 emit_move_insn (operands[0], operands[1]);
10913 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10914 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10919 if (!rtx_equal_p (operands[0], operands[1]))
10920 emit_move_insn (operands[0], operands[1]);
10922 split_di (operands, 1, low, high);
10924 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10925 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10927 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10929 if (! no_new_pseudos)
10930 scratch = gen_reg_rtx (SImode);
10931 emit_move_insn (scratch, high[0]);
10932 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10933 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10937 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10942 ix86_split_lshrdi (rtx *operands, rtx scratch)
10944 rtx low[2], high[2];
10947 if (GET_CODE (operands[2]) == CONST_INT)
10949 split_di (operands, 2, low, high);
10950 count = INTVAL (operands[2]) & 63;
10954 emit_move_insn (low[0], high[1]);
10955 emit_move_insn (high[0], const0_rtx);
10958 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10962 if (!rtx_equal_p (operands[0], operands[1]))
10963 emit_move_insn (operands[0], operands[1]);
10964 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10965 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10970 if (!rtx_equal_p (operands[0], operands[1]))
10971 emit_move_insn (operands[0], operands[1]);
10973 split_di (operands, 1, low, high);
10975 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10976 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10978 /* Heh. By reversing the arguments, we can reuse this pattern. */
10979 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10981 if (! no_new_pseudos)
10982 scratch = force_reg (SImode, const0_rtx);
10984 emit_move_insn (scratch, const0_rtx);
10986 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10990 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10994 /* Helper function for the string operations below. Test the VALUE bits
10995 of VARIABLE; if they are all clear, jump to the returned label. */
10997 ix86_expand_aligntest (rtx variable, int value)
10999 rtx label = gen_label_rtx ();
11000 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11001 if (GET_MODE (variable) == DImode)
11002 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11004 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11005 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11010 /* Decrement COUNTREG by VALUE. */
11012 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11014 if (GET_MODE (countreg) == DImode)
11015 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11017 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11020 /* Zero-extend a possibly-SImode EXP to a Pmode register. */
11022 ix86_zero_extend_to_Pmode (rtx exp)
11025 if (GET_MODE (exp) == VOIDmode)
11026 return force_reg (Pmode, exp);
11027 if (GET_MODE (exp) == Pmode)
11028 return copy_to_mode_reg (Pmode, exp);
11029 r = gen_reg_rtx (Pmode);
11030 emit_insn (gen_zero_extendsidi2 (r, exp));
11034 /* Expand a string move (memcpy) operation. Use i386 string operations when
11035 profitable. expand_clrstr contains similar code. */
11037 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11039 rtx srcreg, destreg, countreg, srcexp, destexp;
11040 enum machine_mode counter_mode;
11041 HOST_WIDE_INT align = 0;
11042 unsigned HOST_WIDE_INT count = 0;
11044 if (GET_CODE (align_exp) == CONST_INT)
11045 align = INTVAL (align_exp);
11047 /* Can't use any of this if the user has appropriated esi or edi. */
11048 if (global_regs[4] || global_regs[5])
11051 /* This simple hack avoids all inlining code and simplifies code below. */
11052 if (!TARGET_ALIGN_STRINGOPS)
11055 if (GET_CODE (count_exp) == CONST_INT)
11057 count = INTVAL (count_exp);
11058 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11062 /* Figure out the proper mode for the counter. For 32 bits it is always
11063 SImode; for 64 bits use SImode when possible, otherwise DImode.
11064 Set count to the number of bytes copied when known at compile time. */
11065 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11066 || x86_64_zero_extended_value (count_exp))
11067 counter_mode = SImode;
11069 counter_mode = DImode;
11071 if (counter_mode != SImode && counter_mode != DImode)
11074 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11075 if (destreg != XEXP (dst, 0))
11076 dst = replace_equiv_address_nv (dst, destreg);
11077 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11078 if (srcreg != XEXP (src, 0))
11079 src = replace_equiv_address_nv (src, srcreg);
11081 /* When optimizing for size, emit a simple rep ; movsb instruction for
11082 counts not divisible by 4. */
11084 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11086 emit_insn (gen_cld ());
11087 countreg = ix86_zero_extend_to_Pmode (count_exp);
11088 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11089 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11090 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11094 /* For constant aligned (or small unaligned) copies use rep movsl
11095 followed by code copying the rest. For PentiumPro ensure 8 byte
11096 alignment to allow rep movsl acceleration. */
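/* E.g. (illustrative) a 4-byte-aligned constant 23-byte copy on a 32-bit
   target becomes
     movl $5, %ecx
     rep ; movsl        ; 20 bytes
     movsw              ; 2 more
     movsb              ; the final byte  */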
11098 else if (count != 0
11100 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11101 || optimize_size || count < (unsigned int) 64))
11103 unsigned HOST_WIDE_INT offset = 0;
11104 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11105 rtx srcmem, dstmem;
11107 emit_insn (gen_cld ());
11108 if (count & ~(size - 1))
11110 countreg = copy_to_mode_reg (counter_mode,
11111 GEN_INT ((count >> (size == 4 ? 2 : 3))
11112 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11113 countreg = ix86_zero_extend_to_Pmode (countreg);
11115 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11116 GEN_INT (size == 4 ? 2 : 3));
11117 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11118 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11120 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11121 countreg, destexp, srcexp));
11122 offset = count & ~(size - 1);
11124 if (size == 8 && (count & 0x04))
11126 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11128 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11130 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11135 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11137 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11139 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11144 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11146 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11148 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11151 /* The generic code based on the glibc implementation:
11152 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11153 allowing accelerated copying there)
11154 - copy the data using rep movsl
11155 - copy the rest. */
11160 rtx srcmem, dstmem;
11161 int desired_alignment = (TARGET_PENTIUMPRO
11162 && (count == 0 || count >= (unsigned int) 260)
11163 ? 8 : UNITS_PER_WORD);
11164 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11165 dst = change_address (dst, BLKmode, destreg);
11166 src = change_address (src, BLKmode, srcreg);
11168 /* In case we don't know anything about the alignment, default to the
11169 library version, since it is usually equally fast and results in shorter code.
11172 Also emit a call when we know that the count is large and the call overhead
11173 will not be important. */
11174 if (!TARGET_INLINE_ALL_STRINGOPS
11175 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11178 if (TARGET_SINGLE_STRINGOP)
11179 emit_insn (gen_cld ());
11181 countreg2 = gen_reg_rtx (Pmode);
11182 countreg = copy_to_mode_reg (counter_mode, count_exp);
11184 /* We don't use loops to align destination and to copy parts smaller
11185 than 4 bytes, because gcc is able to optimize such code better (in
11186 the case the destination or the count really is aligned, gcc is often
11187 able to predict the branches) and also it is friendlier to the
11188 hardware branch prediction.
11190 Using loops is beneficial for the generic case, because we can
11191 handle small counts using the loops. Many CPUs (such as Athlon)
11192 have large REP prefix setup costs.
11194 This is quite costly. Maybe we can revisit this decision later or
11195 add some customizability to this code. */
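/* A sketch of the alignment prologue emitted below (illustrative; the
   real insns come from ix86_expand_aligntest and gen_strmov):
     testl $1, %edi ; jz 1f ; movsb ; decl %ecx ; 1:
     testl $2, %edi ; jz 2f ; movsw ; subl $2, %ecx ; 2:
   repeated until the desired destination alignment is reached.  */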
11197 if (count == 0 && align < desired_alignment)
11199 label = gen_label_rtx ();
11200 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11201 LEU, 0, counter_mode, 1, label);
11205 rtx label = ix86_expand_aligntest (destreg, 1);
11206 srcmem = change_address (src, QImode, srcreg);
11207 dstmem = change_address (dst, QImode, destreg);
11208 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11209 ix86_adjust_counter (countreg, 1);
11210 emit_label (label);
11211 LABEL_NUSES (label) = 1;
11215 rtx label = ix86_expand_aligntest (destreg, 2);
11216 srcmem = change_address (src, HImode, srcreg);
11217 dstmem = change_address (dst, HImode, destreg);
11218 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11219 ix86_adjust_counter (countreg, 2);
11220 emit_label (label);
11221 LABEL_NUSES (label) = 1;
11223 if (align <= 4 && desired_alignment > 4)
11225 rtx label = ix86_expand_aligntest (destreg, 4);
11226 srcmem = change_address (src, SImode, srcreg);
11227 dstmem = change_address (dst, SImode, destreg);
11228 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11229 ix86_adjust_counter (countreg, 4);
11230 emit_label (label);
11231 LABEL_NUSES (label) = 1;
11234 if (label && desired_alignment > 4 && !TARGET_64BIT)
11236 emit_label (label);
11237 LABEL_NUSES (label) = 1;
11240 if (!TARGET_SINGLE_STRINGOP)
11241 emit_insn (gen_cld ());
11244 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11246 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11250 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11251 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11253 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11254 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11255 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11256 countreg2, destexp, srcexp));
11260 emit_label (label);
11261 LABEL_NUSES (label) = 1;
11263 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11265 srcmem = change_address (src, SImode, srcreg);
11266 dstmem = change_address (dst, SImode, destreg);
11267 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11269 if ((align <= 4 || count == 0) && TARGET_64BIT)
11271 rtx label = ix86_expand_aligntest (countreg, 4);
11272 srcmem = change_address (src, SImode, srcreg);
11273 dstmem = change_address (dst, SImode, destreg);
11274 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11275 emit_label (label);
11276 LABEL_NUSES (label) = 1;
11278 if (align > 2 && count != 0 && (count & 2))
11280 srcmem = change_address (src, HImode, srcreg);
11281 dstmem = change_address (dst, HImode, destreg);
11282 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11284 if (align <= 2 || count == 0)
11286 rtx label = ix86_expand_aligntest (countreg, 2);
11287 srcmem = change_address (src, HImode, srcreg);
11288 dstmem = change_address (dst, HImode, destreg);
11289 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11290 emit_label (label);
11291 LABEL_NUSES (label) = 1;
11293 if (align > 1 && count != 0 && (count & 1))
11295 srcmem = change_address (src, QImode, srcreg);
11296 dstmem = change_address (dst, QImode, destreg);
11297 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11299 if (align <= 1 || count == 0)
11301 rtx label = ix86_expand_aligntest (countreg, 1);
11302 srcmem = change_address (src, QImode, srcreg);
11303 dstmem = change_address (dst, QImode, destreg);
11304 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11305 emit_label (label);
11306 LABEL_NUSES (label) = 1;
11313 /* Expand a string clear operation (bzero). Use i386 string operations when
11314 profitable. expand_movstr contains similar code. */
11316 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11318 rtx destreg, zeroreg, countreg, destexp;
11319 enum machine_mode counter_mode;
11320 HOST_WIDE_INT align = 0;
11321 unsigned HOST_WIDE_INT count = 0;
11323 if (GET_CODE (align_exp) == CONST_INT)
11324 align = INTVAL (align_exp);
11326 /* Can't use any of this if the user has appropriated esi. */
11327 if (global_regs[4])
11330 /* This simple hack avoids all inlining code and simplifies code below. */
11331 if (!TARGET_ALIGN_STRINGOPS)
11334 if (GET_CODE (count_exp) == CONST_INT)
11336 count = INTVAL (count_exp);
11337 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11340 /* Figure out the proper mode for the counter. For 32 bits it is always
11341 SImode; for 64 bits use SImode when possible, otherwise DImode.
11342 Set count to the number of bytes to clear when known at compile time. */
11343 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11344 || x86_64_zero_extended_value (count_exp))
11345 counter_mode = SImode;
11347 counter_mode = DImode;
11349 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11350 if (destreg != XEXP (dst, 0))
11351 dst = replace_equiv_address_nv (dst, destreg);
11353 emit_insn (gen_cld ());
11355 /* When optimizing for size, emit a simple rep ; stosb instruction for
11356 counts not divisible by 4. */
11358 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11360 countreg = ix86_zero_extend_to_Pmode (count_exp);
11361 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11362 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11363 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11365 else if (count != 0
11367 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11368 || optimize_size || count < (unsigned int) 64))
11370 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11371 unsigned HOST_WIDE_INT offset = 0;
11373 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11374 if (count & ~(size - 1))
11376 countreg = copy_to_mode_reg (counter_mode,
11377 GEN_INT ((count >> (size == 4 ? 2 : 3))
11378 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11379 countreg = ix86_zero_extend_to_Pmode (countreg);
11380 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11381 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11382 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11383 offset = count & ~(size - 1);
11385 if (size == 8 && (count & 0x04))
11387 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11389 emit_insn (gen_strset (destreg, mem,
11390 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11395 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11397 emit_insn (gen_strset (destreg, mem,
11398 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11403 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11405 emit_insn (gen_strset (destreg, mem,
11406 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11413 /* Compute desired alignment of the string operation. */
11414 int desired_alignment = (TARGET_PENTIUMPRO
11415 && (count == 0 || count >= (unsigned int) 260)
11416 ? 8 : UNITS_PER_WORD);
11418 /* In case we don't know anything about the alignment, default to the
11419 library version, since it is usually equally fast and results in shorter code.
11422 Also emit a call when we know that the count is large and the call overhead
11423 will not be important. */
11424 if (!TARGET_INLINE_ALL_STRINGOPS
11425 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11428 if (TARGET_SINGLE_STRINGOP)
11429 emit_insn (gen_cld ());
11431 countreg2 = gen_reg_rtx (Pmode);
11432 countreg = copy_to_mode_reg (counter_mode, count_exp);
11433 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11434 /* Get rid of MEM_OFFSET, it won't be accurate. */
11435 dst = change_address (dst, BLKmode, destreg);
11437 if (count == 0 && align < desired_alignment)
11439 label = gen_label_rtx ();
11440 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11441 LEU, 0, counter_mode, 1, label);
11445 rtx label = ix86_expand_aligntest (destreg, 1);
11446 emit_insn (gen_strset (destreg, dst,
11447 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11448 ix86_adjust_counter (countreg, 1);
11449 emit_label (label);
11450 LABEL_NUSES (label) = 1;
11454 rtx label = ix86_expand_aligntest (destreg, 2);
11455 emit_insn (gen_strset (destreg, dst,
11456 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11457 ix86_adjust_counter (countreg, 2);
11458 emit_label (label);
11459 LABEL_NUSES (label) = 1;
11461 if (align <= 4 && desired_alignment > 4)
11463 rtx label = ix86_expand_aligntest (destreg, 4);
11464 emit_insn (gen_strset (destreg, dst,
11466 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11468 ix86_adjust_counter (countreg, 4);
11469 emit_label (label);
11470 LABEL_NUSES (label) = 1;
11473 if (label && desired_alignment > 4 && !TARGET_64BIT)
11475 emit_label (label);
11476 LABEL_NUSES (label) = 1;
11480 if (!TARGET_SINGLE_STRINGOP)
11481 emit_insn (gen_cld ());
11484 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11486 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11490 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11491 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11493 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11494 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
11502 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11503 emit_insn (gen_strset (destreg, dst,
11504 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11505 if (TARGET_64BIT && (align <= 4 || count == 0))
11507 rtx label = ix86_expand_aligntest (countreg, 4);
11508 emit_insn (gen_strset (destreg, dst,
11509 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11510 emit_label (label);
11511 LABEL_NUSES (label) = 1;
11513 if (align > 2 && count != 0 && (count & 2))
11514 emit_insn (gen_strset (destreg, dst,
11515 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11516 if (align <= 2 || count == 0)
11518 rtx label = ix86_expand_aligntest (countreg, 2);
11519 emit_insn (gen_strset (destreg, dst,
11520 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11521 emit_label (label);
11522 LABEL_NUSES (label) = 1;
11524 if (align > 1 && count != 0 && (count & 1))
11525 emit_insn (gen_strset (destreg, dst,
11526 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11527 if (align <= 1 || count == 0)
11529 rtx label = ix86_expand_aligntest (countreg, 1);
11530 emit_insn (gen_strset (destreg, dst,
11531 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11532 emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
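/* Editor's note (illustrative only, not compiler code): for a COUNT known
   at compile time, the expansion above is roughly equivalent to the C
   sketch below; the name `clear_tail' is hypothetical.  One rep-stos pass
   clears the size-aligned bulk, then at most one store each of 4, 2 and
   1 bytes finishes the tail.

     static void
     clear_tail (unsigned char *dst, unsigned long count)
     {
       unsigned long n = count >> 3;	   // 8-byte chunks: the rep-stosq part
       for (; n; n--, dst += 8)
	 {
	   unsigned long zero = 0;
	   __builtin_memcpy (dst, &zero, 8);   // one aligned 8-byte store
	 }
       if (count & 4)
	 { unsigned int z = 0; __builtin_memcpy (dst, &z, 4); dst += 4; }
       if (count & 2)
	 { unsigned short z = 0; __builtin_memcpy (dst, &z, 2); dst += 2; }
       if (count & 1)
	 *dst = 0;
     }  */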
11539 /* Expand strlen. */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
11543 rtx addr, scratch1, scratch2, scratch3, scratch4;
  /* The generic case of the strlen expander is long.  Avoid its
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;
11554 addr = force_reg (Pmode, XEXP (src, 0));
11555 scratch1 = gen_reg_rtx (Pmode);
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* It seems that some optimizers fail to combine a call like
	 foo (strlen (bar), strlen (bar));
	 when the move and the subtraction are done here; the length is
	 computed just once only when these instructions are emitted
	 inside output_strlen_unroll().  But since &bar[strlen (bar)] is
	 often used, and this way one fewer register is live across
	 output_strlen_unroll(), this is better.  */
11568 emit_move_insn (out, addr);
11570 ix86_expand_strlensi_unroll_1 (out, src, align);
11572 /* strlensi_unroll_1 returns the address of the zero at the end of
11573 the string, like memchr(), so compute the length by subtracting
11574 the start address. */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      scratch2 = gen_reg_rtx (Pmode);
11584 scratch3 = gen_reg_rtx (Pmode);
11585 scratch4 = force_reg (Pmode, constm1_rtx);
11587 emit_move_insn (scratch3, addr);
11588 eoschar = force_reg (QImode, eoschar);
11590 emit_insn (gen_cld ());
11591 src = replace_equiv_address_nv (src, scratch3);
11593 /* If .md starts supporting :P, this can be done in .md. */
11594 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11595 scratch4), UNSPEC_SCAS);
11596 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
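/* Editor's note: a worked example of the arithmetic above.  The scas
   count register starts at -1 and is decremented once per byte scanned,
   including the terminating zero, so after scanning a string of length
   LEN it holds -(LEN + 2), and LEN = ~count - 1.  For "ab": three bytes
   are scanned, count = -4, ~(-4) = 3, and 3 + (-1) = 2, which is exactly
   the one's complement and the add of -1 emitted above.  */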
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx cmp;
11627 rtx align_2_label = NULL_RTX;
11628 rtx align_3_label = NULL_RTX;
11629 rtx align_4_label = gen_label_rtx ();
11630 rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
11633 rtx scratch = gen_reg_rtx (SImode);
  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);
11640 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
11646 emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11653 /* Leave just the 3 lower bits. */
11654 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11655 NULL_RTX, 0, OPTAB_WIDEN);
11657 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11658 Pmode, 1, align_4_label);
11659 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11660 Pmode, 1, align_2_label);
11661 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11662 Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to a 4-byte boundary.  */
11669 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11670 NULL_RTX, 0, OPTAB_WIDEN);
11672 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11673 Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);
11678 /* Now compare the bytes. */
      /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
11681 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11682 QImode, 1, end_0_label);
11684 /* Increment the address. */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
      /* Not needed with an alignment of 2.  */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }
  /* Generate a loop to check 4 bytes at a time.  It is not a good idea
     to align this loop: doing so only bloats the code without making it
     any faster.  */
11718 emit_label (align_4_label);
11720 mem = change_address (src, SImode, out);
11721 emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
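/* Editor's note: the formula computed below is
     (x - 0x01010101) & ~x & 0x80808080
   which is nonzero iff some byte of X is zero.  Worked example with
   x = 0x61620063 (one zero byte):
     x - 0x01010101 = 0x6060ff62
     ~x             = 0x9e9dff9c
     AND of the two = 0x0000ff00
     & 0x80808080   = 0x00008000   -> nonzero: a zero byte was found.
   With no zero byte (e.g. x = 0x61626364) the final AND is 0.  */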
11730 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11731 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11732 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11733 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11734 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);
  if (TARGET_64BIT)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
11742 emit_move_insn (reg, tmpreg);
11743 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11745 /* If zero is not in the first two bytes, move two bytes forward. */
11746 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11747 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11748 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering the flags.  */
11754 emit_insn (gen_rtx_SET (SImode, reg2,
11755 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11757 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11758 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
11767 rtx end_2_label = gen_label_rtx ();
11768 /* Is zero in the first two bytes? */
11770 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11771 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11772 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
11776 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11777 JUMP_LABEL (tmp) = end_2_label;
11779 /* Not in the first two. Move two bytes forward. */
11780 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
      emit_label (end_2_label);
    }
  /* Avoid a branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
  emit_label (end_0_label);
}
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;
  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();
#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
11819 if (! TARGET_64BIT && flag_pic
11820 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11821 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11822 use_reg (&use, pic_offset_table_rtx);
  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
11826 rtx al = gen_rtx_REG (QImode, 0);
11827 emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
11830 #endif /* TARGET_MACHO */
  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
11867 static struct machine_function *
11868 ix86_init_machine_status (void)
11870 struct machine_function *f;
11872 f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}
11878 /* Return a MEM corresponding to a stack slot with mode MODE.
11879 Allocate a new slot if necessary.
11881 The RTL for a function can have several slots available: N is
11882 which slot to use. */
rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
11887 struct stack_local_entry *s;
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}
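/* Editor's note: a typical use elsewhere in this file looks like
     rtx slot = assign_386_stack_local (HImode, 0);
   (e.g. for the fnstcw/fldcw control-word slots); repeated calls with
   the same MODE and N return the same cached slot.  */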
11907 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11909 static GTY(()) rtx ix86_tls_symbol;
static rtx
ix86_tls_get_addr (void)
{
11914 if (!ix86_tls_symbol)
11916 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11917 (TARGET_GNU_TLS && !TARGET_64BIT)
11918 ? "___tls_get_addr"
11919 : "__tls_get_addr");
  return ix86_tls_symbol;
}
11925 /* Calculate the length of the memory address in the instruction
11926 encoding. Does not include the one-byte modrm, opcode, or prefix. */
int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
11935 if (GET_CODE (addr) == PRE_DEC
11936 || GET_CODE (addr) == POST_INC
11937 || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;
  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */
11953 /* Register Indirect. */
  if (base && !index && !disp)
    {
11956 /* esp (for its index) and ebp (for its displacement) need
11957 the two-byte modrm form. */
11958 if (addr == stack_pointer_rtx
11959 || addr == arg_pointer_rtx
11960 || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }
11965 /* Direct Addressing. */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}
11981 /* ebp always wants a displacement. */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}
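/* Editor's note: some example values this function computes, i.e. the
   number of address bytes beyond the one-byte modrm (ia32, AT&T syntax):
     (%eax)         -> 0   plain register indirect
     (%esp)         -> 1   esp always needs the SIB byte
     16(%ebp)       -> 1   8-bit displacement
     4096(%eax)     -> 4   32-bit displacement
     (%eax,%ebx,4)  -> 1   an index forces the SIB byte
     0x12345678     -> 4   direct address  */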
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */

int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len = 1;
		break;
	      case MODE_HI:
		len = 2;
		break;
	      case MODE_SI:
		len = 4;
		break;
	      /* Immediates for DImode instructions are encoded as
		 32-bit sign-extended values.  */
	      case MODE_DI:
		len = 4;
		break;
	      default:
		fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
12038 /* Compute default value for "length_address" attribute. */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;
12044 if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);
      if (GET_CODE (set) == SET)
	;
12049 else if (GET_CODE (set) == PARALLEL
12050 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12051 set = XVECEXP (set, 0, 0);
      else
	{
#ifdef ENABLE_CHECKING
	  abort ();
#endif
	  return 0;
	}
      return memory_address_length (SET_SRC (set));
    }
12063 extract_insn_cached (insn);
12064 for (i = recog_data.n_operands - 1; i >= 0; --i)
12065 if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}
12073 /* Return the maximum number of instructions a cpu can issue. */
static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
      return 3;

    default:
      return 1;
    }
}
12095 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12096 by DEP_INSN and nothing set by DEP_INSN. */
static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;
12103 /* Simplify the test for uninteresting insns. */
12104 if (insn_type != TYPE_SETCC
12105 && insn_type != TYPE_ICMOV
12106 && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;
12110 if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
12115 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12116 && XVECLEN (PATTERN (dep_insn), 0) == 2
12117 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12118 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;
  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    abort ();
12129 /* This test is true if the dependent insn reads the flags but
12130 not any other potentially set register. */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
12140 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12141 address with operands set by DEP_INSN. */
static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
12151 addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
12154 else if (GET_CODE (addr) == PARALLEL
12155 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
12164 extract_insn_cached (insn);
12165 for (i = recog_data.n_operands - 1; i >= 0; --i)
12166 if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;
12186 /* Anti and output dependencies have zero cost on all CPUs. */
  if (REG_NOTE_KIND (link) != 0)
    return 0;
12190 dep_insn_code_number = recog_memoized (dep_insn);
12192 /* If we can't recognize the insns, we can't really do anything. */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;
12196 insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
12201 case PROCESSOR_PENTIUM:
12202 /* Address Generation Interlock adds a cycle of latency. */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
12206 /* ??? Compares pair with jump/setcc. */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost += 1;
      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;
12217 case PROCESSOR_PENTIUMPRO:
12218 memory = get_attr_memory (insn);
12219 dep_memory = get_attr_memory (dep_insn);
      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;
12228 /* INT->FP conversion is expensive. */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;
12232 /* There is one cycle extra latency between an FP op and a store. */
12233 if (insn_type == TYPE_FMOV
12234 && (set = single_set (dep_insn)) != NULL_RTX
12235 && (set2 = single_set (insn)) != NULL_RTX
12236 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;
      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;
    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
12258 dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;
12265 /* Since we can't represent delayed latencies of load+operation,
12266 increase the cost here for non-imov insns. */
12267 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12268 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12270 /* INT->FP conversion is expensive. */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;
      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;
	  /* Because of the difference between the length of the integer
	     and floating point unit pipeline preparation stages, the
	     memory operands for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
12338 static enum attr_ppro_uops
ix86_safe_ppro_uops (rtx insn)
{
12341 if (recog_memoized (insn) >= 0)
12342 return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}
static void
ix86_dump_ppro_packet (FILE *dump)
{
  if (ix86_sched_data.ppro.decode[0])
    {
12352 fprintf (dump, "PPRO packet: %d",
12353 INSN_UID (ix86_sched_data.ppro.decode[0]));
12354 if (ix86_sched_data.ppro.decode[1])
12355 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12356 if (ix86_sched_data.ppro.decode[2])
12357 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}
12362 /* We're beginning a new block. Initialize data structures as necessary. */
static void
ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12366 int sched_verbose ATTRIBUTE_UNUSED,
		 int veclen ATTRIBUTE_UNUSED)
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
12372 /* Shift INSN to SLOT, and shift everything else down. */
static void
ix86_reorder_insn (rtx *insnp, rtx *slot)
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}
static void
ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;
12396 /* At this point .ppro.decode contains the state of the three
12397 decoders from last "cycle". That is, those insns that were
12398 actually independent. But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */
12402 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);
12408 /* If the decoders are empty, and we've a complex insn at the
12409 head of the priority queue, let it issue without complaint. */
12410 if (decode[0] == NULL)
12412 if (cur_uops == PPRO_UOPS_MANY)
12414 decode[0] = *insnp;
      /* Otherwise, search for a 2-4 uop insn to issue.  */
12419 while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}
12426 /* If so, move it to the head of the line. */
12427 if (cur_uops == PPRO_UOPS_FEW)
12428 ix86_reorder_insn (insnp, e_ready);
12430 /* Issue the head of the queue. */
12431 issued_this_cycle = 1;
12432 decode[0] = *e_ready--;
12435 /* Look for simple insns to fill in the other two slots. */
12436 for (i = 1; i < 3; ++i)
12437 if (decode[i] == NULL)
	if (ready > e_ready)
	  goto ppro_done;
	insnp = e_ready;
12443 cur_uops = ix86_safe_ppro_uops (*insnp);
12444 while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }
12451 /* Found one. Move it to the head of the queue and issue it. */
12452 if (cur_uops == PPRO_UOPS_ONE)
12454 ix86_reorder_insn (insnp, e_ready);
12455 decode[i] = *e_ready--;
12456 issued_this_cycle++;
12460 /* ??? Didn't find one. Ideally, here we would do a lazy split
12461 of 2-uop insns, issue one and queue the other. */
 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
12466 issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
/* We are about to begin issuing insns for this clock cycle.
12471 Override the default sort algorithm to better slot instructions. */
static int
ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12474 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
{
12477 int n_ready = *n_readyp;
12478 rtx *e_ready = ready + n_ready - 1;
12480 /* Make sure to go ahead and initialize key items in
12481 ix86_sched_data if we are not going to bother trying to
12482 reorder the ready queue. */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_tune)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

 out:
  return ix86_issue_rate ();
}
12503 /* We are about to issue INSN. Return the number of insns left on the
12504 ready queue that can be issued this cycle. */
static int
ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
		     int can_issue_more)
{
  int i;

  switch (ix86_tune)
    {
    default:
      return can_issue_more - 1;
12516 case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12520 if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
12524 ix86_sched_data.ppro.decode[0] = insn;
12525 ix86_sched_data.ppro.decode[1] = NULL;
12526 ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
12531 else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
12535 ix86_sched_data.ppro.decode[0] = insn;
12536 ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
12541 for (i = 0; i < 3; ++i)
12542 if (ix86_sched_data.ppro.decode[i] == NULL)
	      {
		ix86_sched_data.ppro.decode[i] = insn;
		break;
	      }
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
static int
ia32_use_dfa_pipeline_interface (void)
{
  if (TARGET_PENTIUM || TARGET_ATHLON_K8)
    return 1;
  return 0;
}
12571 /* How many alternative schedules to try. This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;
  return 0;
}
12585 /* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object
   would ordinarily have.
   The value of this function is used instead of that alignment to
   align the object.  */
int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
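/* Editor's note: for example, a DFmode (double) constant that would
   ordinarily get 32-bit alignment is raised to 64 bits here, and long
   string constants (length >= 31) are word-aligned so the inlined
   string operations above can use aligned word accesses.  */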
12608 /* Compute the alignment for a static variable.
12609 TYPE is the data type, and ALIGN is the alignment that
12610 the object would ordinarily have. The value of this function is used
12611 instead of that alignment to align the object. */
int
ix86_data_alignment (tree type, int align)
{
12616 if (AGGREGATE_TYPE_P (type)
12617 && TYPE_SIZE (type)
12618 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12619 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
12628 && TYPE_SIZE (type)
12629 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12630 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
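/* Editor's note: with the rules above, e.g. a 32-byte (256-bit) struct
   gets 256-bit alignment, `double x[4]' is raised to at least 64-bit
   alignment, a DCmode complex double to 64-bit, and XCmode to
   128-bit.  */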
12672 /* Compute the alignment for a local variable.
12673 TYPE is the data type, and ALIGN is the alignment that
12674 the object would ordinarily have. The value of this macro is used
12675 instead of that alignment to align the object. */
int
ix86_local_alignment (tree type, int align)
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
12727 /* Emit RTL insns to initialize the variable parts of a trampoline.
12728 FNADDR is an RTX for the address of the function's pure code.
12729 CXT is an RTX for the static chain value for the function. */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
12735 /* Compute offset from the end of the jmp to the target function. */
12736 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12737 plus_constant (tramp, 10),
12738 NULL_RTX, 1, OPTAB_DIRECT);
12739 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12740 gen_int_mode (0xb9, QImode));
12741 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12742 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12743 gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;

      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does
	 not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
12754 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12755 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12756 gen_int_mode (0xbb41, HImode));
12757 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12764 gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}

      /* Load the static chain into %r10 with movabs.  */
12770 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12771 gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;

      /* Jump to %r11.  */
12776 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12777 gen_int_mode (0xff49, HImode));
12778 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;

      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }
12785 #ifdef ENABLE_EXECUTE_STACK
12786 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
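/* Editor's note: an illustrative disassembly of the trampolines emitted
   above (immediates are filled in at run time).  32-bit:
     b9 <cxt:4>        movl   $cxt, %ecx
     e9 <disp:4>       jmp    fnaddr         (pc-relative; disp is
					      computed from tramp+10)
   64-bit (the HImode stores are little-endian, so 0xbb49 is the byte
   sequence 49 bb, and so on):
     49 bb <fnaddr:8>  movabs $fnaddr, %r11  (41 bb <imm:4>, movl, when
					      the address zero-extends)
     49 ba <cxt:8>     movabs $cxt, %r10
     49 ff e3          jmp    *%r11  */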
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
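/* Editor's note: a typical invocation later in this file looks like
     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
   The MASK_64BIT test above keeps 64-bit-only builtins (e.g. the
   CVTSS2SI64 conversions in the tables below) from being registered
   on 32-bit targets.  */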
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
static const struct builtin_description bdesc_comi[] =
{
12811 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12812 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12813 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12814 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12815 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12816 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12817 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12818 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12819 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12820 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12821 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12822 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12823 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12824 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12825 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12826 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12827 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12828 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12829 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12830 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12831 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12832 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12833 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
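/* Editor's note: the COMPARISON and FLAG fields above (and in bdesc_2arg
   below) encode how each builtin maps onto the underlying pattern.  For
   instance __builtin_ia32_comigt uses GT directly, while the "cmpgt"
   entries below use LT with FLAG = 1, meaning the operands are swapped
   so that a > b is evaluated as b < a.  */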
static const struct builtin_description bdesc_2arg[] =
{
12840 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12841 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12842 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12843 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12844 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12845 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12846 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12847 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12849 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12850 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12851 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12852 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12853 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12854 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12855 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12856 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12857 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12858 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12859 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12860 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12861 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12862 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12863 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12864 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12865 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12866 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12867 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12868 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12870 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12871 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12872 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12873 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12875 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12876 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12877 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12878 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12880 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12881 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12882 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12883 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12884 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12887 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12888 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12889 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12890 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12891 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12892 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12893 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12894 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12896 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12897 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12898 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12899 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12900 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12901 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12902 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12903 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12905 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12906 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12907 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12909 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12910 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12914 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12915 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12917 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12918 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12919 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12920 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12921 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12922 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12924 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12925 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12926 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12927 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12929 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12930 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12931 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12934 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12937 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12938 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12939 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12941 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12942 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12943 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12945 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12946 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12947 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12948 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12949 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12950 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12952 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12953 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12954 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12955 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12956 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12957 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12959 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12960 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12961 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12962 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12965 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12978 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12979 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12980 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12981 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12982 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12983 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12984 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12985 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12986 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12987 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12988 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12989 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12990 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12991 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12992 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12993 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12994 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12995 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12996 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12998 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13022 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13023 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13024 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13025 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13026 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13027 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13028 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13029 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13070 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13072 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13073 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13074 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13075 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13076 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13077 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13079 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13080 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13081 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13082 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13084 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13087 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13089 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13091 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13093 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13094 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13099 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13100 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13101 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13102 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13103 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
{
13109 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13110 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13112 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13113 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13114 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13116 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13117 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13118 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13119 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13120 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13121 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13126 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13128 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13130 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13131 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13133 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13134 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13135 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13136 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13137 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13139 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13141 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13142 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13143 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13144 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13146 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13147 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13148 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13150 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13153 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13154 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13155 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
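/* Entries with a null name in these tables are not picked up by the
   generic registration loops below; their builtins are instead declared
   individually via def_builtin with hand-built function types (for
   example, "__builtin_ia32_sqrtps" further down pairs with the
   IX86_BUILTIN_SQRTPS entry above).  */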
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
    build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
    build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node,
				integer_type_node, integer_type_node,
				NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);
  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pcint_type_node = build_pointer_type (
    build_type_variant (integer_type_node, 1, 0));
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
    build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
				intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
				long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
				V2DI_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
				pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
				double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_pcint
    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
  tree void_ftype_pcint_v4si
    = build_function_type_list (void_type_node,
				pcint_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree float80_type;
  tree float128_type;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 96;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
	  || d->icode == CODE_FOR_maskncmpv2df3
	  || d->icode == CODE_FOR_vmmaskcmpv2df3
	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
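  /* As an illustration (not an excerpt), an earlier bdesc_2arg entry such
     as

       { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
	 IX86_BUILTIN_ADDPS, 0, 0 }

     has operand 1 in V4SFmode, so the loop above selects
     v4sf_ftype_v4sf_v4sf and registers the builtin with that
     signature.  */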
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
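  /* A sketch of how these names are consumed (the wrappers live in the
     intrinsic headers, not here): xmmintrin.h defines inline functions
     along the lines of

       static __inline __m128
       _mm_sqrt_ps (__m128 __A)
       {
	 return (__m128) __builtin_ia32_sqrtps ((__v4sf) __A);
       }

     so each def_builtin call above forms part of the contract between
     the compiler and those headers.  */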
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
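  /* Note the pairing with the insn codes in bdesc_2arg above: the
     "__builtin_ia32_psllw128"-style builtins take the shift count in an
     XMM register (the ashlv8hi3_ti pattern, hence the v2di argument),
     while the "__builtin_ia32_psllwi128"-style ones take an integer
     count (plain ashlv8hi3), mirroring the two hardware forms of the
     shift instructions.  */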
  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0),
				CONST0_RTX (V4SFmode)));
  return x;
}
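/* The V4SF arm above covers the non-MMX vector modes generally: the
   clear is emitted through a V4SF subreg of the same 128-bit register,
   which zeroes all the bits regardless of how they are later
   interpreted.  */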
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
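/* Passing the same source for both inputs is deliberate: the vm* scalar
   patterns used here (vmsqrtv4sf2 and friends) compute only element 0
   and take the remaining elements from the second operand, so using op0
   twice reproduces the sqrtss-style "upper elements pass through"
   semantics.  */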
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
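/* The comi insn itself only sets the flags; the SET emitted above then
   materializes the comparison outcome into the low byte of the fresh
   SImode pseudo (via STRICT_LOW_PART).  Because that pseudo was zeroed
   first, SUBREG_REG hands back a clean 0/1 result in SImode.  */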
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;
    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  error ("selector must be an integer constant in the range 0..%i",
		  fcode == IX86_BUILTIN_PEXTRW ? 3:7);
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  error ("selector must be an integer constant in the range 0..%i",
		  fcode == IX86_BUILTIN_PINSRW ? 15:255);
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
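      /* The reordering above means a call like
	 __builtin_ia32_maskmovq (data, mask, addr) expands with addr as
	 operand 0 of the insn and data/mask as operands 1 and 2,
	 matching the store-like operand order of the maskmov
	 patterns.  */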
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
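      /* ldmxcsr and stmxcsr only operate on memory, so both builtins
	 bounce the value through a stack temporary: LDMXCSR stores the
	 argument to the slot before loading MXCSR from it, and STMXCSR
	 reads the slot back into a register after the insn writes
	 it.  */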
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return 0;
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;
14489 case IX86_BUILTIN_SQRTSD:
14490 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14491 case IX86_BUILTIN_LOADAPD:
14492 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14493 case IX86_BUILTIN_LOADUPD:
14494 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14496 case IX86_BUILTIN_STOREAPD:
14497 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14498 case IX86_BUILTIN_STOREUPD:
14499 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14501 case IX86_BUILTIN_LOADSD:
14502 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14504 case IX86_BUILTIN_STORESD:
14505 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14507 case IX86_BUILTIN_SETPD1:
14508 target = assign_386_stack_local (DFmode, 0);
14509 arg0 = TREE_VALUE (arglist);
14510 emit_move_insn (adjust_address (target, DFmode, 0),
14511 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14512 op0 = gen_reg_rtx (V2DFmode);
14513 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14514 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14517 case IX86_BUILTIN_SETPD:
14518 target = assign_386_stack_local (V2DFmode, 0);
14519 arg0 = TREE_VALUE (arglist);
14520 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14521 emit_move_insn (adjust_address (target, DFmode, 0),
14522 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14523 emit_move_insn (adjust_address (target, DFmode, 8),
14524 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14525 op0 = gen_reg_rtx (V2DFmode);
14526 emit_insn (gen_sse2_movapd (op0, target));
14529 case IX86_BUILTIN_LOADRPD:
14530 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14531 gen_reg_rtx (V2DFmode), 1);
14532 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14535 case IX86_BUILTIN_LOADPD1:
14536 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14537 gen_reg_rtx (V2DFmode), 1);
14538 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14541 case IX86_BUILTIN_STOREPD1:
14542 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14543 case IX86_BUILTIN_STORERPD:
14544 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14546 case IX86_BUILTIN_CLRPD:
14547 target = gen_reg_rtx (V2DFmode);
14548 emit_insn (gen_sse_clrv2df (target));
14551 case IX86_BUILTIN_MFENCE:
14552 emit_insn (gen_sse2_mfence ());
14554 case IX86_BUILTIN_LFENCE:
14555 emit_insn (gen_sse2_lfence ());
14558 case IX86_BUILTIN_CLFLUSH:
14559 arg0 = TREE_VALUE (arglist);
14560 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14561 icode = CODE_FOR_sse2_clflush;
14562 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14563 op0 = copy_to_mode_reg (Pmode, op0);
14565 emit_insn (gen_sse2_clflush (op0));
14568 case IX86_BUILTIN_MOVNTPD:
14569 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14570 case IX86_BUILTIN_MOVNTDQ:
14571 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14572 case IX86_BUILTIN_MOVNTI:
14573 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14575 case IX86_BUILTIN_LOADDQA:
14576 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14577 case IX86_BUILTIN_LOADDQU:
14578 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14579 case IX86_BUILTIN_LOADD:
14580 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14582 case IX86_BUILTIN_STOREDQA:
14583 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14584 case IX86_BUILTIN_STOREDQU:
14585 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14586 case IX86_BUILTIN_STORED:
14587 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14589 case IX86_BUILTIN_MONITOR:
14590 arg0 = TREE_VALUE (arglist);
14591 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14592 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14593 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14594 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14595 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14597 op0 = copy_to_mode_reg (SImode, op0);
14599 op1 = copy_to_mode_reg (SImode, op1);
14601 op2 = copy_to_mode_reg (SImode, op2);
14602 emit_insn (gen_monitor (op0, op1, op2));
14605 case IX86_BUILTIN_MWAIT:
14606 arg0 = TREE_VALUE (arglist);
14607 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14608 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14609 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14611 op0 = copy_to_mode_reg (SImode, op0);
14613 op1 = copy_to_mode_reg (SImode, op1);
14614 emit_insn (gen_mwait (op0, op1));
14617 case IX86_BUILTIN_LOADDDUP:
14618 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14620 case IX86_BUILTIN_LDDQU:
14621 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);
14628 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14629 if (d->code == fcode)
14631 /* Compares are treated specially. */
14632 if (d->icode == CODE_FOR_maskcmpv4sf3
14633 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14634 || d->icode == CODE_FOR_maskncmpv4sf3
14635 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14636 || d->icode == CODE_FOR_maskcmpv2df3
14637 || d->icode == CODE_FOR_vmmaskcmpv2df3
14638 || d->icode == CODE_FOR_maskncmpv2df3
14639 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14640 return ix86_expand_sse_compare (d, arglist, target);
14642 return ix86_expand_binop_builtin (d->icode, arglist, target);
14645 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14646 if (d->code == fcode)
14647 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14649 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14650 if (d->code == fcode)
14651 return ix86_expand_sse_comi (d, arglist, target);
14653 /* @@@ Should really do something sensible here. */
14657 /* Store OPERAND to memory after reload is completed. This means
14658 that we can't easily use assign_stack_local. */
14660 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14663 if (!reload_completed)
14665 if (TARGET_RED_ZONE)
14667 result = gen_rtx_MEM (mode,
14668 gen_rtx_PLUS (Pmode,
14670 GEN_INT (-RED_ZONE_SIZE)));
14671 emit_move_insn (result, operand);
14673 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14679 operand = gen_lowpart (DImode, operand);
14683 gen_rtx_SET (VOIDmode,
14684 gen_rtx_MEM (DImode,
14685 gen_rtx_PRE_DEC (DImode,
14686 stack_pointer_rtx)),
14692 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14701 split_di (&operand, 1, operands, operands + 1);
14703 gen_rtx_SET (VOIDmode,
14704 gen_rtx_MEM (SImode,
14705 gen_rtx_PRE_DEC (Pmode,
14706 stack_pointer_rtx)),
14709 gen_rtx_SET (VOIDmode,
14710 gen_rtx_MEM (SImode,
14711 gen_rtx_PRE_DEC (Pmode,
14712 stack_pointer_rtx)),
14717 /* It is better to store HImode values as SImode. */
14718 if (!TARGET_PARTIAL_REG_STALL)
14719 operand = gen_lowpart (SImode, operand);
14723 gen_rtx_SET (VOIDmode,
14724 gen_rtx_MEM (GET_MODE (operand),
14725 gen_rtx_PRE_DEC (SImode,
14726 stack_pointer_rtx)),
14732 result = gen_rtx_MEM (mode, stack_pointer_rtx);
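/* A minimal standalone sketch of the slot-selection policy above (not
   GCC code; the constant and names are illustrative only): with a red
   zone the temporary may live below the stack pointer without adjusting
   it, otherwise the stack pointer is pre-decremented as for a push.  */
#if 0
#include <stdint.h>

#define RED_ZONE_SIZE 128	/* x86-64 SysV red zone, for illustration.  */

static uintptr_t
scratch_slot (uintptr_t sp, int have_red_zone, int size, uintptr_t *new_sp)
{
  if (have_red_zone)
    {
      *new_sp = sp;			/* Stack pointer stays untouched.  */
      return sp - RED_ZONE_SIZE;	/* Slot lives inside the red zone.  */
    }
  *new_sp = sp - size;			/* Emulate a push: pre-decrement.  */
  return *new_sp;
}
#endif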
14737 /* Free the memory allocated by ix86_force_to_memory above. */
14739 ix86_free_from_memory (enum machine_mode mode)
14741 if (!TARGET_RED_ZONE)
14745 if (mode == DImode || TARGET_64BIT)
14747 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14751 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14752 to a pop or add instruction if registers are available. */
14753 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14754 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14759 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14760 QImode must go into class Q_REGS.
14761 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14762 movdf to do mem-to-mem moves through integer regs. */
14764 ix86_preferred_reload_class (rtx x, enum reg_class class)
14766 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14768 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14770 /* SSE can't load any constant directly yet. */
14771 if (SSE_CLASS_P (class))
14773 /* Float registers can load 0 and 1 directly. */
14774 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14776 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14777 if (MAYBE_SSE_CLASS_P (class))
14778 return (reg_class_subset_p (class, GENERAL_REGS)
14779 ? GENERAL_REGS : FLOAT_REGS);
14783 /* General regs can load everything. */
14784 if (reg_class_subset_p (class, GENERAL_REGS))
14785 return GENERAL_REGS;
14786 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14787 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14790 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14792 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14797 /* If we are copying between general and FP registers, we need a memory
14798 location. The same is true for SSE and MMX registers.
14800 The macro can't work reliably when one of the CLASSES is a class containing
14801 registers from multiple units (SSE, MMX, integer). We avoid this by never
14802 combining those units in a single alternative in the machine description.
14803 Ensure that this constraint holds to avoid surprises.
14805 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14806 enforce these sanity checks. */
14808 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14809 enum machine_mode mode, int strict)
14811 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14812 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14813 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14814 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14815 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14816 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14823 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14824 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14825 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14826 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14827 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
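/* A standalone restatement of the rule above, using hypothetical unit
   tags instead of GCC's register classes (illustrative sketch only; the
   optimize_size refinement is omitted): x87 crossings always need
   memory, and SSE/MMX crossings need it unless the mode is directly
   movable and inter-unit moves are enabled.  */
#if 0
enum unit { INT_UNIT, X87_UNIT, SSE_UNIT, MMX_UNIT };

static int
needs_memory (enum unit u1, enum unit u2,
              int directly_movable_mode, int inter_unit_moves)
{
  if ((u1 == X87_UNIT) != (u2 == X87_UNIT))
    return 1;
  if ((u1 == SSE_UNIT) != (u2 == SSE_UNIT)
      || (u1 == MMX_UNIT) != (u2 == MMX_UNIT))
    return !directly_movable_mode || !inter_unit_moves;
  return 0;
}
#endif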
14829 /* Return the cost of moving data from a register in class CLASS1 to
14830 one in class CLASS2.
14832 It is not required that the cost always equal 2 when FROM is the same as TO;
14833 on some machines it is expensive to move between registers if they are not
14834 general registers. */
14836 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14837 enum reg_class class2)
14839 /* In case we require secondary memory, compute the cost of the store followed
14840 by the load. In order to avoid bad register allocation choices, we need
14841 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14843 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14847 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14848 MEMORY_MOVE_COST (mode, class1, 1));
14849 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14850 MEMORY_MOVE_COST (mode, class2, 1));
14852 /* When copying from a general purpose register we may emit multiple
14853 stores followed by a single load, causing a memory size mismatch stall.
14854 Count this as an arbitrarily high cost of 20. */
14855 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14858 /* In the case of FP/MMX moves, the registers actually overlap, and we
14859 have to switch modes in order to treat them differently. */
14860 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14861 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14867 /* Moves between SSE/MMX and integer unit are expensive. */
14868 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14869 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14870 return ix86_cost->mmxsse_to_integer;
14871 if (MAYBE_FLOAT_CLASS_P (class1))
14872 return ix86_cost->fp_move;
14873 if (MAYBE_SSE_CLASS_P (class1))
14874 return ix86_cost->sse_move;
14875 if (MAYBE_MMX_CLASS_P (class1))
14876 return ix86_cost->mmx_move;
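/* The secondary-memory branch above, restated standalone with
   hypothetical per-class costs (sketch only, not the GCC API): spill
   from CLASS1, fill into CLASS2, and penalize the case where CLASS1
   needs more registers than CLASS2.  */
#if 0
static int
move_cost_via_memory (int store1, int load1, int store2, int load2,
                      int nregs1, int nregs2)
{
  int cost = 0;

  /* Charge the worse of store/load for each side, so the result is at
     least as high as the symmetric memory move cost.  */
  cost += store1 > load1 ? store1 : load1;
  cost += store2 > load2 ? store2 : load2;

  /* Several narrow stores followed by one wide load stall; use an
     arbitrarily high penalty of 20.  */
  if (nregs1 > nregs2)
    cost += 20;
  return cost;
}
#endif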
14880 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14882 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14884 /* The flags registers, and only the flags registers, can hold CCmode values. */
14885 if (CC_REGNO_P (regno))
14886 return GET_MODE_CLASS (mode) == MODE_CC;
14887 if (GET_MODE_CLASS (mode) == MODE_CC
14888 || GET_MODE_CLASS (mode) == MODE_RANDOM
14889 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14891 if (FP_REGNO_P (regno))
14892 return VALID_FP_MODE_P (mode);
14893 if (SSE_REGNO_P (regno))
14894 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14895 if (MMX_REGNO_P (regno))
14897 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14898 /* We handle both integers and floats in the general purpose registers.
14899 In the future we should be able to handle vector modes as well. */
14900 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14902 /* Take care with QImode values: they can live in non-QI regs, but then
14903 they cause partial register stalls. */
14904 if (regno < 4 || mode != QImode || TARGET_64BIT)
14906 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14909 /* Return the cost of moving data of mode M between a
14910 register and memory. A value of 2 is the default; this cost is
14911 relative to those in `REGISTER_MOVE_COST'.
14913 If moving between registers and memory is more expensive than
14914 between two registers, you should define this macro to express the relative cost.
14917 Also model the increased cost of moving QImode registers in non-Q_REGS classes. */
14921 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14923 if (FLOAT_CLASS_P (class))
14940 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14942 if (SSE_CLASS_P (class))
14945 switch (GET_MODE_SIZE (mode))
14959 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14961 if (MMX_CLASS_P (class))
14964 switch (GET_MODE_SIZE (mode))
14975 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14977 switch (GET_MODE_SIZE (mode))
14981 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14982 : ix86_cost->movzbl_load);
14984 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14985 : ix86_cost->int_store[0] + 4);
14988 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14990 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
14991 if (mode == TFmode)
14993 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14994 * (((int) GET_MODE_SIZE (mode)
14995 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
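/* The word-count factor in the return above is ordinary ceiling
   division; a standalone restatement with hypothetical sizes:  */
#if 0
static int
words_needed (int mode_size, int word_size)
{
  return (mode_size + word_size - 1) / word_size;
}
/* e.g. words_needed (12, 4) == 3: a 12-byte XFmode value (and hence
   TFmode, which is moved as XFmode) takes three 32-bit moves.  */
#endif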
14999 /* Compute a (partial) cost for rtx X. Return true if the complete
15000 cost has been computed, and false if subexpressions should be
15001 scanned. In either case, *TOTAL contains the cost result. */
15004 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15006 enum machine_mode mode = GET_MODE (x);
15014 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15016 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15018 else if (flag_pic && SYMBOLIC_CONST (x)
15020 || (GET_CODE (x) != LABEL_REF
15021 && (GET_CODE (x) != SYMBOL_REF
15022 || !SYMBOL_REF_LOCAL_P (x)))))
15029 if (mode == VOIDmode)
15032 switch (standard_80387_constant_p (x))
15037 default: /* Other constants */
15042 /* Start with (MEM (SYMBOL_REF)), since that's where
15043 it'll probably end up. Add a penalty for size. */
15044 *total = (COSTS_N_INSNS (1)
15045 + (flag_pic != 0 && !TARGET_64BIT)
15046 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15052 /* Zero extension is often completely free on x86_64, so make
15053 it as cheap as possible. */
15054 if (TARGET_64BIT && mode == DImode
15055 && GET_MODE (XEXP (x, 0)) == SImode)
15057 else if (TARGET_ZERO_EXTEND_WITH_AND)
15058 *total = COSTS_N_INSNS (ix86_cost->add);
15060 *total = COSTS_N_INSNS (ix86_cost->movzx);
15064 *total = COSTS_N_INSNS (ix86_cost->movsx);
15068 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15069 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15071 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15074 *total = COSTS_N_INSNS (ix86_cost->add);
15077 if ((value == 2 || value == 3)
15078 && !TARGET_DECOMPOSE_LEA
15079 && ix86_cost->lea <= ix86_cost->shift_const)
15081 *total = COSTS_N_INSNS (ix86_cost->lea);
15091 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15093 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15095 if (INTVAL (XEXP (x, 1)) > 32)
15096 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15098 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15102 if (GET_CODE (XEXP (x, 1)) == AND)
15103 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15105 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15110 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15111 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15113 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15118 if (FLOAT_MODE_P (mode))
15119 *total = COSTS_N_INSNS (ix86_cost->fmul);
15120 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15122 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15125 for (nbits = 0; value != 0; value >>= 1)
15128 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15129 + nbits * ix86_cost->mult_bit);
15133 /* This is arbitrary */
15134 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15135 + 7 * ix86_cost->mult_bit);
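/* Standalone restatement of the constant-multiply costing above
   (illustrative sketch; the cost parameters are hypothetical): a base
   cost plus a per-bit cost, where the loop measures the bit length of
   the constant, and 7 stands in when the multiplier is unknown.  */
#if 0
static int
const_mult_cost (unsigned long long value, int init_cost, int bit_cost)
{
  int nbits;

  for (nbits = 0; value != 0; value >>= 1)
    nbits++;			/* Bit length of the constant.  */
  return init_cost + nbits * bit_cost;
}
#endif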
15143 if (FLOAT_MODE_P (mode))
15144 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15146 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15150 if (FLOAT_MODE_P (mode))
15151 *total = COSTS_N_INSNS (ix86_cost->fadd);
15152 else if (!TARGET_DECOMPOSE_LEA
15153 && GET_MODE_CLASS (mode) == MODE_INT
15154 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15156 if (GET_CODE (XEXP (x, 0)) == PLUS
15157 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15158 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15159 && CONSTANT_P (XEXP (x, 1)))
15161 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15162 if (val == 2 || val == 4 || val == 8)
15164 *total = COSTS_N_INSNS (ix86_cost->lea);
15165 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15166 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15168 *total += rtx_cost (XEXP (x, 1), outer_code);
15172 else if (GET_CODE (XEXP (x, 0)) == MULT
15173 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15175 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15176 if (val == 2 || val == 4 || val == 8)
15178 *total = COSTS_N_INSNS (ix86_cost->lea);
15179 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15180 *total += rtx_cost (XEXP (x, 1), outer_code);
15184 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15186 *total = COSTS_N_INSNS (ix86_cost->lea);
15187 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15188 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15189 *total += rtx_cost (XEXP (x, 1), outer_code);
15196 if (FLOAT_MODE_P (mode))
15198 *total = COSTS_N_INSNS (ix86_cost->fadd);
15206 if (!TARGET_64BIT && mode == DImode)
15208 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15209 + (rtx_cost (XEXP (x, 0), outer_code)
15210 << (GET_MODE (XEXP (x, 0)) != DImode))
15211 + (rtx_cost (XEXP (x, 1), outer_code)
15212 << (GET_MODE (XEXP (x, 1)) != DImode)));
15218 if (FLOAT_MODE_P (mode))
15220 *total = COSTS_N_INSNS (ix86_cost->fchs);
15226 if (!TARGET_64BIT && mode == DImode)
15227 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15229 *total = COSTS_N_INSNS (ix86_cost->add);
15233 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15238 if (FLOAT_MODE_P (mode))
15239 *total = COSTS_N_INSNS (ix86_cost->fabs);
15243 if (FLOAT_MODE_P (mode))
15244 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15248 if (XINT (x, 1) == UNSPEC_TP)
15257 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15259 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15262 fputs ("\tpushl $", asm_out_file);
15263 assemble_name (asm_out_file, XSTR (symbol, 0));
15264 fputc ('\n', asm_out_file);
15270 static int current_machopic_label_num;
15272 /* Given a symbol name and its associated stub, write out the
15273 definition of the stub. */
15276 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15278 unsigned int length;
15279 char *binder_name, *symbol_name, lazy_ptr_name[32];
15280 int label = ++current_machopic_label_num;
15282 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15283 symb = (*targetm.strip_name_encoding) (symb);
15285 length = strlen (stub);
15286 binder_name = alloca (length + 32);
15287 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15289 length = strlen (symb);
15290 symbol_name = alloca (length + 32);
15291 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15293 sprintf (lazy_ptr_name, "L%d$lz", label);
15296 machopic_picsymbol_stub_section ();
15298 machopic_symbol_stub_section ();
15300 fprintf (file, "%s:\n", stub);
15301 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15305 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15306 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15307 fprintf (file, "\tjmp %%edx\n");
15310 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15312 fprintf (file, "%s:\n", binder_name);
15316 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15317 fprintf (file, "\tpushl %%eax\n");
15320 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15322 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15324 machopic_lazy_symbol_ptr_section ();
15325 fprintf (file, "%s:\n", lazy_ptr_name);
15326 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15327 fprintf (file, "\t.long %s\n", binder_name);
15329 #endif /* TARGET_MACHO */
15331 /* Order the registers for the register allocator. */
15334 x86_order_regs_for_local_alloc (void)
15339 /* First allocate the local general purpose registers. */
15340 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15341 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15342 reg_alloc_order [pos++] = i;
15344 /* Global general purpose registers. */
15345 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15346 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15347 reg_alloc_order [pos++] = i;
15349 /* x87 registers come first in case we are doing FP math using them. */
15351 if (!TARGET_SSE_MATH)
15352 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15353 reg_alloc_order [pos++] = i;
15355 /* SSE registers. */
15356 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15357 reg_alloc_order [pos++] = i;
15358 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15359 reg_alloc_order [pos++] = i;
15361 /* x87 registers. */
15362 if (TARGET_SSE_MATH)
15363 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15364 reg_alloc_order [pos++] = i;
15366 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15367 reg_alloc_order [pos++] = i;
15369 /* Initialize the rest of the array, as we do not allocate some registers at all. */
15371 while (pos < FIRST_PSEUDO_REGISTER)
15372 reg_alloc_order [pos++] = 0;
15375 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15376 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15379 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15380 struct attribute_spec.handler. */
15382 ix86_handle_struct_attribute (tree *node, tree name,
15383 tree args ATTRIBUTE_UNUSED,
15384 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15387 if (DECL_P (*node))
15389 if (TREE_CODE (*node) == TYPE_DECL)
15390 type = &TREE_TYPE (*node);
15395 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15396 || TREE_CODE (*type) == UNION_TYPE)))
15398 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15399 *no_add_attrs = true;
15402 else if ((is_attribute_p ("ms_struct", name)
15403 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15404 || ((is_attribute_p ("gcc_struct", name)
15405 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15407 warning ("`%s' incompatible attribute ignored",
15408 IDENTIFIER_POINTER (name));
15409 *no_add_attrs = true;
15416 ix86_ms_bitfield_layout_p (tree record_type)
15418 return ((TARGET_USE_MS_BITFIELD_LAYOUT
15419 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15420 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
15423 /* Return an expression indicating where the this parameter is
15424 located on entry to FUNCTION. */
15427 x86_this_parameter (tree function)
15429 tree type = TREE_TYPE (function);
15433 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15434 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15437 if (ix86_function_regparm (type, function) > 0)
15441 parm = TYPE_ARG_TYPES (type);
15442 /* Figure out whether or not the function has a variable number of arguments. */
15444 for (; parm; parm = TREE_CHAIN (parm))
15445 if (TREE_VALUE (parm) == void_type_node)
15447 /* If not, the this parameter is passed in the first argument register. */
15451 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15453 return gen_rtx_REG (SImode, regno);
15457 if (aggregate_value_p (TREE_TYPE (type), type))
15458 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15460 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
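/* The 32-bit stack fall-through above, restated as a standalone sketch
   (hypothetical helper): the return address occupies the first word,
   and an aggregate-returning function pushes the hidden return-slot
   pointer before the this parameter.  */
#if 0
static void *
this_location (char *esp, int returns_aggregate)
{
  /* esp[0..3]: return address; esp[4..7]: hidden aggregate-return
     pointer, when present; the this parameter follows.  */
  return esp + (returns_aggregate ? 8 : 4);
}
#endif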
15463 /* Determine whether x86_output_mi_thunk can succeed. */
15466 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15467 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15468 HOST_WIDE_INT vcall_offset, tree function)
15470 /* 64-bit can handle anything. */
15474 /* For 32-bit, everything's fine if we have one free register. */
15475 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15478 /* Need a free register for vcall_offset. */
15482 /* Need a free register for GOT references. */
15483 if (flag_pic && !(*targetm.binds_local_p) (function))
15486 /* Otherwise ok. */
15490 /* Output the assembler code for a thunk function. THUNK_DECL is the
15491 declaration for the thunk function itself, FUNCTION is the decl for
15492 the target function. DELTA is an immediate constant offset to be
15493 added to THIS. If VCALL_OFFSET is nonzero, the word at
15494 *(*this + vcall_offset) should be added to THIS. */
15497 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15498 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15499 HOST_WIDE_INT vcall_offset, tree function)
15502 rtx this = x86_this_parameter (function);
15505 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15506 pull it in now and let DELTA benefit. */
15509 else if (vcall_offset)
15511 /* Put the this parameter into %eax. */
15513 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15514 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15517 this_reg = NULL_RTX;
15519 /* Adjust the this parameter by a fixed constant. */
15522 xops[0] = GEN_INT (delta);
15523 xops[1] = this_reg ? this_reg : this;
15526 if (!x86_64_general_operand (xops[0], DImode))
15528 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15530 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15534 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15537 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15540 /* Adjust the this parameter by a value stored in the vtable. */
15544 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15547 int tmp_regno = 2 /* ECX */;
15548 if (lookup_attribute ("fastcall",
15549 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15550 tmp_regno = 0 /* EAX */;
15551 tmp = gen_rtx_REG (SImode, tmp_regno);
15554 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15557 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15559 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15561 /* Adjust the this parameter. */
15562 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15563 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15565 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15566 xops[0] = GEN_INT (vcall_offset);
15568 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15569 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15571 xops[1] = this_reg;
15573 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15575 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15578 /* If necessary, drop THIS back to its stack slot. */
15579 if (this_reg && this_reg != this)
15581 xops[0] = this_reg;
15583 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15586 xops[0] = XEXP (DECL_RTL (function), 0);
15589 if (!flag_pic || (*targetm.binds_local_p) (function))
15590 output_asm_insn ("jmp\t%P0", xops);
15593 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15594 tmp = gen_rtx_CONST (Pmode, tmp);
15595 tmp = gen_rtx_MEM (QImode, tmp);
15597 output_asm_insn ("jmp\t%A0", xops);
15602 if (!flag_pic || (*targetm.binds_local_p) (function))
15603 output_asm_insn ("jmp\t%P0", xops);
15608 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15609 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15610 tmp = gen_rtx_MEM (QImode, tmp);
15612 output_asm_insn ("jmp\t%0", xops);
15615 #endif /* TARGET_MACHO */
15617 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15618 output_set_got (tmp);
15621 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15622 output_asm_insn ("jmp\t{*}%1", xops);
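/* What the emitted thunk computes before the tail jump, restated in
   plain C (illustrative only; the real adjustment happens entirely in
   registers, and the integer types here are assumptions):  */
#if 0
static void *
adjust_this (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;

  if (vcall_offset)
    {
      char *vtable = *(char **) p;		/* *this */
      p += *(long *) (vtable + vcall_offset);	/* *(*this + vcall_offset) */
    }
  return p;
}
#endif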
15628 x86_file_start (void)
15630 default_file_start ();
15631 if (X86_FILE_START_VERSION_DIRECTIVE)
15632 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15633 if (X86_FILE_START_FLTUSED)
15634 fputs ("\t.global\t__fltused\n", asm_out_file);
15635 if (ix86_asm_dialect == ASM_INTEL)
15636 fputs ("\t.intel_syntax\n", asm_out_file);
15640 x86_field_alignment (tree field, int computed)
15642 enum machine_mode mode;
15643 tree type = TREE_TYPE (field);
15645 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15647 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15648 ? get_inner_array_type (type) : type);
15649 if (mode == DFmode || mode == DCmode
15650 || GET_MODE_CLASS (mode) == MODE_INT
15651 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15652 return MIN (32, computed);
15656 /* Output assembler code to FILE to increment profiler label # LABELNO
15657 for profiling a function entry. */
15659 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15664 #ifndef NO_PROFILE_COUNTERS
15665 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15667 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15671 #ifndef NO_PROFILE_COUNTERS
15672 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15674 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15678 #ifndef NO_PROFILE_COUNTERS
15679 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15680 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15682 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15686 #ifndef NO_PROFILE_COUNTERS
15687 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15688 PROFILE_COUNT_REGISTER);
15690 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15694 /* We don't have exact information about the insn sizes, but we may quite
15695 safely assume that we know about all 1-byte insns and about memory
15696 address sizes. This is enough to eliminate unnecessary padding in most cases. */
15700 min_insn_size (rtx insn)
15704 if (!INSN_P (insn) || !active_insn_p (insn))
15707 /* Discard alignments we've emitted, and jump tables. */
15708 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15709 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15711 if (GET_CODE (insn) == JUMP_INSN
15712 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15713 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15716 /* Important case: calls are always 5 bytes.
15717 It is common to have many calls in a row. */
15718 if (GET_CODE (insn) == CALL_INSN
15719 && symbolic_reference_mentioned_p (PATTERN (insn))
15720 && !SIBLING_CALL_P (insn))
15722 if (get_attr_length (insn) <= 1)
15725 /* For normal instructions we may rely on the sizes of addresses
15726 and on the presence of a symbolic reference to require 4 bytes of encoding.
15727 This is not the case for jumps, whose references are PC relative. */
15728 if (GET_CODE (insn) != JUMP_INSN)
15730 l = get_attr_length_address (insn);
15731 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15740 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
15744 k8_avoid_jump_misspredicts (void)
15746 rtx insn, start = get_insns ();
15747 int nbytes = 0, njumps = 0;
15750 /* Look for all minimal intervals of instructions containing 4 jumps.
15751 The intervals are bounded by START and INSN. NBYTES is the total
15752 size of the instructions in the interval, including INSN but not
15753 including START. When NBYTES is smaller than 16, it is possible
15754 that the ends of START and INSN fall into the same 16-byte page.
15756 The smallest offset in the page at which INSN can start is the case
15757 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
15758 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
15760 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15763 nbytes += min_insn_size (insn);
15765 fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
15766 INSN_UID (insn), min_insn_size (insn));
15767 if ((GET_CODE (insn) == JUMP_INSN
15768 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15769 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15770 || GET_CODE (insn) == CALL_INSN)
15777 start = NEXT_INSN (start);
15778 if ((GET_CODE (start) == JUMP_INSN
15779 && GET_CODE (PATTERN (start)) != ADDR_VEC
15780 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15781 || GET_CODE (start) == CALL_INSN)
15782 njumps--, isjump = 1;
15785 nbytes -= min_insn_size (start);
15790 fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
15791 INSN_UID (start), INSN_UID (insn), nbytes);
15793 if (njumps == 3 && isjump && nbytes < 16)
15795 int padsize = 15 - nbytes + min_insn_size (insn);
15798 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15799 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
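/* Standalone sketch of the sliding-window scan above, over a
   hypothetical stream of (size, is_jump) pairs: report how much padding
   would push the fourth jump of a dense window out of its 16-byte
   page.  */
#if 0
#include <stdio.h>

struct fake_insn { int size; int is_jump; };

static void
scan (const struct fake_insn *insn, int n)
{
  int start = 0, nbytes = 0, njumps = 0, isjump = 0, i;

  for (i = 0; i < n; i++)
    {
      nbytes += insn[i].size;
      if (insn[i].is_jump)
        njumps++;
      while (njumps > 3)
        {
          isjump = insn[start].is_jump;	/* Was a jump shifted out?  */
          njumps -= isjump;
          nbytes -= insn[start].size;
          start++;
        }
      if (njumps == 3 && isjump && nbytes < 16)
        printf ("pad insn %d by %d bytes\n", i, 15 - nbytes + insn[i].size);
    }
}
#endif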
15804 /* Implement machine specific optimizations.
15805 At the moment we implement a single transformation: AMD Athlon works faster
15806 when RET is not the destination of a conditional jump and is not directly
15807 preceded by another jump instruction. We avoid the penalty by inserting a
15808 NOP just before such RET instructions. */
15814 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15816 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15818 basic_block bb = e->src;
15819 rtx ret = BB_END (bb);
15821 bool replace = false;
15823 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15824 || !maybe_hot_bb_p (bb))
15826 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15827 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15829 if (prev && GET_CODE (prev) == CODE_LABEL)
15832 for (e = bb->pred; e; e = e->pred_next)
15833 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15834 && !(e->flags & EDGE_FALLTHRU))
15839 prev = prev_active_insn (ret);
15841 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15842 || GET_CODE (prev) == CALL_INSN))
15844 /* Empty functions suffer a branch mispredict even when the jump destination
15845 is not visible to us. */
15846 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15851 emit_insn_before (gen_return_internal_long (), ret);
15855 k8_avoid_jump_misspredicts ();
15858 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
15861 x86_extended_QIreg_mentioned_p (rtx insn)
15864 extract_insn_cached (insn);
15865 for (i = 0; i < recog_data.n_operands; i++)
15866 if (REG_P (recog_data.operand[i])
15867 && REGNO (recog_data.operand[i]) >= 4)
15872 /* Return nonzero when P points to a register encoded via a REX prefix.
15873 Called via for_each_rtx. */
15875 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15877 unsigned int regno;
15880 regno = REGNO (*p);
15881 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15884 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
15887 x86_extended_reg_mentioned_p (rtx insn)
15889 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15892 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15893 optabs would emit if we didn't have TFmode patterns. */
15896 x86_emit_floatuns (rtx operands[2])
15898 rtx neglab, donelab, i0, i1, f0, in, out;
15899 enum machine_mode mode, inmode;
15901 inmode = GET_MODE (operands[1]);
15902 if (inmode != SImode
15903 && inmode != DImode)
15907 in = force_reg (inmode, operands[1]);
15908 mode = GET_MODE (out);
15909 neglab = gen_label_rtx ();
15910 donelab = gen_label_rtx ();
15911 i1 = gen_reg_rtx (Pmode);
15912 f0 = gen_reg_rtx (mode);
15914 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15916 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15917 emit_jump_insn (gen_jump (donelab));
15920 emit_label (neglab);
15922 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15923 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15924 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15925 expand_float (f0, i0, 0);
15926 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15928 emit_label (donelab);
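/* The same trick on scalars, as a standalone sketch: when the input
   looks negative to a signed conversion, halve it (folding the lost low
   bit back in so rounding stays correct), convert, and double.  */
#if 0
static double
floatuns (unsigned long long u)
{
  unsigned long long half;
  double f;

  if ((long long) u >= 0)
    return (double) (long long) u;	/* Fits the signed path.  */
  half = (u >> 1) | (u & 1);		/* Halve, keeping the sticky bit.  */
  f = (double) (long long) half;
  return f + f;
}
#endif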
15931 /* Return nonzero if we do not know how to pass TYPE solely in registers. */
15933 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15935 if (default_must_pass_in_stack (mode, type))
15937 return (!TARGET_64BIT && type && mode == TImode);
15940 /* Initialize vector TARGET via VALS. */
15942 ix86_expand_vector_init (rtx target, rtx vals)
15944 enum machine_mode mode = GET_MODE (target);
15945 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15946 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15949 for (i = n_elts - 1; i >= 0; i--)
15950 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15951 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15954 /* A few special cases first...
15955 ... constants are best loaded from the constant pool. */
15958 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15962 /* ... values where only the first field is non-constant are best loaded
15963 from the pool and overwritten via a move later. */
15966 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15967 GET_MODE_INNER (mode), 0);
15969 op = force_reg (mode, op);
15970 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15971 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15972 switch (GET_MODE (target))
15975 emit_insn (gen_sse2_movsd (target, target, op));
15978 emit_insn (gen_sse_movss (target, target, op));
15986 /* And the general sequence, built from interleave (unpack) operations. */
15987 switch (GET_MODE (target))
15992 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15994 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15996 vecop0 = force_reg (V2DFmode, vecop0);
15997 vecop1 = force_reg (V2DFmode, vecop1);
15998 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
16004 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16006 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16008 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16010 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16011 rtx tmp1 = gen_reg_rtx (V4SFmode);
16012 rtx tmp2 = gen_reg_rtx (V4SFmode);
16014 vecop0 = force_reg (V4SFmode, vecop0);
16015 vecop1 = force_reg (V4SFmode, vecop1);
16016 vecop2 = force_reg (V4SFmode, vecop2);
16017 vecop3 = force_reg (V4SFmode, vecop3);
16018 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16019 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16020 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
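/* Element order produced by the unpcklps cascade above, simulated on
   plain arrays (sketch only): unpcklps (a, b) yields
   { a[0], b[0], a[1], b[1] }, so interleaving (op0,op2) and (op1,op3)
   first and then interleaving the results builds { x0, x1, x2, x3 }.  */
#if 0
static void
unpcklps (float *dst, const float *a, const float *b)
{
  float t0 = a[0], t1 = b[0], t2 = a[1], t3 = b[1];
  dst[0] = t0, dst[1] = t1, dst[2] = t2, dst[3] = t3;
}

static void
build_v4sf (float *out, const float x[4])
{
  float t1[4], t2[4];
  float v0[4] = { x[0] }, v1[4] = { x[1] }, v2[4] = { x[2] }, v3[4] = { x[3] };

  unpcklps (t1, v1, v3);	/* { x1, x3, 0, 0 } */
  unpcklps (t2, v0, v2);	/* { x0, x2, 0, 0 } */
  unpcklps (out, t2, t1);	/* { x0, x1, x2, x3 } */
}
#endif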
16028 #include "gt-i386.h"