/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)    \
  ((mode) == QImode ? 0     \
   : (mode) == HImode ? 1   \
   : (mode) == SImode ? 2   \
   : (mode) == DImode ? 3   \
   : 4)
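
/* Illustrative sketch (not part of the original file): MODE_INDEX selects a
   column of the five-entry multiply/divide cost arrays defined below, so a
   cost query for a 32-bit multiply would look something like

     cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   which picks column 2.  The field name mult_init is assumed here for
   illustration; the final index, 4, covers modes wider than DImode.  */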

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {  /* costs for tuning for size */
  2,                        /* cost of an add instruction */
  3,                        /* cost of a lea instruction */
  2,                        /* variable shift costs */
  3,                        /* constant shift costs */
  {3, 3, 3, 3, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},          /* cost of a divide/mod */
  3,                        /* cost of movsx */
  3,                        /* cost of movzx */
  2,                        /* cost for loading QImode using movzbl */
  {2, 2, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 2},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {2, 2, 2},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  3,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {3, 3},                   /* cost of storing MMX registers
                               in SImode and DImode */
  3,                        /* cost of moving SSE register */
  {3, 3, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {3, 3, 3},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* cost of FADD and FSUB insns.  */
  2,                        /* cost of FMUL instruction.  */
  2,                        /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  2,                        /* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  3,                        /* variable shift costs */
  2,                        /* constant shift costs */
  {6, 6, 6, 6, 6},          /* cost of starting a multiply */
  1,                        /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  15,                       /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  23,                       /* cost of FADD and FSUB insns.  */
  27,                       /* cost of FMUL instruction.  */
  88,                       /* cost of FDIV instruction.  */
  22,                       /* cost of FABS instruction.  */
  24,                       /* cost of FCHS instruction.  */
  122,                      /* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  3,                        /* variable shift costs */
  2,                        /* constant shift costs */
  {12, 12, 12, 12, 12},     /* cost of starting a multiply */
  1,                        /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  15,                       /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  8,                        /* cost of FADD and FSUB insns.  */
  16,                       /* cost of FMUL instruction.  */
  73,                       /* cost of FDIV instruction.  */
  3,                        /* cost of FABS instruction.  */
  3,                        /* cost of FCHS instruction.  */
  83,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  4,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {11, 11, 11, 11, 11},     /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  8,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  3,                        /* cost of FADD and FSUB insns.  */
  3,                        /* cost of FMUL instruction.  */
  39,                       /* cost of FDIV instruction.  */
  1,                        /* cost of FABS instruction.  */
  1,                        /* cost of FCHS instruction.  */
  70,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {4, 4, 4, 4, 4},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  32,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  3,                        /* cost of FADD and FSUB insns.  */
  5,                        /* cost of FMUL instruction.  */
  56,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  56,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {3, 3, 3, 3, 3},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},     /* cost of a divide/mod */
  2,                        /* cost of movsx */
  2,                        /* cost of movzx */
  8,                        /* "large" insn */
  3,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  6,                        /* MMX or SSE register to integer */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  2,                        /* cost of FADD and FSUB insns.  */
  2,                        /* cost of FMUL instruction.  */
  56,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  56,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {5, 5, 5, 5, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  4,                        /* cost of FADD and FSUB insns.  */
  4,                        /* cost of FMUL instruction.  */
  24,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  35,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {3, 4, 3, 4, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 3, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  4,                        /* cost of FADD and FSUB insns.  */
  4,                        /* cost of FMUL instruction.  */
  19,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  35,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,                        /* cost of an add instruction */
  3,                        /* cost of a lea instruction */
  4,                        /* variable shift costs */
  4,                        /* constant shift costs */
  {15, 15, 15, 15, 15},     /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  16,                       /* "large" insn */
  2,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  12,                       /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  10,                       /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  5,                        /* cost of FADD and FSUB insns.  */
  7,                        /* cost of FMUL instruction.  */
  43,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  43,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs nocona_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {10, 10, 10, 10, 10},     /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {66, 66, 66, 66, 66},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  16,                       /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  3,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  6,                        /* cost of moving MMX register */
  {12, 12},                 /* cost of loading MMX registers
                               in SImode and DImode */
  {12, 12},                 /* cost of storing MMX registers
                               in SImode and DImode */
  6,                        /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {12, 12, 12},             /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  8,                        /* MMX or SSE register to integer */
  128,                      /* size of prefetch block */
  8,                        /* number of parallel prefetches */
  6,                        /* cost of FADD and FSUB insns.  */
  8,                        /* cost of FMUL instruction.  */
  40,                       /* cost of FDIV instruction.  */
  3,                        /* cost of FABS instruction.  */
  3,                        /* cost of FCHS instruction.  */
  44,                       /* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
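
/* Illustrative sketch (not part of the original file): the tables above are
   consumed through ix86_cost by the rtx cost hook registered below as
   TARGET_RTX_COSTS (ix86_rtx_costs), roughly along the lines of

     case PLUS:
       *total = COSTS_N_INSNS (ix86_cost->add);
       break;

   The field name `add' mirrors the "cost of an add instruction" slot and is
   assumed here for illustration.  */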

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
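
/* Illustrative note (not part of the original file): each flag below is a
   mask over these processor bits, normally tested against the active tuning
   with something like

     if (x86_use_leave & TUNEMASK)
       ...use the leave instruction in the epilogue...

   where TUNEMASK is assumed to expand to (1 << ix86_tune); that spelling is
   used by the x86_accumulate_outgoing_args test in override_options below.  */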

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results, but after P4
   shipped no performance benefit was observed from them, and they increase
   code size.  As a result, icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;

/* If the average insn count for a single function invocation is lower than
   this constant, emit the fast (but longer) prologue and epilogue.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
622 /* The "default" register map used in 32bit mode. */
624 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
626 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
627 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
629 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
630 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
631 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
632 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
646 /* The "default" register map used in 64bit mode. */
647 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
649 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
650 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
651 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
652 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
653 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
654 8,9,10,11,12,13,14,15, /* extended integer registers */
655 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via an
   `=' command) if we said it was in DWARF regno 11, but SDB still
   prints garbage when asked for the value of the variable in question
   (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,          /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,  /* fp regs */
  -1, 9, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};
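
/* Illustrative note (not part of the original file): a lookup in this table
   maps gcc's register number to the SVR4 DWARF number; e.g. %esi is gcc
   regno 4, and svr4_dbx_register_map[4] == 6, matching the list above.  */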

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
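
/* Illustrative note (not part of the original file): assuming the usual
   64-bit values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   SSE_REGPARM_MAX == 8, the register save area works out to
   6*8 + 8*16 = 176 bytes.  */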

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                              <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
                                              <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]           \
                         )
   [saved regs]          ) to_allocate        <- FRAME_POINTER
   [padding2]           /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;

const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;    /* for -mtune=<xxx> */
const char *ix86_arch_string;    /* for -march=<xxx> */
const char *ix86_fpmath_string;  /* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if the sse prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
     ATTRIBUTE_UNUSED;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class: gcc
   just uses SF or DFmode moves instead of DImode ones to avoid reformatting
   penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;  /* Processor costs */
      const int target_enable;             /* Target flags to enable.  */
      const int target_disable;            /* Target flags to disable.  */
      const int align_loop;                /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;

  static struct pta
    {
      const char *const name;  /* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
       | PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
       | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
       | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
       | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
       | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
       | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
       | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
       | PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
       | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
       | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
       | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
       | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
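
  /* Illustrative note (not part of the original file): the option value is
     a power-of-two exponent in bytes, converted to bits above.  For
     example, -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
     = 16 * 8 = 128 bits, i.e. a 16-byte-aligned stack.  */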

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      target_flags
	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
	    & ~target_flags_explicit);

      if (TARGET_SSE)
	ix86_fpmath = FPMATH_SSE;
    }
  else
    {
      ix86_fpmath = FPMATH_387;
      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use it when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When a scheduling description is not available, disable the scheduler
     pass so it won't slow down compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}

void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
1611 /* Table of valid machine attributes. A usage sketch follows the table. */
1612 const struct attribute_spec ix86_attribute_table[] =
1614 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1615 /* Stdcall attribute says the callee is responsible for popping arguments
1616 if they are not variable. */
1617 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1618 /* Fastcall attribute says the callee is responsible for popping arguments
1619 if they are not variable. */
1620 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1621 /* Cdecl attribute says the callee is a normal C declaration */
1622 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1623 /* Regparm attribute specifies how many integer arguments are to be
1624 passed in registers. */
1625 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1626 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1627 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1628 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1629 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1631 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1632 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1633 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1634 SUBTARGET_ATTRIBUTE_TABLE,
1636 { NULL, 0, 0, false, false, false, NULL }
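/* Illustrative usage (a sketch, not part of the compiler): user code
   selects these conventions as follows:

     int __attribute__ ((cdecl)) f (int a, int b);      -- caller pops args
     int __attribute__ ((stdcall)) g (int a, int b);    -- callee pops args
     int __attribute__ ((fastcall)) h (int a, int b);   -- a in %ecx, b in %edx
     int __attribute__ ((regparm (3))) k (int a, int b, int c);
                                                        -- first 3 ints in regs

   The handlers below reject meaningless combinations such as fastcall
   together with stdcall or regparm.  */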
1639 /* Decide whether we can make a sibling call to a function. DECL is the
1640 declaration of the function being targeted by the call and EXP is the
1641 CALL_EXPR representing the call. */
1644 ix86_function_ok_for_sibcall (tree decl, tree exp)
1646 /* If we are generating position-independent code, we cannot sibcall
1647 optimize any indirect call, or a direct call to a global function,
1648 as the PLT requires %ebx be live. */
1649 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1652 /* If we are returning floats on the 80387 register stack, we cannot
1653 make a sibcall from a function that doesn't return a float to a
1654 function that does or, conversely, from a function that does return
1655 a float to a function that doesn't; the necessary stack adjustment
1656 would not be executed. */
1657 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1658 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1661 /* If this call is indirect, we'll need to be able to use a call-clobbered
1662 register for the address of the target function. Make sure that all
1663 such registers are not used for passing parameters. */
1664 if (!decl && !TARGET_64BIT)
1668 /* We're looking at the CALL_EXPR, we need the type of the function. */
1669 type = TREE_OPERAND (exp, 0); /* pointer expression */
1670 type = TREE_TYPE (type); /* pointer type */
1671 type = TREE_TYPE (type); /* function type */
1673 if (ix86_function_regparm (type, NULL) >= 3)
1675 /* ??? Need to count the actual number of registers to be used,
1676 not the possible number of registers. Fix later. */
1681 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1682 /* Dllimport'd functions are also called indirectly. */
1683 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1684 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1688 /* Otherwise okay. That also includes certain types of indirect calls. */
1692 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1693 arguments as in struct attribute_spec.handler. */
1695 ix86_handle_cdecl_attribute (tree *node, tree name,
1696 tree args ATTRIBUTE_UNUSED,
1697 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1699 if (TREE_CODE (*node) != FUNCTION_TYPE
1700 && TREE_CODE (*node) != METHOD_TYPE
1701 && TREE_CODE (*node) != FIELD_DECL
1702 && TREE_CODE (*node) != TYPE_DECL)
1704 warning ("%qs attribute only applies to functions",
1705 IDENTIFIER_POINTER (name));
1706 *no_add_attrs = true;
1710 if (is_attribute_p ("fastcall", name))
1712 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1714 error ("fastcall and stdcall attributes are not compatible");
1716 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1718 error ("fastcall and regparm attributes are not compatible");
1721 else if (is_attribute_p ("stdcall", name))
1723 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1725 error ("fastcall and stdcall attributes are not compatible");
1732 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1733 *no_add_attrs = true;
1739 /* Handle a "regparm" attribute;
1740 arguments as in struct attribute_spec.handler. */
1742 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1743 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1745 if (TREE_CODE (*node) != FUNCTION_TYPE
1746 && TREE_CODE (*node) != METHOD_TYPE
1747 && TREE_CODE (*node) != FIELD_DECL
1748 && TREE_CODE (*node) != TYPE_DECL)
1750 warning ("%qs attribute only applies to functions",
1751 IDENTIFIER_POINTER (name));
1752 *no_add_attrs = true;
1758 cst = TREE_VALUE (args);
1759 if (TREE_CODE (cst) != INTEGER_CST)
1761 warning ("%qs attribute requires an integer constant argument",
1762 IDENTIFIER_POINTER (name));
1763 *no_add_attrs = true;
1765 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1767 warning ("argument to %qs attribute larger than %d",
1768 IDENTIFIER_POINTER (name), REGPARM_MAX);
1769 *no_add_attrs = true;
1772 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1774 error ("fastcall and regparm attributes are not compatible");
1781 /* Return 0 if the attributes for two types are incompatible, 1 if they
1782 are compatible, and 2 if they are nearly compatible (which causes a
1783 warning to be generated). */
1786 ix86_comp_type_attributes (tree type1, tree type2)
1788 /* Check for mismatch of non-default calling convention. */
1789 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1791 if (TREE_CODE (type1) != FUNCTION_TYPE)
1794 /* Check for mismatched fastcall types */
1795 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1796 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1799 /* Check for mismatched return types (cdecl vs stdcall). */
1800 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1801 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1803 if (ix86_function_regparm (type1, NULL)
1804 != ix86_function_regparm (type2, NULL))
1809 /* Return the regparm value for a function with the indicated TYPE and DECL.
1810 DECL may be NULL when calling a function indirectly
1811 or considering a libcall. */
1814 ix86_function_regparm (tree type, tree decl)
1817 int regparm = ix86_regparm;
1818 bool user_convention = false;
1822 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1825 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1826 user_convention = true;
1829 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1832 user_convention = true;
1835 /* Use register calling convention for local functions when possible. */
1836 if (!TARGET_64BIT && !user_convention && decl
1837 && flag_unit_at_a_time && !profile_flag)
1839 struct cgraph_local_info *i = cgraph_local_info (decl);
1842 /* We can't use regparm(3) for nested functions as these use the
1843 static chain pointer in the third argument. */
1844 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1854 /* Return true if EAX is live at the start of the function. Used by
1855 ix86_expand_prologue to determine if we need special help before
1856 calling allocate_stack_worker. */
1859 ix86_eax_live_at_start_p (void)
1861 /* Cheat. Don't bother working forward from ix86_function_regparm
1862 to the function type to whether an actual argument is located in
1863 eax. Instead just look at cfg info, which is still close enough
1864 to correct at this point. This gives false positives for broken
1865 functions that might use uninitialized data that happens to be
1866 allocated in eax, but who cares? */
1867 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1870 /* Value is the number of bytes of arguments automatically
1871 popped when returning from a subroutine call.
1872 FUNDECL is the declaration node of the function (as a tree),
1873 FUNTYPE is the data type of the function (as a tree),
1874 or for a library call it is an identifier node for the subroutine name.
1875 SIZE is the number of bytes of arguments passed on the stack.
1877 On the 80386, the RTD insn may be used to pop them if the number
1878 of args is fixed, but if the number is variable then the caller
1879 must pop them all. RTD can't be used for library calls now
1880 because the library is compiled with the Unix compiler.
1881 Use of RTD is a selectable option, since it is incompatible with
1882 standard Unix calling sequences. If the option is not selected,
1883 the caller must always pop the args.
1885 The attribute stdcall is equivalent to RTD on a per module basis. */
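/* Illustration (a sketch of the resulting code, not compiled here):

     int __attribute__ ((stdcall)) f (int a, int b);  -- returns with `ret $8'
     int g (int a, int b);                            -- plain `ret'; caller pops
     int __attribute__ ((stdcall)) h (int a, ...);    -- variable args: plain `ret'
*/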
1888 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1890 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1892 /* Cdecl functions override -mrtd, and never pop the stack. */
1893 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1895 /* Stdcall and fastcall functions will pop the stack if not
1897 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1898 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1902 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1903 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1904 == void_type_node)))
1908 /* Lose any fake structure return argument if it is passed on the stack. */
1909 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1911 && !KEEP_AGGREGATE_RETURN_POINTER)
1913 int nregs = ix86_function_regparm (funtype, fundecl);
1916 return GET_MODE_SIZE (Pmode);
1922 /* Argument support functions. */
1924 /* Return true when register may be used to pass function parameters. */
1926 ix86_function_arg_regno_p (int regno)
1930 return (regno < REGPARM_MAX
1931 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1932 if (SSE_REGNO_P (regno) && TARGET_SSE)
1934 /* RAX is used as a hidden argument to va_arg functions. */
1937 for (i = 0; i < REGPARM_MAX; i++)
1938 if (regno == x86_64_int_parameter_registers[i])
1943 /* Return true if we do not know how to pass TYPE solely in registers. */
1946 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1948 if (must_pass_in_stack_var_size_or_pad (mode, type))
1951 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1952 The layout_type routine is crafty and tries to trick us into passing
1953 currently unsupported vector types on the stack by using TImode. */
1954 return (!TARGET_64BIT && mode == TImode
1955 && type && TREE_CODE (type) != VECTOR_TYPE);
1958 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1959 for a call to a function whose data type is FNTYPE.
1960 For a library call, FNTYPE is 0. */
1963 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1964 tree fntype, /* tree ptr for function decl */
1965 rtx libname, /* SYMBOL_REF of library name or 0 */
1968 static CUMULATIVE_ARGS zero_cum;
1969 tree param, next_param;
1971 if (TARGET_DEBUG_ARG)
1973 fprintf (stderr, "\ninit_cumulative_args (");
1975 fprintf (stderr, "fntype code = %s, ret code = %s",
1976 tree_code_name[(int) TREE_CODE (fntype)],
1977 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1979 fprintf (stderr, "no fntype");
1982 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1987 /* Set up the number of registers to use for passing arguments. */
1989 cum->nregs = ix86_function_regparm (fntype, fndecl);
1991 cum->nregs = ix86_regparm;
1993 cum->sse_nregs = SSE_REGPARM_MAX;
1995 cum->mmx_nregs = MMX_REGPARM_MAX;
1996 cum->warn_sse = true;
1997 cum->warn_mmx = true;
1998 cum->maybe_vaarg = false;
2000 /* Use ecx and edx registers if the function has the fastcall attribute. */
2001 if (fntype && !TARGET_64BIT)
2003 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2010 /* Determine if this function has variable arguments. This is
2011 indicated by the last argument being 'void_type_node' if there
2012 are no variable arguments. If there are variable arguments, then
2013 we won't pass anything in registers in 32-bit mode. */
2015 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2017 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2018 param != 0; param = next_param)
2020 next_param = TREE_CHAIN (param);
2021 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2032 cum->maybe_vaarg = true;
2036 if ((!fntype && !libname)
2037 || (fntype && !TYPE_ARG_TYPES (fntype)))
2038 cum->maybe_vaarg = 1;
2040 if (TARGET_DEBUG_ARG)
2041 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
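/* Illustration (assumed behavior): for `int f (int, ...)' the last entry
   of TYPE_ARG_TYPES is not void_type_node, so maybe_vaarg becomes true
   and, in 32-bit mode, no parameter registers are used; for `int g (int)'
   the list ends with void_type_node and register passing stays enabled. */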
2046 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2047 But in the case of vector types, it is some vector mode.
2049 When we have only some of our vector ISA extensions enabled, then there
2050 are some modes for which vector_mode_supported_p is false. For these
2051 modes, the generic vector support in gcc will choose some non-vector mode
2052 in order to implement the type. By computing the natural mode, we'll
2053 select the proper ABI location for the operand and not depend on whatever
2054 the middle-end decides to do with these vector types. */
2056 static enum machine_mode
2057 type_natural_mode (tree type)
2059 enum machine_mode mode = TYPE_MODE (type);
2061 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2063 HOST_WIDE_INT size = int_size_in_bytes (type);
2064 if ((size == 8 || size == 16)
2065 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2066 && TYPE_VECTOR_SUBPARTS (type) > 1)
2068 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2070 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2071 mode = MIN_MODE_VECTOR_FLOAT;
2073 mode = MIN_MODE_VECTOR_INT;
2075 /* Get the mode which has this inner mode and number of units. */
2076 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2077 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2078 && GET_MODE_INNER (mode) == innermode)
2088 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2089 this may not agree with the mode that the type system has chosen for the
2090 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2091 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2094 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2099 if (orig_mode != BLKmode)
2100 tmp = gen_rtx_REG (orig_mode, regno);
2103 tmp = gen_rtx_REG (mode, regno);
2104 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2105 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2111 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2112 of this code is to classify each eightbyte of the incoming argument by register
2113 class and assign registers accordingly. */
2115 /* Return the union class of CLASS1 and CLASS2.
2116 See the x86-64 PS ABI for details. */
2118 static enum x86_64_reg_class
2119 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2121 /* Rule #1: If both classes are equal, this is the resulting class. */
2122 if (class1 == class2)
2125 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2127 if (class1 == X86_64_NO_CLASS)
2129 if (class2 == X86_64_NO_CLASS)
2132 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2133 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2134 return X86_64_MEMORY_CLASS;
2136 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2137 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2138 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2139 return X86_64_INTEGERSI_CLASS;
2140 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2141 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2142 return X86_64_INTEGER_CLASS;
2144 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2146 if (class1 == X86_64_X87_CLASS
2147 || class1 == X86_64_X87UP_CLASS
2148 || class1 == X86_64_COMPLEX_X87_CLASS
2149 || class2 == X86_64_X87_CLASS
2150 || class2 == X86_64_X87UP_CLASS
2151 || class2 == X86_64_COMPLEX_X87_CLASS)
2152 return X86_64_MEMORY_CLASS;
2154 /* Rule #6: Otherwise class SSE is used. */
2155 return X86_64_SSE_CLASS;
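/* Worked examples (sketches): INTEGERSI merged with SSESF yields
   INTEGERSI (rule #4's special case); NO_CLASS merged with SSEDF yields
   SSEDF (rule #2); X87 merged with SSE yields MEMORY (rule #5); SSESF
   merged with SSEDF falls through to SSE (rule #6). */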
2158 /* Classify the argument of type TYPE and mode MODE.
2159 CLASSES will be filled by the register class used to pass each word
2160 of the operand. The number of words is returned. In case the parameter
2161 should be passed in memory, 0 is returned. As a special case for zero
2162 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2164 BIT_OFFSET is used internally for handling records and specifies the
2165 offset into the record, in bits modulo 256, to avoid overflow cases.
2167 See the x86-64 PS ABI for details.
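/* Worked example (a sketch): on x86-64, `struct { int i; double d; }'
   occupies two eightbytes. Classification yields
   classes[0] = X86_64_INTEGERSI_CLASS (int plus padding) and
   classes[1] = X86_64_SSEDF_CLASS, and 2 is returned, so the struct
   travels in one integer register and one SSE register. */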
2171 classify_argument (enum machine_mode mode, tree type,
2172 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2174 HOST_WIDE_INT bytes =
2175 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2176 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2178 /* Variable sized entities are always passed/returned in memory. */
2182 if (mode != VOIDmode
2183 && targetm.calls.must_pass_in_stack (mode, type))
2186 if (type && AGGREGATE_TYPE_P (type))
2190 enum x86_64_reg_class subclasses[MAX_CLASSES];
2192 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2196 for (i = 0; i < words; i++)
2197 classes[i] = X86_64_NO_CLASS;
2199 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2200 signal the memory class, so handle this as a special case. */
2203 classes[0] = X86_64_NO_CLASS;
2207 /* Classify each field of record and merge classes. */
2208 if (TREE_CODE (type) == RECORD_TYPE)
2210 /* For C++ classes, first merge in the fields of the base classes. */
2211 if (TYPE_BINFO (type))
2213 tree binfo, base_binfo;
2216 for (binfo = TYPE_BINFO (type), basenum = 0;
2217 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2220 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2221 tree type = BINFO_TYPE (base_binfo);
2223 num = classify_argument (TYPE_MODE (type),
2225 (offset + bit_offset) % 256);
2228 for (i = 0; i < num; i++)
2230 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2232 merge_classes (subclasses[i], classes[i + pos]);
2236 /* And now merge the fields of the structure. */
2237 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2239 if (TREE_CODE (field) == FIELD_DECL)
2243 /* Bitfields are always classified as integer. Handle them
2244 early, since later code would consider them to be
2245 misaligned integers. */
2246 if (DECL_BIT_FIELD (field))
2248 for (i = int_bit_position (field) / 8 / 8;
2249 i < (int_bit_position (field)
2250 + tree_low_cst (DECL_SIZE (field), 0)
2253 merge_classes (X86_64_INTEGER_CLASS,
2258 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2259 TREE_TYPE (field), subclasses,
2260 (int_bit_position (field)
2261 + bit_offset) % 256);
2264 for (i = 0; i < num; i++)
2267 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2269 merge_classes (subclasses[i], classes[i + pos]);
2275 /* Arrays are handled as small records. */
2276 else if (TREE_CODE (type) == ARRAY_TYPE)
2279 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2280 TREE_TYPE (type), subclasses, bit_offset);
2284 /* The partial classes are now full classes. */
2285 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2286 subclasses[0] = X86_64_SSE_CLASS;
2287 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2288 subclasses[0] = X86_64_INTEGER_CLASS;
2290 for (i = 0; i < words; i++)
2291 classes[i] = subclasses[i % num];
2293 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2294 else if (TREE_CODE (type) == UNION_TYPE
2295 || TREE_CODE (type) == QUAL_UNION_TYPE)
2297 /* For C++ classes, first merge in the fields of the base classes. */
2298 if (TYPE_BINFO (type))
2300 tree binfo, base_binfo;
2303 for (binfo = TYPE_BINFO (type), basenum = 0;
2304 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2307 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2308 tree type = BINFO_TYPE (base_binfo);
2310 num = classify_argument (TYPE_MODE (type),
2312 (offset + (bit_offset % 64)) % 256);
2315 for (i = 0; i < num; i++)
2317 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2319 merge_classes (subclasses[i], classes[i + pos]);
2323 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2325 if (TREE_CODE (field) == FIELD_DECL)
2328 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2329 TREE_TYPE (field), subclasses,
2333 for (i = 0; i < num; i++)
2334 classes[i] = merge_classes (subclasses[i], classes[i]);
2341 /* Final merger cleanup. */
2342 for (i = 0; i < words; i++)
2344 /* If one class is MEMORY, everything should be passed in
2346 if (classes[i] == X86_64_MEMORY_CLASS)
2349 /* The X86_64_SSEUP_CLASS should always be preceded by
2350 X86_64_SSE_CLASS. */
2351 if (classes[i] == X86_64_SSEUP_CLASS
2352 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2353 classes[i] = X86_64_SSE_CLASS;
2355 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2356 if (classes[i] == X86_64_X87UP_CLASS
2357 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2358 classes[i] = X86_64_SSE_CLASS;
2363 /* Compute the alignment needed. We align all types to natural boundaries with
2364 the exception of XFmode, which is aligned to 64 bits. */
2365 if (mode != VOIDmode && mode != BLKmode)
2367 int mode_alignment = GET_MODE_BITSIZE (mode);
2370 mode_alignment = 128;
2371 else if (mode == XCmode)
2372 mode_alignment = 256;
2373 if (COMPLEX_MODE_P (mode))
2374 mode_alignment /= 2;
2375 /* Misaligned fields are always returned in memory. */
2376 if (bit_offset % mode_alignment)
2380 /* for V1xx modes, just use the base mode */
2381 if (VECTOR_MODE_P (mode)
2382 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2383 mode = GET_MODE_INNER (mode);
2385 /* Classification of atomic types. */
2395 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2396 classes[0] = X86_64_INTEGERSI_CLASS;
2398 classes[0] = X86_64_INTEGER_CLASS;
2402 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2407 if (!(bit_offset % 64))
2408 classes[0] = X86_64_SSESF_CLASS;
2410 classes[0] = X86_64_SSE_CLASS;
2413 classes[0] = X86_64_SSEDF_CLASS;
2416 classes[0] = X86_64_X87_CLASS;
2417 classes[1] = X86_64_X87UP_CLASS;
2420 classes[0] = X86_64_SSE_CLASS;
2421 classes[1] = X86_64_SSEUP_CLASS;
2424 classes[0] = X86_64_SSE_CLASS;
2427 classes[0] = X86_64_SSEDF_CLASS;
2428 classes[1] = X86_64_SSEDF_CLASS;
2431 classes[0] = X86_64_COMPLEX_X87_CLASS;
2434 /* These modes are larger than 16 bytes. */
2442 classes[0] = X86_64_SSE_CLASS;
2443 classes[1] = X86_64_SSEUP_CLASS;
2449 classes[0] = X86_64_SSE_CLASS;
2455 if (VECTOR_MODE_P (mode))
2459 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2461 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2462 classes[0] = X86_64_INTEGERSI_CLASS;
2464 classes[0] = X86_64_INTEGER_CLASS;
2465 classes[1] = X86_64_INTEGER_CLASS;
2466 return 1 + (bytes > 8);
2473 /* Examine the argument and set the number of registers required in each
2474 class. Return 0 iff the parameter should be passed in memory. */
2476 examine_argument (enum machine_mode mode, tree type, int in_return,
2477 int *int_nregs, int *sse_nregs)
2479 enum x86_64_reg_class class[MAX_CLASSES];
2480 int n = classify_argument (mode, type, class, 0);
2486 for (n--; n >= 0; n--)
2489 case X86_64_INTEGER_CLASS:
2490 case X86_64_INTEGERSI_CLASS:
2493 case X86_64_SSE_CLASS:
2494 case X86_64_SSESF_CLASS:
2495 case X86_64_SSEDF_CLASS:
2498 case X86_64_NO_CLASS:
2499 case X86_64_SSEUP_CLASS:
2501 case X86_64_X87_CLASS:
2502 case X86_64_X87UP_CLASS:
2506 case X86_64_COMPLEX_X87_CLASS:
2507 return in_return ? 2 : 0;
2508 case X86_64_MEMORY_CLASS:
2514 /* Construct a container for the argument as used by the GCC interface. See
2515 FUNCTION_ARG for the detailed description. */
2518 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2519 tree type, int in_return, int nintregs, int nsseregs,
2520 const int *intreg, int sse_regno)
2522 enum machine_mode tmpmode;
2524 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2525 enum x86_64_reg_class class[MAX_CLASSES];
2529 int needed_sseregs, needed_intregs;
2530 rtx exp[MAX_CLASSES];
2533 n = classify_argument (mode, type, class, 0);
2534 if (TARGET_DEBUG_ARG)
2537 fprintf (stderr, "Memory class\n");
2540 fprintf (stderr, "Classes:");
2541 for (i = 0; i < n; i++)
2543 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2545 fprintf (stderr, "\n");
2550 if (!examine_argument (mode, type, in_return, &needed_intregs,
2553 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2556 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2557 some less clueful developer tries to use floating-point anyway. */
2558 if (needed_sseregs && !TARGET_SSE)
2560 static bool issued_error;
2563 issued_error = true;
2565 error ("SSE register return with SSE disabled");
2567 error ("SSE register argument with SSE disabled");
2572 /* First construct simple cases. Avoid SCmode, since we want to use a
2573 single register to pass this type. */
2574 if (n == 1 && mode != SCmode)
2577 case X86_64_INTEGER_CLASS:
2578 case X86_64_INTEGERSI_CLASS:
2579 return gen_rtx_REG (mode, intreg[0]);
2580 case X86_64_SSE_CLASS:
2581 case X86_64_SSESF_CLASS:
2582 case X86_64_SSEDF_CLASS:
2583 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2584 case X86_64_X87_CLASS:
2585 case X86_64_COMPLEX_X87_CLASS:
2586 return gen_rtx_REG (mode, FIRST_STACK_REG);
2587 case X86_64_NO_CLASS:
2588 /* Zero sized array, struct or class. */
2593 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2595 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2597 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2598 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2599 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2600 && class[1] == X86_64_INTEGER_CLASS
2601 && (mode == CDImode || mode == TImode || mode == TFmode)
2602 && intreg[0] + 1 == intreg[1])
2603 return gen_rtx_REG (mode, intreg[0]);
2605 /* Otherwise figure out the entries of the PARALLEL. */
2606 for (i = 0; i < n; i++)
2610 case X86_64_NO_CLASS:
2612 case X86_64_INTEGER_CLASS:
2613 case X86_64_INTEGERSI_CLASS:
2614 /* Merge TImodes on aligned occasions here too. */
2615 if (i * 8 + 8 > bytes)
2616 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2617 else if (class[i] == X86_64_INTEGERSI_CLASS)
2621 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2622 if (tmpmode == BLKmode)
2624 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2625 gen_rtx_REG (tmpmode, *intreg),
2629 case X86_64_SSESF_CLASS:
2630 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2631 gen_rtx_REG (SFmode,
2632 SSE_REGNO (sse_regno)),
2636 case X86_64_SSEDF_CLASS:
2637 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2638 gen_rtx_REG (DFmode,
2639 SSE_REGNO (sse_regno)),
2643 case X86_64_SSE_CLASS:
2644 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2648 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2649 gen_rtx_REG (tmpmode,
2650 SSE_REGNO (sse_regno)),
2652 if (tmpmode == TImode)
2661 /* Empty aligned struct, union or class. */
2665 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2666 for (i = 0; i < nexps; i++)
2667 XVECEXP (ret, 0, i) = exp [i];
2671 /* Update the data in CUM to advance over an argument
2672 of mode MODE and data type TYPE.
2673 (TYPE is null for libcalls where that information may not be available.) */
2676 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2677 tree type, int named)
2680 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2681 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2684 mode = type_natural_mode (type);
2686 if (TARGET_DEBUG_ARG)
2687 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2688 "mode=%s, named=%d)\n\n",
2689 words, cum->words, cum->nregs, cum->sse_nregs,
2690 GET_MODE_NAME (mode), named);
2694 int int_nregs, sse_nregs;
2695 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2696 cum->words += words;
2697 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2699 cum->nregs -= int_nregs;
2700 cum->sse_nregs -= sse_nregs;
2701 cum->regno += int_nregs;
2702 cum->sse_regno += sse_nregs;
2705 cum->words += words;
2723 cum->words += words;
2724 cum->nregs -= words;
2725 cum->regno += words;
2727 if (cum->nregs <= 0)
2741 if (!type || !AGGREGATE_TYPE_P (type))
2743 cum->sse_words += words;
2744 cum->sse_nregs -= 1;
2745 cum->sse_regno += 1;
2746 if (cum->sse_nregs <= 0)
2758 if (!type || !AGGREGATE_TYPE_P (type))
2760 cum->mmx_words += words;
2761 cum->mmx_nregs -= 1;
2762 cum->mmx_regno += 1;
2763 if (cum->mmx_nregs <= 0)
2774 /* Define where to put the arguments to a function.
2775 Value is zero to push the argument on the stack,
2776 or a hard register in which to store the argument.
2778 MODE is the argument's machine mode.
2779 TYPE is the data type of the argument (as a tree).
2780 This is null for libcalls where that information may
2782 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2783 the preceding args and about the function being called.
2784 NAMED is nonzero if this argument is a named parameter
2785 (otherwise it is an extra parameter matching an ellipsis). */
2788 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2789 tree type, int named)
2791 enum machine_mode mode = orig_mode;
2794 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2795 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2796 static bool warnedsse, warnedmmx;
2798 /* To simplify the code below, represent vector types with a vector mode
2799 even if MMX/SSE are not active. */
2800 if (type && TREE_CODE (type) == VECTOR_TYPE)
2801 mode = type_natural_mode (type);
2803 /* Handle a hidden AL argument containing the number of registers for varargs
2804 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2806 if (mode == VOIDmode)
2809 return GEN_INT (cum->maybe_vaarg
2810 ? (cum->sse_nregs < 0
2818 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2820 &x86_64_int_parameter_registers [cum->regno],
2825 /* For now, pass fp/complex values on the stack. */
2837 if (words <= cum->nregs)
2839 int regno = cum->regno;
2841 /* Fastcall allocates the first two DWORD (SImode) or
2842 smaller arguments to ECX and EDX. */
2845 if (mode == BLKmode || mode == DImode)
2848 /* ECX, not EAX, is the first allocated register. */
2852 ret = gen_rtx_REG (mode, regno);
2862 if (!type || !AGGREGATE_TYPE_P (type))
2864 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2867 warning ("SSE vector argument without SSE enabled "
2871 ret = gen_reg_or_parallel (mode, orig_mode,
2872 cum->sse_regno + FIRST_SSE_REG);
2879 if (!type || !AGGREGATE_TYPE_P (type))
2881 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2884 warning ("MMX vector argument without MMX enabled "
2888 ret = gen_reg_or_parallel (mode, orig_mode,
2889 cum->mmx_regno + FIRST_MMX_REG);
2894 if (TARGET_DEBUG_ARG)
2897 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2898 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2901 print_simple_rtl (stderr, ret);
2903 fprintf (stderr, ", stack");
2905 fprintf (stderr, " )\n");
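/* Illustration (assumed behavior): for a 64-bit varargs call such as
   `printf ("%f", d)' the caller loads the hidden AL argument, e.g.
   `movl $1, %eax', announcing how many SSE registers carry arguments so
   that the callee's prologue can skip the unused SSE saves. */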
2911 /* A C expression that indicates when an argument must be passed by
2912 reference. If nonzero for an argument, a copy of that argument is
2913 made in memory and a pointer to the argument is passed instead of
2914 the argument itself. The pointer is passed in whatever way is
2915 appropriate for passing a pointer to that type. */
2918 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2919 enum machine_mode mode ATTRIBUTE_UNUSED,
2920 tree type, bool named ATTRIBUTE_UNUSED)
2925 if (type && int_size_in_bytes (type) == -1)
2927 if (TARGET_DEBUG_ARG)
2928 fprintf (stderr, "function_arg_pass_by_reference\n");
2935 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2936 passing ABI. Only called if TARGET_SSE. */
2938 contains_128bit_aligned_vector_p (tree type)
2940 enum machine_mode mode = TYPE_MODE (type);
2941 if (SSE_REG_MODE_P (mode)
2942 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2944 if (TYPE_ALIGN (type) < 128)
2947 if (AGGREGATE_TYPE_P (type))
2949 /* Walk the aggregates recursively. */
2950 if (TREE_CODE (type) == RECORD_TYPE
2951 || TREE_CODE (type) == UNION_TYPE
2952 || TREE_CODE (type) == QUAL_UNION_TYPE)
2956 if (TYPE_BINFO (type))
2958 tree binfo, base_binfo;
2961 for (binfo = TYPE_BINFO (type), i = 0;
2962 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2963 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2966 /* And now merge the fields of the structure. */
2967 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2969 if (TREE_CODE (field) == FIELD_DECL
2970 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2974 /* Just in case some languages pass arrays by value. */
2975 else if (TREE_CODE (type) == ARRAY_TYPE)
2977 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2986 /* Gives the alignment boundary, in bits, of an argument with the
2987 specified mode and type. */
2990 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2994 align = TYPE_ALIGN (type);
2996 align = GET_MODE_ALIGNMENT (mode);
2997 if (align < PARM_BOUNDARY)
2998 align = PARM_BOUNDARY;
3001 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3002 make an exception for SSE modes since these require 128bit
3005 The handling here differs from field_alignment. ICC aligns MMX
3006 arguments to 4 byte boundaries, while structure fields are aligned
3007 to 8 byte boundaries. */
3009 align = PARM_BOUNDARY;
3012 if (!SSE_REG_MODE_P (mode))
3013 align = PARM_BOUNDARY;
3017 if (!contains_128bit_aligned_vector_p (type))
3018 align = PARM_BOUNDARY;
3026 /* Return true if N is a possible register number for a function value. */
3028 ix86_function_value_regno_p (int regno)
3032 return ((regno) == 0
3033 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3034 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3036 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3037 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3038 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3041 /* Define how to find the value returned by a function.
3042 VALTYPE is the data type of the value (as a tree).
3043 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3044 otherwise, FUNC is 0. */
3046 ix86_function_value (tree valtype)
3048 enum machine_mode natmode = type_natural_mode (valtype);
3052 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3053 1, REGPARM_MAX, SSE_REGPARM_MAX,
3054 x86_64_int_return_registers, 0);
3055 /* For zero sized structures, construct_container returns NULL, but we
3056 need to keep the rest of the compiler happy by returning a meaningful value. */
3058 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3062 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
3065 /* Return true iff type is returned in memory. */
3067 ix86_return_in_memory (tree type)
3069 int needed_intregs, needed_sseregs, size;
3070 enum machine_mode mode = type_natural_mode (type);
3073 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3075 if (mode == BLKmode)
3078 size = int_size_in_bytes (type);
3080 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3083 if (VECTOR_MODE_P (mode) || mode == TImode)
3085 /* User-created vectors small enough to fit in EAX. */
3089 /* MMX/3dNow values are returned on the stack, since we've
3090 got to EMMS/FEMMS before returning. */
3094 /* SSE values are returned in XMM0, except when it doesn't exist. */
3096 return (TARGET_SSE ? 0 : 1);
3107 /* When returning SSE vector types, we have a choice of either
3108 (1) being abi incompatible with a -march switch, or
3109 (2) generating an error.
3110 Given no good solution, I think the safest thing is one warning.
3111 The user won't be able to use -Werror, but....
3113 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3114 called in response to actually generating a caller or callee that
3115 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3116 via aggregate_value_p for general type probing from tree-ssa. */
3119 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3123 if (!TARGET_SSE && type && !warned)
3125 /* Look at the return type of the function, not the function type. */
3126 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3129 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3132 warning ("SSE vector return without SSE enabled changes the ABI");
3139 /* Define how to find the value returned by a library function
3140 assuming the value has mode MODE. */
3142 ix86_libcall_value (enum machine_mode mode)
3153 return gen_rtx_REG (mode, FIRST_SSE_REG);
3156 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3160 return gen_rtx_REG (mode, 0);
3164 return gen_rtx_REG (mode, ix86_value_regno (mode));
3167 /* Given a mode, return the register to use for a return value. */
3170 ix86_value_regno (enum machine_mode mode)
3172 /* Floating point return values in %st(0). */
3173 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3174 return FIRST_FLOAT_REG;
3175 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3176 we prevent this case when SSE is not available. */
3177 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3178 return FIRST_SSE_REG;
3179 /* Everything else in %eax. */
3183 /* Create the va_list data type. */
3186 ix86_build_builtin_va_list (void)
3188 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3190 /* For i386 we use a plain pointer to the argument area. */
3192 return build_pointer_type (char_type_node);
3194 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3195 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3197 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3198 unsigned_type_node);
3199 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3200 unsigned_type_node);
3201 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3203 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3206 DECL_FIELD_CONTEXT (f_gpr) = record;
3207 DECL_FIELD_CONTEXT (f_fpr) = record;
3208 DECL_FIELD_CONTEXT (f_ovf) = record;
3209 DECL_FIELD_CONTEXT (f_sav) = record;
3211 TREE_CHAIN (record) = type_decl;
3212 TYPE_NAME (record) = type_decl;
3213 TYPE_FIELDS (record) = f_gpr;
3214 TREE_CHAIN (f_gpr) = f_fpr;
3215 TREE_CHAIN (f_fpr) = f_ovf;
3216 TREE_CHAIN (f_ovf) = f_sav;
3218 layout_type (record);
3220 /* The correct type is an array type of one element. */
3221 return build_array_type (record, build_index_type (size_zero_node));
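/* For reference, a sketch of the 64-bit va_list this constructs, in the
   field order laid out above:

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;     -- bytes into reg_save_area for GPRs
       unsigned int fp_offset;     -- bytes into reg_save_area for SSE regs
       void *overflow_arg_area;    -- next argument passed on the stack
       void *reg_save_area;        -- register block saved by the prologue
     } va_list[1];                 -- the one-element array returned here
*/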
3224 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3227 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3228 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3231 CUMULATIVE_ARGS next_cum;
3232 rtx save_area = NULL_RTX, mem;
3245 /* Indicate that we need to allocate stack space for the varargs save area. */
3246 ix86_save_varrargs_registers = 1;
3248 cfun->stack_alignment_needed = 128;
3250 fntype = TREE_TYPE (current_function_decl);
3251 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3252 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3253 != void_type_node));
3255 /* For varargs, we do not want to skip the dummy va_dcl argument.
3256 For stdargs, we do want to skip the last named argument. */
3259 function_arg_advance (&next_cum, mode, type, 1);
3262 save_area = frame_pointer_rtx;
3264 set = get_varargs_alias_set ();
3266 for (i = next_cum.regno; i < ix86_regparm; i++)
3268 mem = gen_rtx_MEM (Pmode,
3269 plus_constant (save_area, i * UNITS_PER_WORD));
3270 set_mem_alias_set (mem, set);
3271 emit_move_insn (mem, gen_rtx_REG (Pmode,
3272 x86_64_int_parameter_registers[i]));
3275 if (next_cum.sse_nregs)
3277 /* Now emit code to save SSE registers. The AX parameter contains the number
3278 of SSE parameter registers used to call this function. We use the
3279 sse_prologue_save insn template that produces a computed jump across
3280 the SSE saves. We need some preparation work to get this working. */
3282 label = gen_label_rtx ();
3283 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3285 /* Compute the address to jump to:
3286 label - 5*eax + nnamed_sse_arguments*5 */
3287 tmp_reg = gen_reg_rtx (Pmode);
3288 nsse_reg = gen_reg_rtx (Pmode);
3289 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3290 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3291 gen_rtx_MULT (Pmode, nsse_reg,
3293 if (next_cum.sse_regno)
3296 gen_rtx_CONST (DImode,
3297 gen_rtx_PLUS (DImode,
3299 GEN_INT (next_cum.sse_regno * 4))));
3301 emit_move_insn (nsse_reg, label_ref);
3302 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3304 /* Compute the address of the memory block we save into. We always use a
3305 pointer pointing 127 bytes after the first byte to store - this is needed
3306 to keep the instruction size limited to 4 bytes. */
3307 tmp_reg = gen_reg_rtx (Pmode);
3308 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3309 plus_constant (save_area,
3310 8 * REGPARM_MAX + 127)));
3311 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3312 set_mem_alias_set (mem, set);
3313 set_mem_align (mem, BITS_PER_WORD);
3315 /* And finally do the dirty job! */
3316 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3317 GEN_INT (next_cum.sse_regno), label));
3322 /* Implement va_start. */
3325 ix86_va_start (tree valist, rtx nextarg)
3327 HOST_WIDE_INT words, n_gpr, n_fpr;
3328 tree f_gpr, f_fpr, f_ovf, f_sav;
3329 tree gpr, fpr, ovf, sav, t;
3331 /* Only the 64-bit target needs something special. */
3334 std_expand_builtin_va_start (valist, nextarg);
3338 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3339 f_fpr = TREE_CHAIN (f_gpr);
3340 f_ovf = TREE_CHAIN (f_fpr);
3341 f_sav = TREE_CHAIN (f_ovf);
3343 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3344 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3345 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3346 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3347 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3349 /* Count number of gp and fp argument registers used. */
3350 words = current_function_args_info.words;
3351 n_gpr = current_function_args_info.regno;
3352 n_fpr = current_function_args_info.sse_regno;
3354 if (TARGET_DEBUG_ARG)
3355 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3356 (int) words, (int) n_gpr, (int) n_fpr);
3358 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3359 build_int_cst (NULL_TREE, n_gpr * 8));
3360 TREE_SIDE_EFFECTS (t) = 1;
3361 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3363 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3364 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3365 TREE_SIDE_EFFECTS (t) = 1;
3366 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3368 /* Find the overflow area. */
3369 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3371 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3372 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3373 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3374 TREE_SIDE_EFFECTS (t) = 1;
3375 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3377 /* Find the register save area.
3378 The function prologue saves it right above the stack frame. */
3379 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3380 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3381 TREE_SIDE_EFFECTS (t) = 1;
3382 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
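/* Worked example (a sketch, assuming REGPARM_MAX is 6 as on x86-64):
   for `void f (int a, double d, ...)' the named arguments consume one
   GPR and one SSE register, so va_start stores gp_offset = 1*8 = 8 and
   fp_offset = 1*16 + 8*6 = 64; the first va_arg fetch therefore starts
   just past the named registers in the save area. */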
3385 /* Implement va_arg. */
3388 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3390 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3391 tree f_gpr, f_fpr, f_ovf, f_sav;
3392 tree gpr, fpr, ovf, sav, t;
3394 tree lab_false, lab_over = NULL_TREE;
3399 enum machine_mode nat_mode;
3401 /* Only the 64-bit target needs something special. */
3403 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3405 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3406 f_fpr = TREE_CHAIN (f_gpr);
3407 f_ovf = TREE_CHAIN (f_fpr);
3408 f_sav = TREE_CHAIN (f_ovf);
3410 valist = build_va_arg_indirect_ref (valist);
3411 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3412 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3413 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3414 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3416 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3418 type = build_pointer_type (type);
3419 size = int_size_in_bytes (type);
3420 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3422 nat_mode = type_natural_mode (type);
3423 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3424 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3426 /* Pull the value out of the saved registers. */
3428 addr = create_tmp_var (ptr_type_node, "addr");
3429 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3433 int needed_intregs, needed_sseregs;
3435 tree int_addr, sse_addr;
3437 lab_false = create_artificial_label ();
3438 lab_over = create_artificial_label ();
3440 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3442 need_temp = (!REG_P (container)
3443 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3444 || TYPE_ALIGN (type) > 128));
3446 /* When passing a structure, verify that it forms a consecutive block
3447 in the register save area. If not, we need to do moves. */
3448 if (!need_temp && !REG_P (container))
3450 /* Verify that all registers are strictly consecutive */
3451 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3455 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3457 rtx slot = XVECEXP (container, 0, i);
3458 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3459 || INTVAL (XEXP (slot, 1)) != i * 16)
3467 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3469 rtx slot = XVECEXP (container, 0, i);
3470 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3471 || INTVAL (XEXP (slot, 1)) != i * 8)
3483 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3484 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3485 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3486 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3489 /* First ensure that we fit completely in registers. */
3492 t = build_int_cst (TREE_TYPE (gpr),
3493 (REGPARM_MAX - needed_intregs + 1) * 8);
3494 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3495 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3496 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3497 gimplify_and_add (t, pre_p);
3501 t = build_int_cst (TREE_TYPE (fpr),
3502 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3504 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3505 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3506 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3507 gimplify_and_add (t, pre_p);
3510 /* Compute index to start of area used for integer regs. */
3513 /* int_addr = gpr + sav; */
3514 t = fold_convert (ptr_type_node, gpr);
3515 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3516 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3517 gimplify_and_add (t, pre_p);
3521 /* sse_addr = fpr + sav; */
3522 t = fold_convert (ptr_type_node, fpr);
3523 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3524 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3525 gimplify_and_add (t, pre_p);
3530 tree temp = create_tmp_var (type, "va_arg_tmp");
3533 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3534 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3535 gimplify_and_add (t, pre_p);
3537 for (i = 0; i < XVECLEN (container, 0); i++)
3539 rtx slot = XVECEXP (container, 0, i);
3540 rtx reg = XEXP (slot, 0);
3541 enum machine_mode mode = GET_MODE (reg);
3542 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3543 tree addr_type = build_pointer_type (piece_type);
3546 tree dest_addr, dest;
3548 if (SSE_REGNO_P (REGNO (reg)))
3550 src_addr = sse_addr;
3551 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3555 src_addr = int_addr;
3556 src_offset = REGNO (reg) * 8;
3558 src_addr = fold_convert (addr_type, src_addr);
3559 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3560 size_int (src_offset)));
3561 src = build_va_arg_indirect_ref (src_addr);
3563 dest_addr = fold_convert (addr_type, addr);
3564 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3565 size_int (INTVAL (XEXP (slot, 1)))));
3566 dest = build_va_arg_indirect_ref (dest_addr);
3568 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3569 gimplify_and_add (t, pre_p);
3575 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3576 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3577 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3578 gimplify_and_add (t, pre_p);
3582 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3583 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3584 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3585 gimplify_and_add (t, pre_p);
3588 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3589 gimplify_and_add (t, pre_p);
3591 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3592 append_to_statement_list (t, pre_p);
3595 /* ... otherwise out of the overflow area. */
3597 /* Care for on-stack alignment if needed. */
3598 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3602 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3603 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3604 build_int_cst (TREE_TYPE (ovf), align - 1));
3605 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3606 build_int_cst (TREE_TYPE (t), -align));
3608 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3610 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3611 gimplify_and_add (t2, pre_p);
3613 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3614 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3615 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3616 gimplify_and_add (t, pre_p);
3620 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3621 append_to_statement_list (t, pre_p);
3624 ptrtype = build_pointer_type (type);
3625 addr = fold_convert (ptrtype, addr);
3628 addr = build_va_arg_indirect_ref (addr);
3629 return build_va_arg_indirect_ref (addr);
3632 /* Return nonzero if OPNUM's MEM should be matched
3633 in movabs* patterns. */
3636 ix86_check_movabs (rtx insn, int opnum)
3640 set = PATTERN (insn);
3641 if (GET_CODE (set) == PARALLEL)
3642 set = XVECEXP (set, 0, 0);
3643 if (GET_CODE (set) != SET)
3645 mem = XEXP (set, opnum);
3646 while (GET_CODE (mem) == SUBREG)
3647 mem = SUBREG_REG (mem);
3648 if (GET_CODE (mem) != MEM)
3650 return (volatile_ok || !MEM_VOLATILE_P (mem));
3653 /* Initialize the table of extra 80387 mathematical constants. */
3656 init_ext_80387_constants (void)
3658 static const char * cst[5] =
3660 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3661 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3662 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3663 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3664 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3668 for (i = 0; i < 5; i++)
3670 real_from_string (&ext_80387_constants_table[i], cst[i]);
3671 /* Ensure each constant is rounded to XFmode precision. */
3672 real_convert (&ext_80387_constants_table[i],
3673 XFmode, &ext_80387_constants_table[i]);
3676 ext_80387_constants_init = 1;
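/* Illustration (a sketch, assuming the usual return-value mapping):
   standard_80387_constant_p below returns 1 for +0.0 (loadable with
   fldz), 2 for 1.0 (fld1), and higher values for the table above
   (fldlg2, fldln2, fldl2e, fldl2t, fldpi).
   standard_80387_constant_opcode maps those values back to opcode
   strings, so an insn output template can do:

     if (standard_80387_constant_p (operands[1]))
       return standard_80387_constant_opcode (operands[1]);
*/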
3679 /* Return true if the constant is something that can be loaded with
3680 a special instruction. */
3683 standard_80387_constant_p (rtx x)
3685 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3688 if (x == CONST0_RTX (GET_MODE (x)))
3690 if (x == CONST1_RTX (GET_MODE (x)))
3693 /* For XFmode constants, try to find a special 80387 instruction when
3694 optimizing for size or on those CPUs that benefit from them. */
3695 if (GET_MODE (x) == XFmode
3696 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3701 if (! ext_80387_constants_init)
3702 init_ext_80387_constants ();
3704 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3705 for (i = 0; i < 5; i++)
3706 if (real_identical (&r, &ext_80387_constants_table[i]))
3713 /* Return the opcode of the special instruction to be used to load
3717 standard_80387_constant_opcode (rtx x)
3719 switch (standard_80387_constant_p (x))
3739 /* Return the CONST_DOUBLE representing the 80387 constant that is
3740 loaded by the specified special instruction. The argument IDX
3741 matches the return value from standard_80387_constant_p. */
3744 standard_80387_constant_rtx (int idx)
3748 if (! ext_80387_constants_init)
3749 init_ext_80387_constants ();
3765 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3769 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.
3772 standard_sse_constant_p (rtx x)
3774 if (x == const0_rtx)
3776 return (x == CONST0_RTX (GET_MODE (x)));
3779 /* Returns 1 if OP contains a symbol reference */
3782 symbolic_reference_mentioned_p (rtx op)
3787 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3790 fmt = GET_RTX_FORMAT (GET_CODE (op));
3791 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3797 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3798 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3802 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3809 /* Return 1 if it is appropriate to emit `ret' instructions in the
3810 body of a function. Do this only if the epilogue is simple, needing a
3811 couple of insns. Prior to reloading, we can't tell how many registers
3812 must be saved, so return 0 then. Return 0 if there is no frame
3813 marker to de-allocate. */
3816 ix86_can_use_return_insn_p (void)
3818 struct ix86_frame frame;
3820 if (! reload_completed || frame_pointer_needed)
3823 /* Don't allow more than 32K bytes of popped args, since that's all we
3824 can do with one instruction. */
3825 if (current_function_pops_args
3826 && current_function_args_size >= 32768)
3829 ix86_compute_frame_layout (&frame);
3830 return frame.to_allocate == 0 && frame.nregs == 0;
3833 /* Value should be nonzero if functions must have frame pointers.
3834 Zero means the frame pointer need not be set up (and parms may
3835 be accessed via the stack pointer) in functions that seem suitable. */
3838 ix86_frame_pointer_required (void)
3840 /* If we accessed previous frames, then the generated code expects
3841 to be able to access the saved ebp value in our frame. */
3842 if (cfun->machine->accesses_prev_frame)
3845 /* Several x86 OSes need a frame pointer for other reasons,
3846 usually pertaining to setjmp. */
3847 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3850 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3851 the frame pointer by default. Turn it back on now if we've not
3852 got a leaf function. */
3853 if (TARGET_OMIT_LEAF_FRAME_POINTER
3854 && (!current_function_is_leaf))
3857 if (current_function_profile)
3863 /* Record that the current function accesses previous call frames. */
3866 ix86_setup_frame_addresses (void)
3868 cfun->machine->accesses_prev_frame = 1;
3871 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3872 # define USE_HIDDEN_LINKONCE 1
3874 # define USE_HIDDEN_LINKONCE 0
3877 static int pic_labels_used;
3879 /* Fills in the label name that should be used for a PC thunk for
3880 the given register. */
3883 get_pc_thunk_name (char name[32], unsigned int regno)
3885 if (USE_HIDDEN_LINKONCE)
3886 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3888 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3892 /* This function generates code for -fpic that loads %ebx with
3893 the return address of the caller and then returns. */
3896 ix86_file_end (void)
3901 for (regno = 0; regno < 8; ++regno)
3905 if (! ((pic_labels_used >> regno) & 1))
3908 get_pc_thunk_name (name, regno);
3910 if (USE_HIDDEN_LINKONCE)
3914 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3916 TREE_PUBLIC (decl) = 1;
3917 TREE_STATIC (decl) = 1;
3918 DECL_ONE_ONLY (decl) = 1;
3920 (*targetm.asm_out.unique_section) (decl, 0);
3921 named_section (decl, NULL, 0);
3923 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3924 fputs ("\t.hidden\t", asm_out_file);
3925 assemble_name (asm_out_file, name);
3926 fputc ('\n', asm_out_file);
3927 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3932 ASM_OUTPUT_LABEL (asm_out_file, name);
3935 xops[0] = gen_rtx_REG (SImode, regno);
3936 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3937 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3938 output_asm_insn ("ret", xops);
3941 if (NEED_INDICATE_EXEC_STACK)
3942 file_end_indicate_exec_stack ();
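/* A sketch of the thunk emitted by the loop above, for %ebx (AT&T syntax):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   On entry the caller's return address sits on top of the stack, so the
   mov loads the address of the instruction following the call into %ebx.  */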
3945 /* Emit code for the SET_GOT patterns. */
3948 output_set_got (rtx dest)
3953 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3955 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3957 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3960 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3962 output_asm_insn ("call\t%a2", xops);
3965 /* Output the "canonical" label name ("Lxx$pb") here too. This
3966 is what will be referred to by the Mach-O PIC subsystem. */
3967 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3969 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3970 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3973 output_asm_insn ("pop{l}\t%0", xops);
3978 get_pc_thunk_name (name, REGNO (dest));
3979 pic_labels_used |= 1 << REGNO (dest);
3981 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3982 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3983 output_asm_insn ("call\t%X2", xops);
3986 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3987 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3988 else if (!TARGET_MACHO)
3989 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
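/* For illustration, without deep branch prediction the code above emits
   roughly the following, with %ebx as DEST (AT&T syntax):

       call    .L2
   .L2:
       popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction, the call/pop pair is replaced by a call to
   the matching pc thunk, which keeps the return-stack predictor balanced.  */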
3994 /* Generate a "push" pattern for input ARG. */
3999 return gen_rtx_SET (VOIDmode,
4000 gen_rtx_MEM (Pmode,
4001 gen_rtx_PRE_DEC (Pmode,
4002 stack_pointer_rtx)),
4003 arg);
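/* Schematically, the RTL built above is

       (set (mem:SI (pre_dec:SI (reg:SI esp))) arg)

   i.e. an ordinary word-sized push of ARG (SImode shown for the 32-bit
   case; Pmode is DImode on 64-bit targets).  */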
4006 /* Return >= 0 if there is an unused call-clobbered register available
4007 for the entire function. */
4010 ix86_select_alt_pic_regnum (void)
4012 if (current_function_is_leaf && !current_function_profile)
4015 for (i = 2; i >= 0; --i)
4016 if (!regs_ever_live[i])
4020 return INVALID_REGNUM;
4023 /* Return 1 if we need to save REGNO. */
4025 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4027 if (pic_offset_table_rtx
4028 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4029 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4030 || current_function_profile
4031 || current_function_calls_eh_return
4032 || current_function_uses_const_pool))
4034 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4039 if (current_function_calls_eh_return && maybe_eh_return)
4044 unsigned test = EH_RETURN_DATA_REGNO (i);
4045 if (test == INVALID_REGNUM)
4052 return (regs_ever_live[regno]
4053 && !call_used_regs[regno]
4054 && !fixed_regs[regno]
4055 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4058 /* Return number of registers to be saved on the stack. */
4061 ix86_nsaved_regs (void)
4066 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4067 if (ix86_save_reg (regno, true))
4072 /* Return the offset between two registers, one to be eliminated, and the other
4073 its replacement, at the start of a routine. */
4076 ix86_initial_elimination_offset (int from, int to)
4078 struct ix86_frame frame;
4079 ix86_compute_frame_layout (&frame);
4081 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4082 return frame.hard_frame_pointer_offset;
4083 else if (from == FRAME_POINTER_REGNUM
4084 && to == HARD_FRAME_POINTER_REGNUM)
4085 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4088 if (to != STACK_POINTER_REGNUM)
4090 else if (from == ARG_POINTER_REGNUM)
4091 return frame.stack_pointer_offset;
4092 else if (from != FRAME_POINTER_REGNUM)
4095 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4099 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
4102 ix86_compute_frame_layout (struct ix86_frame *frame)
4104 HOST_WIDE_INT total_size;
4105 unsigned int stack_alignment_needed;
4106 HOST_WIDE_INT offset;
4107 unsigned int preferred_alignment;
4108 HOST_WIDE_INT size = get_frame_size ();
4110 frame->nregs = ix86_nsaved_regs ();
4113 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4114 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4116 /* During reload iteration the number of registers saved can change.
4117 Recompute the value as needed. Do not recompute when the number of
4118 registers didn't change, as reload makes multiple calls to this function
4119 and does not expect the decision to change within a single iteration. */
4121 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4123 int count = frame->nregs;
4125 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4126 /* The fast prologue uses move instead of push to save registers. This
4127 is significantly longer, but also executes faster, as modern hardware
4128 can execute the moves in parallel but can't do that for push/pop.
4130 Be careful about choosing which prologue to emit: when the function takes
4131 many instructions to execute, we may as well use the slow version, and the
4132 same holds when the function is known to be outside a hot spot (this is
4133 known with profile feedback only). Weight the size of the function by the
4134 number of registers to save, as it is cheap to use one or two push
4135 instructions but very slow to use many of them. */
4137 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4138 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4139 || (flag_branch_probabilities
4140 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4141 cfun->machine->use_fast_prologue_epilogue = false;
4143 cfun->machine->use_fast_prologue_epilogue
4144 = !expensive_function_p (count);
4146 if (TARGET_PROLOGUE_USING_MOVE
4147 && cfun->machine->use_fast_prologue_epilogue)
4148 frame->save_regs_using_mov = true;
4150 frame->save_regs_using_mov = false;
4153 /* Skip return address and saved base pointer. */
4154 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4156 frame->hard_frame_pointer_offset = offset;
4158 /* Do some sanity checking of stack_alignment_needed and
4159 preferred_alignment, since the i386 port is the only one using these
4160 features, and they may break easily. */
4162 if (size && !stack_alignment_needed)
4164 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4166 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4168 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4171 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4172 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4174 /* Register save area */
4175 offset += frame->nregs * UNITS_PER_WORD;
4178 if (ix86_save_varrargs_registers)
4180 offset += X86_64_VARARGS_SIZE;
4181 frame->va_arg_size = X86_64_VARARGS_SIZE;
4184 frame->va_arg_size = 0;
4186 /* Align start of frame for local function. */
4187 frame->padding1 = ((offset + stack_alignment_needed - 1)
4188 & -stack_alignment_needed) - offset;
4190 offset += frame->padding1;
4192 /* Frame pointer points here. */
4193 frame->frame_pointer_offset = offset;
4197 /* Add outgoing arguments area. Can be skipped if we eliminated
4198 all the function calls as dead code.
4199 Skipping is, however, impossible when the function calls alloca: the
4200 alloca expander assumes that the last current_function_outgoing_args_size
4201 bytes of the stack frame are unused. */
4202 if (ACCUMULATE_OUTGOING_ARGS
4203 && (!current_function_is_leaf || current_function_calls_alloca))
4205 offset += current_function_outgoing_args_size;
4206 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4209 frame->outgoing_arguments_size = 0;
4211 /* Align stack boundary. Only needed if we're calling another function
4212 or using alloca. */
4213 if (!current_function_is_leaf || current_function_calls_alloca)
4214 frame->padding2 = ((offset + preferred_alignment - 1)
4215 & -preferred_alignment) - offset;
4217 frame->padding2 = 0;
4219 offset += frame->padding2;
4221 /* We've reached end of stack frame. */
4222 frame->stack_pointer_offset = offset;
4224 /* Size that the prologue needs to allocate. */
4225 frame->to_allocate =
4226 (size + frame->padding1 + frame->padding2
4227 + frame->outgoing_arguments_size + frame->va_arg_size);
4229 if ((!frame->to_allocate && frame->nregs <= 1)
4230 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4231 frame->save_regs_using_mov = false;
4233 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4234 && current_function_is_leaf)
4236 frame->red_zone_size = frame->to_allocate;
4237 if (frame->save_regs_using_mov)
4238 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4239 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4240 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4243 frame->red_zone_size = 0;
4244 frame->to_allocate -= frame->red_zone_size;
4245 frame->stack_pointer_offset -= frame->red_zone_size;
4247 fprintf (stderr, "nregs: %i\n", frame->nregs);
4248 fprintf (stderr, "size: %i\n", size);
4249 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4250 fprintf (stderr, "padding1: %i\n", frame->padding1);
4251 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4252 fprintf (stderr, "padding2: %i\n", frame->padding2);
4253 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4254 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4255 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4256 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4257 frame->hard_frame_pointer_offset);
4258 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
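/* A sketch of the layout computed above, from higher to lower addresses:

       return address
       saved %ebp (if frame_pointer_needed)     <- hard_frame_pointer_offset
       register save area (nregs words)
       va_arg register save area (if any)
       padding1
                                                <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing arguments
       padding2                                 <- stack_pointer_offset

   to_allocate covers everything below the register save area, i.e. the
   part the prologue must allocate explicitly (less any red zone).  */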
4262 /* Emit code to save registers in the prologue. */
4265 ix86_emit_save_regs (void)
4270 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4271 if (ix86_save_reg (regno, true))
4273 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4274 RTX_FRAME_RELATED_P (insn) = 1;
4278 /* Emit code to save registers using MOV insns. The first register
4279 is saved at POINTER + OFFSET. */
4281 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4286 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4287 if (ix86_save_reg (regno, true))
4289 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4291 gen_rtx_REG (Pmode, regno));
4292 RTX_FRAME_RELATED_P (insn) = 1;
4293 offset += UNITS_PER_WORD;
4297 /* Expand prologue or epilogue stack adjustment.
4298 The pattern exists to put a dependency on all ebp-based memory accesses.
4299 STYLE should be negative if instructions should be marked as frame related,
4300 zero if the %r11 register is live and cannot be freely used, and positive
4301 otherwise. */
4304 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4309 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4310 else if (x86_64_immediate_operand (offset, DImode))
4311 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4315 /* r11 is used by indirect sibcall return as well, set before the
4316 epilogue and used after the epilogue. At the moment an indirect sibcall
4317 shouldn't be used together with huge frame sizes in one
4318 function because of the frame_size check in sibcall.c. */
4321 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4322 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4324 RTX_FRAME_RELATED_P (insn) = 1;
4325 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4329 RTX_FRAME_RELATED_P (insn) = 1;
4332 /* Expand the prologue into a bunch of separate insns. */
4335 ix86_expand_prologue (void)
4339 struct ix86_frame frame;
4340 HOST_WIDE_INT allocate;
4342 ix86_compute_frame_layout (&frame);
4344 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4345 slower on all targets. Also sdb doesn't like it. */
4347 if (frame_pointer_needed)
4349 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4350 RTX_FRAME_RELATED_P (insn) = 1;
4352 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4353 RTX_FRAME_RELATED_P (insn) = 1;
4356 allocate = frame.to_allocate;
4358 if (!frame.save_regs_using_mov)
4359 ix86_emit_save_regs ();
4361 allocate += frame.nregs * UNITS_PER_WORD;
4363 /* When using the red zone, we may start saving registers before
4364 allocating the stack frame, saving one cycle of the prologue. */
4365 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4366 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4367 : stack_pointer_rtx,
4368 -frame.nregs * UNITS_PER_WORD);
4372 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4373 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4374 GEN_INT (-allocate), -1);
4377 /* Only valid for Win32. */
4378 rtx eax = gen_rtx_REG (SImode, 0);
4379 bool eax_live = ix86_eax_live_at_start_p ();
4387 emit_insn (gen_push (eax));
4391 emit_move_insn (eax, GEN_INT (allocate));
4393 insn = emit_insn (gen_allocate_stack_worker (eax));
4394 RTX_FRAME_RELATED_P (insn) = 1;
4395 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4396 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4397 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4398 t, REG_NOTES (insn));
4402 if (frame_pointer_needed)
4403 t = plus_constant (hard_frame_pointer_rtx,
4406 - frame.nregs * UNITS_PER_WORD);
4408 t = plus_constant (stack_pointer_rtx, allocate);
4409 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4413 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4415 if (!frame_pointer_needed || !frame.to_allocate)
4416 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4418 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4419 -frame.nregs * UNITS_PER_WORD);
4422 pic_reg_used = false;
4423 if (pic_offset_table_rtx
4424 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4425 || current_function_profile))
4427 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4429 if (alt_pic_reg_used != INVALID_REGNUM)
4430 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4432 pic_reg_used = true;
4437 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4439 /* Even with accurate pre-reload life analysis, we can wind up
4440 deleting all references to the pic register after reload.
4441 Consider if cross-jumping unifies two sides of a branch
4442 controlled by a comparison vs the only read from a global.
4443 In which case, allow the set_got to be deleted, though we're
4444 too late to do anything about the ebx save in the prologue. */
4445 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4448 /* Prevent function calls from being scheduled before the call to mcount.
4449 In the pic_reg_used case, make sure that the got load isn't deleted. */
4450 if (current_function_profile)
4451 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
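/* For illustration, with a frame pointer and the push-based register save
   the code above yields a prologue like (AT&T syntax, N = frame.to_allocate):

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx            # one saved register
       subl    $N, %esp

   The mov-based variants instead store the saved registers relative to
   %esp or %ebp, which is longer but executes in parallel.  */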
4454 /* Emit code to restore saved registers using MOV insns. The first register
4455 is restored from POINTER + OFFSET. */
4457 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4458 int maybe_eh_return)
4461 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4463 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4464 if (ix86_save_reg (regno, maybe_eh_return))
4466 /* Ensure that adjust_address won't be forced to produce a pointer
4467 outside the range allowed by the x86-64 instruction set. */
4468 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4472 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4473 emit_move_insn (r11, GEN_INT (offset));
4474 emit_insn (gen_adddi3 (r11, r11, pointer));
4475 base_address = gen_rtx_MEM (Pmode, r11);
4478 emit_move_insn (gen_rtx_REG (Pmode, regno),
4479 adjust_address (base_address, Pmode, offset));
4480 offset += UNITS_PER_WORD;
4484 /* Restore function stack, frame, and registers. */
4487 ix86_expand_epilogue (int style)
4490 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4491 struct ix86_frame frame;
4492 HOST_WIDE_INT offset;
4494 ix86_compute_frame_layout (&frame);
4496 /* Calculate start of saved registers relative to ebp. Special care
4497 must be taken for the normal return case of a function using
4498 eh_return: the eax and edx registers are marked as saved, but not
4499 restored along this path. */
4500 offset = frame.nregs;
4501 if (current_function_calls_eh_return && style != 2)
4503 offset *= -UNITS_PER_WORD;
4505 /* If we're only restoring one register and sp is not valid, then
4506 use a move instruction to restore the register, since it's
4507 less work than reloading sp and popping the register.
4509 The default code results in a stack adjustment using an add/lea instruction,
4510 while this code results in a LEAVE instruction (or discrete equivalent),
4511 so it is profitable in some other cases as well, especially when there
4512 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4513 and there is exactly one register to pop. This heuristic may need some
4514 tuning in the future. */
4515 if ((!sp_valid && frame.nregs <= 1)
4516 || (TARGET_EPILOGUE_USING_MOVE
4517 && cfun->machine->use_fast_prologue_epilogue
4518 && (frame.nregs > 1 || frame.to_allocate))
4519 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4520 || (frame_pointer_needed && TARGET_USE_LEAVE
4521 && cfun->machine->use_fast_prologue_epilogue
4522 && frame.nregs == 1)
4523 || current_function_calls_eh_return)
4525 /* Restore registers. We can use ebp or esp to address the memory
4526 locations. If both are available, default to ebp, since offsets
4527 are known to be small. The only exception is esp pointing directly
4528 to the end of the block of saved registers, where we may simplify the
4529 addressing mode. */
4531 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4532 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4533 frame.to_allocate, style == 2);
4535 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4536 offset, style == 2);
4538 /* eh_return epilogues need %ecx added to the stack pointer. */
4541 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4543 if (frame_pointer_needed)
4545 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4546 tmp = plus_constant (tmp, UNITS_PER_WORD);
4547 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4549 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4550 emit_move_insn (hard_frame_pointer_rtx, tmp);
4552 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4557 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4558 tmp = plus_constant (tmp, (frame.to_allocate
4559 + frame.nregs * UNITS_PER_WORD));
4560 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4563 else if (!frame_pointer_needed)
4564 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4565 GEN_INT (frame.to_allocate
4566 + frame.nregs * UNITS_PER_WORD),
4568 /* If not an i386, mov & pop is faster than "leave". */
4569 else if (TARGET_USE_LEAVE || optimize_size
4570 || !cfun->machine->use_fast_prologue_epilogue)
4571 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4574 pro_epilogue_adjust_stack (stack_pointer_rtx,
4575 hard_frame_pointer_rtx,
4578 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4580 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4585 /* First step is to deallocate the stack frame so that we can
4586 pop the registers. */
4589 if (!frame_pointer_needed)
4591 pro_epilogue_adjust_stack (stack_pointer_rtx,
4592 hard_frame_pointer_rtx,
4593 GEN_INT (offset), style);
4595 else if (frame.to_allocate)
4596 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4597 GEN_INT (frame.to_allocate), style);
4599 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4600 if (ix86_save_reg (regno, false))
4603 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4605 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4607 if (frame_pointer_needed)
4609 /* Leave results in shorter dependency chains on CPUs that are
4610 able to grok it fast. */
4611 if (TARGET_USE_LEAVE)
4612 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4613 else if (TARGET_64BIT)
4614 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4616 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4620 /* Sibcall epilogues don't want a return instruction. */
4624 if (current_function_pops_args && current_function_args_size)
4626 rtx popc = GEN_INT (current_function_pops_args);
4628 /* The i386 can only pop 64K bytes with one "ret". If asked to pop more,
4629 pop the return address, do an explicit add, and jump indirectly to the
4630 caller. */
4632 if (current_function_pops_args >= 65536)
4634 rtx ecx = gen_rtx_REG (SImode, 2);
4636 /* There is no "pascal" calling convention in the 64-bit ABI. */
4640 emit_insn (gen_popsi1 (ecx));
4641 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4642 emit_jump_insn (gen_return_indirect_internal (ecx));
4645 emit_jump_insn (gen_return_pop_internal (popc));
4648 emit_jump_insn (gen_return_internal ());
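/* For illustration, when 64K or more bytes of arguments must be popped
   the code above emits, instead of a single "ret $N":

       popl    %ecx
       addl    $N, %esp
       jmp     *%ecx    */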
4651 /* Reset from the function's potential modifications. */
4654 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4655 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4657 if (pic_offset_table_rtx)
4658 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4661 /* Extract the parts of an RTL expression that is a valid memory address
4662 for an instruction. Return 0 if the structure of the address is
4663 grossly off. Return -1 if the address contains ASHIFT, so it is not
4664 strictly valid but is still used for computing the length of a lea instruction. */
4667 ix86_decompose_address (rtx addr, struct ix86_address *out)
4669 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4670 rtx base_reg, index_reg;
4671 HOST_WIDE_INT scale = 1;
4672 rtx scale_rtx = NULL_RTX;
4674 enum ix86_address_seg seg = SEG_DEFAULT;
4676 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4678 else if (GET_CODE (addr) == PLUS)
4688 addends[n++] = XEXP (op, 1);
4691 while (GET_CODE (op) == PLUS);
4696 for (i = n; i >= 0; --i)
4699 switch (GET_CODE (op))
4704 index = XEXP (op, 0);
4705 scale_rtx = XEXP (op, 1);
4709 if (XINT (op, 1) == UNSPEC_TP
4710 && TARGET_TLS_DIRECT_SEG_REFS
4711 && seg == SEG_DEFAULT)
4712 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4741 else if (GET_CODE (addr) == MULT)
4743 index = XEXP (addr, 0); /* index*scale */
4744 scale_rtx = XEXP (addr, 1);
4746 else if (GET_CODE (addr) == ASHIFT)
4750 /* We're called for lea too, which implements ashift on occasion. */
4751 index = XEXP (addr, 0);
4752 tmp = XEXP (addr, 1);
4753 if (GET_CODE (tmp) != CONST_INT)
4754 return 0;
4755 scale = INTVAL (tmp);
4756 if ((unsigned HOST_WIDE_INT) scale > 3)
4762 disp = addr; /* displacement */
4764 /* Extract the integral value of scale. */
4767 if (GET_CODE (scale_rtx) != CONST_INT)
4768 return 0;
4769 scale = INTVAL (scale_rtx);
4772 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4773 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
4775 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4776 if (base_reg && index_reg && scale == 1
4777 && (index_reg == arg_pointer_rtx
4778 || index_reg == frame_pointer_rtx
4779 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4782 tmp = base, base = index, index = tmp;
4783 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4786 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4787 if ((base_reg == hard_frame_pointer_rtx
4788 || base_reg == frame_pointer_rtx
4789 || base_reg == arg_pointer_rtx) && !disp)
4792 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4793 Avoid this by transforming to [%esi+0]. */
4794 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4795 && base_reg && !index_reg && !disp
4797 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
4800 /* Special case: encode reg+reg instead of reg*2. */
4801 if (!base && index && scale && scale == 2)
4802 base = index, base_reg = index_reg, scale = 1;
4804 /* Special case: scaling cannot be encoded without base or displacement. */
4805 if (!base && !disp && index && scale != 1)
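/* For illustration: an address such as

       (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 12)))

   decomposes into base = B, index = A, scale = 4 and disp = 12, i.e. the
   operand "12(%ebx,%eax,4)" in AT&T syntax if B and A happened to land in
   %ebx and %eax (register assignments here are purely illustrative).  */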
4817 /* Return the cost of the memory address x.
4818 For i386, it is better to use a complex address than let gcc copy
4819 the address into a reg and make a new pseudo. But not if the address
4820 requires two regs - that would mean more pseudos with longer
4821 lifetimes. */
4823 ix86_address_cost (rtx x)
4825 struct ix86_address parts;
4828 if (!ix86_decompose_address (x, &parts))
4831 if (parts.base && GET_CODE (parts.base) == SUBREG)
4832 parts.base = SUBREG_REG (parts.base);
4833 if (parts.index && GET_CODE (parts.index) == SUBREG)
4834 parts.index = SUBREG_REG (parts.index);
4836 /* More complex memory references are better. */
4837 if (parts.disp && parts.disp != const0_rtx)
4839 if (parts.seg != SEG_DEFAULT)
4842 /* Attempt to minimize number of registers in the address. */
4844 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4846 && (!REG_P (parts.index)
4847 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4851 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4853 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4854 && parts.base != parts.index)
4857 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4858 since its predecode logic can't detect the length of such instructions
4859 and they degenerate to vector decoded. Increase the cost of such
4860 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4861 to split such addresses or even refuse such addresses at all.
4863 The following addressing modes are affected:
4864 [base+scale*index]
4865 [scale*index+disp]
4866 [base+index]
4868 The first and last cases may be avoidable by explicitly coding the zero in
4869 the memory address, but I don't have an AMD-K6 machine handy to check this
4870 theory. */
4873 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4874 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4875 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4881 /* If X is a machine specific address (i.e. a symbol or label being
4882 referenced as a displacement from the GOT implemented using an
4883 UNSPEC), then return the base term. Otherwise return X. */
4886 ix86_find_base_term (rtx x)
4892 if (GET_CODE (x) != CONST)
4895 if (GET_CODE (term) == PLUS
4896 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4897 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4898 term = XEXP (term, 0);
4899 if (GET_CODE (term) != UNSPEC
4900 || XINT (term, 1) != UNSPEC_GOTPCREL)
4903 term = XVECEXP (term, 0, 0);
4905 if (GET_CODE (term) != SYMBOL_REF
4906 && GET_CODE (term) != LABEL_REF)
4912 term = ix86_delegitimize_address (x);
4914 if (GET_CODE (term) != SYMBOL_REF
4915 && GET_CODE (term) != LABEL_REF)
4921 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4922 this is used to form addresses to local data when -fPIC is in
4923 effect. */
4926 darwin_local_data_pic (rtx disp)
4928 if (GET_CODE (disp) == MINUS)
4930 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4931 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4932 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4934 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4935 if (! strcmp (sym_name, "<pic base>"))
4943 /* Determine if a given RTX is a valid constant. We already know this
4944 satisfies CONSTANT_P. */
4947 legitimate_constant_p (rtx x)
4949 switch (GET_CODE (x))
4954 if (GET_CODE (x) == PLUS)
4956 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4961 if (TARGET_MACHO && darwin_local_data_pic (x))
4964 /* Only some unspecs are valid as "constants". */
4965 if (GET_CODE (x) == UNSPEC)
4966 switch (XINT (x, 1))
4970 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4972 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4977 /* We must have drilled down to a symbol. */
4978 if (!symbolic_operand (x, Pmode))
4983 /* TLS symbols are never valid. */
4984 if (tls_symbolic_operand (x, Pmode))
4992 /* Otherwise we handle everything else in the move patterns. */
4996 /* Determine if it's legal to put X into the constant pool. This
4997 is not possible for the address of thread-local symbols, which
4998 is checked above. */
5001 ix86_cannot_force_const_mem (rtx x)
5003 return !legitimate_constant_p (x);
5006 /* Determine if a given RTX is a valid constant address. */
5009 constant_address_p (rtx x)
5011 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5014 /* Nonzero if the constant value X is a legitimate general operand
5015 when generating PIC code. It is given that flag_pic is on and
5016 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5019 legitimate_pic_operand_p (rtx x)
5023 switch (GET_CODE (x))
5026 inner = XEXP (x, 0);
5028 /* Only some unspecs are valid as "constants". */
5029 if (GET_CODE (inner) == UNSPEC)
5030 switch (XINT (inner, 1))
5033 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5041 return legitimate_pic_address_disp_p (x);
5048 /* Determine if a given CONST RTX is a valid memory displacement
5049 in PIC mode. */
5052 legitimate_pic_address_disp_p (rtx disp)
5056 /* In 64bit mode we can allow direct addresses of symbols and labels
5057 when they are not dynamic symbols. */
5060 /* TLS references should always be enclosed in UNSPEC. */
5061 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5063 if (GET_CODE (disp) == SYMBOL_REF
5064 && ix86_cmodel == CM_SMALL_PIC
5065 && SYMBOL_REF_LOCAL_P (disp))
5067 if (GET_CODE (disp) == LABEL_REF)
5069 if (GET_CODE (disp) == CONST
5070 && GET_CODE (XEXP (disp, 0)) == PLUS)
5072 rtx op0 = XEXP (XEXP (disp, 0), 0);
5073 rtx op1 = XEXP (XEXP (disp, 0), 1);
5075 /* TLS references should always be enclosed in UNSPEC. */
5076 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5078 if (((GET_CODE (op0) == SYMBOL_REF
5079 && ix86_cmodel == CM_SMALL_PIC
5080 && SYMBOL_REF_LOCAL_P (op0))
5081 || GET_CODE (op0) == LABEL_REF)
5082 && GET_CODE (op1) == CONST_INT
5083 && INTVAL (op1) < 16*1024*1024
5084 && INTVAL (op1) >= -16*1024*1024)
5088 if (GET_CODE (disp) != CONST)
5090 disp = XEXP (disp, 0);
5094 /* Allowing PLUS expressions here would be unsafe; this limits the
5095 allowed distance of GOT table references. We should not need these anyway. */
5096 if (GET_CODE (disp) != UNSPEC
5097 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5100 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5101 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5107 if (GET_CODE (disp) == PLUS)
5109 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5111 disp = XEXP (disp, 0);
5115 if (TARGET_MACHO && darwin_local_data_pic (disp))
5118 if (GET_CODE (disp) != UNSPEC)
5121 switch (XINT (disp, 1))
5126 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5128 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5129 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5130 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5132 case UNSPEC_GOTTPOFF:
5133 case UNSPEC_GOTNTPOFF:
5134 case UNSPEC_INDNTPOFF:
5137 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5139 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5141 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5147 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5148 memory address for an instruction. The MODE argument is the machine mode
5149 for the MEM expression that wants to use this address.
5151 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5152 convert common non-canonical forms to canonical form so that they will
5153 be recognized. */
5156 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5158 struct ix86_address parts;
5159 rtx base, index, disp;
5160 HOST_WIDE_INT scale;
5161 const char *reason = NULL;
5162 rtx reason_rtx = NULL_RTX;
5164 if (TARGET_DEBUG_ADDR)
5167 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5168 GET_MODE_NAME (mode), strict);
5172 if (ix86_decompose_address (addr, &parts) <= 0)
5174 reason = "decomposition failed";
5179 index = parts.index;
5181 scale = parts.scale;
5183 /* Validate base register.
5185 Don't allow SUBREG's that span more than a word here. It can lead to spill
5186 failures when the base is one word out of a two word structure, which is
5187 represented internally as a DImode int. */
5196 else if (GET_CODE (base) == SUBREG
5197 && REG_P (SUBREG_REG (base))
5198 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5200 reg = SUBREG_REG (base);
5203 reason = "base is not a register";
5207 if (GET_MODE (base) != Pmode)
5209 reason = "base is not in Pmode";
5213 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5214 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5216 reason = "base is not valid";
5221 /* Validate index register.
5223 Don't allow SUBREG's that span more than a word here -- same as above. */
5232 else if (GET_CODE (index) == SUBREG
5233 && REG_P (SUBREG_REG (index))
5234 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5236 reg = SUBREG_REG (index);
5239 reason = "index is not a register";
5243 if (GET_MODE (index) != Pmode)
5245 reason = "index is not in Pmode";
5249 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5250 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5252 reason = "index is not valid";
5257 /* Validate scale factor. */
5260 reason_rtx = GEN_INT (scale);
5263 reason = "scale without index";
5267 if (scale != 2 && scale != 4 && scale != 8)
5269 reason = "scale is not a valid multiplier";
5274 /* Validate displacement. */
5279 if (GET_CODE (disp) == CONST
5280 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5281 switch (XINT (XEXP (disp, 0), 1))
5285 case UNSPEC_GOTPCREL:
5288 goto is_legitimate_pic;
5290 case UNSPEC_GOTTPOFF:
5291 case UNSPEC_GOTNTPOFF:
5292 case UNSPEC_INDNTPOFF:
5298 reason = "invalid address unspec";
5302 else if (flag_pic && (SYMBOLIC_CONST (disp)
5304 && !machopic_operand_p (disp)
5309 if (TARGET_64BIT && (index || base))
5311 /* foo@dtpoff(%rX) is ok. */
5312 if (GET_CODE (disp) != CONST
5313 || GET_CODE (XEXP (disp, 0)) != PLUS
5314 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5315 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5316 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5317 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5319 reason = "non-constant pic memory reference";
5323 else if (! legitimate_pic_address_disp_p (disp))
5325 reason = "displacement is an invalid pic construct";
5329 /* This code used to verify that a symbolic pic displacement
5330 includes the pic_offset_table_rtx register.
5332 While this is a good idea, unfortunately these constructs may
5333 be created by the "adds using lea" optimization for incorrect code.
5342 Such code is nonsensical but results in addressing the
5343 GOT table with a pic_offset_table_rtx base. We can't
5344 just refuse it easily, since it gets matched by the
5345 "addsi3" pattern, which later gets split to lea in the
5346 case the output register differs from the input. While this
5347 could be handled by a separate addsi pattern for this case
5348 that never results in lea, disabling this test seems to be the
5349 easier and correct fix for the crash. */
5351 else if (GET_CODE (disp) != LABEL_REF
5352 && GET_CODE (disp) != CONST_INT
5353 && (GET_CODE (disp) != CONST
5354 || !legitimate_constant_p (disp))
5355 && (GET_CODE (disp) != SYMBOL_REF
5356 || !legitimate_constant_p (disp)))
5358 reason = "displacement is not constant";
5361 else if (TARGET_64BIT
5362 && !x86_64_immediate_operand (disp, VOIDmode))
5364 reason = "displacement is out of range";
5369 /* Everything looks valid. */
5370 if (TARGET_DEBUG_ADDR)
5371 fprintf (stderr, "Success.\n");
5375 if (TARGET_DEBUG_ADDR)
5377 fprintf (stderr, "Error: %s\n", reason);
5378 debug_rtx (reason_rtx);
5383 /* Return a unique alias set for the GOT. */
5385 static HOST_WIDE_INT
5386 ix86_GOT_alias_set (void)
5388 static HOST_WIDE_INT set = -1;
5390 set = new_alias_set ();
5394 /* Return a legitimate reference for ORIG (an address) using the
5395 register REG. If REG is 0, a new pseudo is generated.
5397 There are two types of references that must be handled:
5399 1. Global data references must load the address from the GOT, via
5400 the PIC reg. An insn is emitted to do this load, and the reg is
5403 2. Static data references, constant pool addresses, and code labels
5404 compute the address as an offset from the GOT, whose base is in
5405 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5406 differentiate them from global data objects. The returned
5407 address is the PIC reg + an unspec constant.
5409 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5410 reg also appears in the address. */
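/* For illustration, on ia32 the two cases typically materialize as

       movl    foo@GOT(%ebx), %reg     # global data: load address from GOT
       leal    bar@GOTOFF(%ebx), %reg  # local data: GOT-relative offset

   where "foo" and "bar" stand for arbitrary global and local symbols.  */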
5413 legitimize_pic_address (rtx orig, rtx reg)
5421 reg = gen_reg_rtx (Pmode);
5422 /* Use the generic Mach-O PIC machinery. */
5423 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5426 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5428 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5430 /* This symbol may be referenced via a displacement from the PIC
5431 base address (@GOTOFF). */
5433 if (reload_in_progress)
5434 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5435 if (GET_CODE (addr) == CONST)
5436 addr = XEXP (addr, 0);
5437 if (GET_CODE (addr) == PLUS)
5439 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5440 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5443 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5444 new = gen_rtx_CONST (Pmode, new);
5445 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5449 emit_move_insn (reg, new);
5453 else if (GET_CODE (addr) == SYMBOL_REF)
5457 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5458 new = gen_rtx_CONST (Pmode, new);
5459 new = gen_const_mem (Pmode, new);
5460 set_mem_alias_set (new, ix86_GOT_alias_set ());
5463 reg = gen_reg_rtx (Pmode);
5464 /* Use gen_movsi directly; otherwise the address is loaded
5465 into a register for CSE. We don't want to CSE these addresses;
5466 instead we CSE addresses from the GOT table, so skip this. */
5467 emit_insn (gen_movsi (reg, new));
5472 /* This symbol must be referenced via a load from the
5473 Global Offset Table (@GOT). */
5475 if (reload_in_progress)
5476 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5477 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5478 new = gen_rtx_CONST (Pmode, new);
5479 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5480 new = gen_const_mem (Pmode, new);
5481 set_mem_alias_set (new, ix86_GOT_alias_set ());
5484 reg = gen_reg_rtx (Pmode);
5485 emit_move_insn (reg, new);
5491 if (GET_CODE (addr) == CONST)
5493 addr = XEXP (addr, 0);
5495 /* We must match stuff we generate before. Assume the only
5496 unspecs that can get here are ours. Not that we could do
5497 anything with them anyway.... */
5498 if (GET_CODE (addr) == UNSPEC
5499 || (GET_CODE (addr) == PLUS
5500 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5502 if (GET_CODE (addr) != PLUS)
5505 if (GET_CODE (addr) == PLUS)
5507 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5509 /* Check first to see if this is a constant offset from a @GOTOFF
5510 symbol reference. */
5511 if (local_symbolic_operand (op0, Pmode)
5512 && GET_CODE (op1) == CONST_INT)
5516 if (reload_in_progress)
5517 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5518 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5520 new = gen_rtx_PLUS (Pmode, new, op1);
5521 new = gen_rtx_CONST (Pmode, new);
5522 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5526 emit_move_insn (reg, new);
5532 if (INTVAL (op1) < -16*1024*1024
5533 || INTVAL (op1) >= 16*1024*1024)
5534 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5539 base = legitimize_pic_address (XEXP (addr, 0), reg);
5540 new = legitimize_pic_address (XEXP (addr, 1),
5541 base == reg ? NULL_RTX : reg);
5543 if (GET_CODE (new) == CONST_INT)
5544 new = plus_constant (base, INTVAL (new));
5547 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5549 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5550 new = XEXP (new, 1);
5552 new = gen_rtx_PLUS (Pmode, base, new);
5560 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5563 get_thread_pointer (int to_reg)
5567 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5571 reg = gen_reg_rtx (Pmode);
5572 insn = gen_rtx_SET (VOIDmode, reg, tp);
5573 insn = emit_insn (insn);
5578 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5579 false if we expect this to be used for a memory address and true if
5580 we expect to load the address into a register. */
5583 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5585 rtx dest, base, off, pic;
5590 case TLS_MODEL_GLOBAL_DYNAMIC:
5591 dest = gen_reg_rtx (Pmode);
5594 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5597 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5598 insns = get_insns ();
5601 emit_libcall_block (insns, dest, rax, x);
5604 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5607 case TLS_MODEL_LOCAL_DYNAMIC:
5608 base = gen_reg_rtx (Pmode);
5611 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5614 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5615 insns = get_insns ();
5618 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5619 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5620 emit_libcall_block (insns, base, rax, note);
5623 emit_insn (gen_tls_local_dynamic_base_32 (base));
5625 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5626 off = gen_rtx_CONST (Pmode, off);
5628 return gen_rtx_PLUS (Pmode, base, off);
5630 case TLS_MODEL_INITIAL_EXEC:
5634 type = UNSPEC_GOTNTPOFF;
5638 if (reload_in_progress)
5639 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5640 pic = pic_offset_table_rtx;
5641 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5643 else if (!TARGET_GNU_TLS)
5645 pic = gen_reg_rtx (Pmode);
5646 emit_insn (gen_set_got (pic));
5647 type = UNSPEC_GOTTPOFF;
5652 type = UNSPEC_INDNTPOFF;
5655 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5656 off = gen_rtx_CONST (Pmode, off);
5658 off = gen_rtx_PLUS (Pmode, pic, off);
5659 off = gen_const_mem (Pmode, off);
5660 set_mem_alias_set (off, ix86_GOT_alias_set ());
5662 if (TARGET_64BIT || TARGET_GNU_TLS)
5664 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5665 off = force_reg (Pmode, off);
5666 return gen_rtx_PLUS (Pmode, base, off);
5670 base = get_thread_pointer (true);
5671 dest = gen_reg_rtx (Pmode);
5672 emit_insn (gen_subsi3 (dest, base, off));
5676 case TLS_MODEL_LOCAL_EXEC:
5677 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5678 (TARGET_64BIT || TARGET_GNU_TLS)
5679 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5680 off = gen_rtx_CONST (Pmode, off);
5682 if (TARGET_64BIT || TARGET_GNU_TLS)
5684 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5685 return gen_rtx_PLUS (Pmode, base, off);
5689 base = get_thread_pointer (true);
5690 dest = gen_reg_rtx (Pmode);
5691 emit_insn (gen_subsi3 (dest, base, off));
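/* A sketch of the initial-exec form produced above for TARGET_GNU_TLS on
   ia32 (symbol name illustrative; final insn selection may combine these):

       movl    %gs:0, %eax             # thread pointer
       addl    x@gotntpoff(%ebx), %eax # tp + GOT-loaded offset of x

   Local-exec instead folds the offset in as an immediate @ntpoff addend,
   avoiding the GOT load entirely.  */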
5702 /* Try machine-dependent ways of modifying an illegitimate address
5703 to be legitimate. If we find one, return the new, valid address.
5704 This macro is used in only one place: `memory_address' in explow.c.
5706 OLDX is the address as it was before break_out_memory_refs was called.
5707 In some cases it is useful to look at this to decide what needs to be done.
5709 MODE and WIN are passed so that this macro can use
5710 GO_IF_LEGITIMATE_ADDRESS.
5712 It is always safe for this macro to do nothing. It exists to recognize
5713 opportunities to optimize the output.
5715 For the 80386, we handle X+REG by loading X into a register R and
5716 using R+REG. R will go in a general reg and indexing will be used.
5717 However, if REG is a broken-out memory address or multiplication,
5718 nothing needs to be done because REG can certainly go in a general reg.
5720 When -fpic is used, special handling is needed for symbolic references.
5721 See comments by legitimize_pic_address in i386.c for details. */
5724 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5729 if (TARGET_DEBUG_ADDR)
5731 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5732 GET_MODE_NAME (mode));
5736 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5738 return legitimize_tls_address (x, log, false);
5739 if (GET_CODE (x) == CONST
5740 && GET_CODE (XEXP (x, 0)) == PLUS
5741 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5742 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5744 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5745 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5748 if (flag_pic && SYMBOLIC_CONST (x))
5749 return legitimize_pic_address (x, 0);
5751 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5752 if (GET_CODE (x) == ASHIFT
5753 && GET_CODE (XEXP (x, 1)) == CONST_INT
5754 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5757 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5758 GEN_INT (1 << log));
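/* E.g. (ashift (reg) (const_int 3)) is rewritten above as
   (mult (reg) (const_int 8)), the multiply form that
   ix86_decompose_address expects for a scaled index.  */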
5761 if (GET_CODE (x) == PLUS)
5763 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5765 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5766 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5767 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5770 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5771 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5772 GEN_INT (1 << log));
5775 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5776 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5777 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5780 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5781 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5782 GEN_INT (1 << log));
5785 /* Put multiply first if it isn't already. */
5786 if (GET_CODE (XEXP (x, 1)) == MULT)
5788 rtx tmp = XEXP (x, 0);
5789 XEXP (x, 0) = XEXP (x, 1);
5794 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5795 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5796 created by virtual register instantiation, register elimination, and
5797 similar optimizations. */
5798 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5801 x = gen_rtx_PLUS (Pmode,
5802 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5803 XEXP (XEXP (x, 1), 0)),
5804 XEXP (XEXP (x, 1), 1));
5807 /* Canonicalize
5808 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5809 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5810 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5811 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5812 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5813 && CONSTANT_P (XEXP (x, 1)))
5816 rtx other = NULL_RTX;
5818 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5820 constant = XEXP (x, 1);
5821 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5823 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5825 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5826 other = XEXP (x, 1);
5834 x = gen_rtx_PLUS (Pmode,
5835 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5836 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5837 plus_constant (other, INTVAL (constant)));
5841 if (changed && legitimate_address_p (mode, x, FALSE))
5844 if (GET_CODE (XEXP (x, 0)) == MULT)
5847 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5850 if (GET_CODE (XEXP (x, 1)) == MULT)
5853 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5857 && GET_CODE (XEXP (x, 1)) == REG
5858 && GET_CODE (XEXP (x, 0)) == REG)
5861 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5864 x = legitimize_pic_address (x, 0);
5867 if (changed && legitimate_address_p (mode, x, FALSE))
5870 if (GET_CODE (XEXP (x, 0)) == REG)
5872 rtx temp = gen_reg_rtx (Pmode);
5873 rtx val = force_operand (XEXP (x, 1), temp);
5875 emit_move_insn (temp, val);
5881 else if (GET_CODE (XEXP (x, 1)) == REG)
5883 rtx temp = gen_reg_rtx (Pmode);
5884 rtx val = force_operand (XEXP (x, 0), temp);
5886 emit_move_insn (temp, val);
5896 /* Print an integer constant expression in assembler syntax. Addition
5897 and subtraction are the only arithmetic that may appear in these
5898 expressions. FILE is the stdio stream to write to, X is the rtx, and
5899 CODE is the operand print code from the output string. */
5902 output_pic_addr_const (FILE *file, rtx x, int code)
5906 switch (GET_CODE (x))
5916 /* Mark the decl as referenced so that cgraph will output the function. */
5917 if (SYMBOL_REF_DECL (x))
5918 mark_decl_referenced (SYMBOL_REF_DECL (x));
5920 assemble_name (file, XSTR (x, 0));
5921 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5922 fputs ("@PLT", file);
5929 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5930 assemble_name (asm_out_file, buf);
5934 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5938 /* This used to output parentheses around the expression,
5939 but that does not work on the 386 (either ATT or BSD assembler). */
5940 output_pic_addr_const (file, XEXP (x, 0), code);
5944 if (GET_MODE (x) == VOIDmode)
5946 /* We can use %d if the number is <32 bits and positive. */
5947 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5948 fprintf (file, "0x%lx%08lx",
5949 (unsigned long) CONST_DOUBLE_HIGH (x),
5950 (unsigned long) CONST_DOUBLE_LOW (x));
5952 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5955 /* We can't handle floating point constants;
5956 PRINT_OPERAND must handle them. */
5957 output_operand_lossage ("floating constant misused");
5961 /* Some assemblers need integer constants to appear first. */
5962 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5964 output_pic_addr_const (file, XEXP (x, 0), code);
5966 output_pic_addr_const (file, XEXP (x, 1), code);
5968 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5970 output_pic_addr_const (file, XEXP (x, 1), code);
5972 output_pic_addr_const (file, XEXP (x, 0), code);
5980 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5981 output_pic_addr_const (file, XEXP (x, 0), code);
5983 output_pic_addr_const (file, XEXP (x, 1), code);
5985 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5989 if (XVECLEN (x, 0) != 1)
5991 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5992 switch (XINT (x, 1))
5995 fputs ("@GOT", file);
5998 fputs ("@GOTOFF", file);
6000 case UNSPEC_GOTPCREL:
6001 fputs ("@GOTPCREL(%rip)", file);
6003 case UNSPEC_GOTTPOFF:
6004 /* FIXME: This might be @TPOFF in Sun ld too. */
6005 fputs ("@GOTTPOFF", file);
6008 fputs ("@TPOFF", file);
6012 fputs ("@TPOFF", file);
6014 fputs ("@NTPOFF", file);
6017 fputs ("@DTPOFF", file);
6019 case UNSPEC_GOTNTPOFF:
6021 fputs ("@GOTTPOFF(%rip)", file);
6023 fputs ("@GOTNTPOFF", file);
6025 case UNSPEC_INDNTPOFF:
6026 fputs ("@INDNTPOFF", file);
6029 output_operand_lossage ("invalid UNSPEC as operand");
6035 output_operand_lossage ("invalid expression as operand");
6039 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6040 We need to emit DTP-relative relocations. */
6043 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6045 fputs (ASM_LONG, file);
6046 output_addr_const (file, x);
6047 fputs ("@DTPOFF", file);
6053 fputs (", 0", file);
6060 /* In the name of slightly smaller debug output, and to cater to
6061 general assembler lossage, recognize PIC+GOTOFF and turn it back
6062 into a direct symbol reference. */
6065 ix86_delegitimize_address (rtx orig_x)
6069 if (GET_CODE (x) == MEM)
6074 if (GET_CODE (x) != CONST
6075 || GET_CODE (XEXP (x, 0)) != UNSPEC
6076 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6077 || GET_CODE (orig_x) != MEM)
6079 return XVECEXP (XEXP (x, 0), 0, 0);
6082 if (GET_CODE (x) != PLUS
6083 || GET_CODE (XEXP (x, 1)) != CONST)
6086 if (GET_CODE (XEXP (x, 0)) == REG
6087 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6088 /* %ebx + GOT/GOTOFF */
6090 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6092 /* %ebx + %reg * scale + GOT/GOTOFF */
6094 if (GET_CODE (XEXP (y, 0)) == REG
6095 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6097 else if (GET_CODE (XEXP (y, 1)) == REG
6098 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6102 if (GET_CODE (y) != REG
6103 && GET_CODE (y) != MULT
6104 && GET_CODE (y) != ASHIFT)
6110 x = XEXP (XEXP (x, 1), 0);
6111 if (GET_CODE (x) == UNSPEC
6112 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6113 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6116 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6117 return XVECEXP (x, 0, 0);
6120 if (GET_CODE (x) == PLUS
6121 && GET_CODE (XEXP (x, 0)) == UNSPEC
6122 && GET_CODE (XEXP (x, 1)) == CONST_INT
6123 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6124 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6125 && GET_CODE (orig_x) != MEM)))
6127 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6129 return gen_rtx_PLUS (Pmode, y, x);
6137 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6142 if (mode == CCFPmode || mode == CCFPUmode)
6144 enum rtx_code second_code, bypass_code;
6145 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6146 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6148 code = ix86_fp_compare_code_to_integer (code);
6152 code = reverse_condition (code);
6163 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6168 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6169 Those same assemblers have the same but opposite lossage on cmov. */
6172 suffix = fp ? "nbe" : "a";
6175 if (mode == CCNOmode || mode == CCGOCmode)
6177 else if (mode == CCmode || mode == CCGCmode)
6188 if (mode == CCNOmode || mode == CCGOCmode)
6190 else if (mode == CCmode || mode == CCGCmode)
6199 suffix = fp ? "nb" : "ae";
6202 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6212 suffix = fp ? "u" : "p";
6215 suffix = fp ? "nu" : "np";
6220 fputs (suffix, file);
6223 /* Print the name of register X to FILE based on its machine mode and number.
6224 If CODE is 'w', pretend the mode is HImode.
6225 If CODE is 'b', pretend the mode is QImode.
6226 If CODE is 'k', pretend the mode is SImode.
6227 If CODE is 'q', pretend the mode is DImode.
6228 If CODE is 'h', pretend the reg is the `high' byte register.
6229 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6232 print_reg (rtx x, int code, FILE *file)
6234 if (REGNO (x) == ARG_POINTER_REGNUM
6235 || REGNO (x) == FRAME_POINTER_REGNUM
6236 || REGNO (x) == FLAGS_REG
6237 || REGNO (x) == FPSR_REG)
6240 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6243 if (code == 'w' || MMX_REG_P (x))
6245 else if (code == 'b')
6247 else if (code == 'k')
6249 else if (code == 'q')
6251 else if (code == 'y')
6253 else if (code == 'h')
6256 code = GET_MODE_SIZE (GET_MODE (x));
6258 /* Irritatingly, AMD extended registers use a different naming convention
6259 from the normal registers. */
6260 if (REX_INT_REG_P (x))
6267 error ("extended registers have no high halves");
6270 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6273 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6276 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6279 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6282 error ("unsupported operand size for extended register");
6290 if (STACK_TOP_P (x))
6292 fputs ("st(0)", file);
6299 if (! ANY_FP_REG_P (x))
6300 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6305 fputs (hi_reg_name[REGNO (x)], file);
6308 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6310 fputs (qi_reg_name[REGNO (x)], file);
6313 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6315 fputs (qi_high_reg_name[REGNO (x)], file);
6322 /* Locate some local-dynamic symbol still in use by this function
6323 so that we can print its name in some tls_local_dynamic_base
6324 pattern. */
6327 get_some_local_dynamic_name (void)
6331 if (cfun->machine->some_ld_name)
6332 return cfun->machine->some_ld_name;
6334 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6336 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6337 return cfun->machine->some_ld_name;
6343 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6347 if (GET_CODE (x) == SYMBOL_REF
6348 && local_dynamic_symbolic_operand (x, Pmode))
6350 cfun->machine->some_ld_name = XSTR (x, 0);
6358 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6359 C -- print opcode suffix for set/cmov insn.
6360 c -- like C, but print reversed condition
6361 F,f -- likewise, but for floating-point.
6362 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6364 R -- print the prefix for register names.
6365 z -- print the opcode suffix for the size of the current operand.
6366 * -- print a star (in certain assembler syntax)
6367 A -- print an absolute memory reference.
6368 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6369 s -- print a shift double count, followed by the assembler's argument
6370 delimiter.
6371 b -- print the QImode name of the register for the indicated operand.
6372 %b0 would print %al if operands[0] is reg 0.
6373 w -- likewise, print the HImode name of the register.
6374 k -- likewise, print the SImode name of the register.
6375 q -- likewise, print the DImode name of the register.
6376 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6377 y -- print "st(0)" instead of "st" as a register.
6378 D -- print condition for SSE cmp instruction.
6379 P -- if PIC, print an @PLT suffix.
6380 X -- don't print any sort of PIC '@' suffix for a symbol.
6381 & -- print some in-use local-dynamic symbol name.
6382 H -- print a memory address offset by 8; used for sse high-parts
6386 print_operand (FILE *file, rtx x, int code)
6393 if (ASSEMBLER_DIALECT == ASM_ATT)
6398 assemble_name (file, get_some_local_dynamic_name ());
6402 if (ASSEMBLER_DIALECT == ASM_ATT)
6404 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6406 /* Intel syntax. For absolute addresses, registers should not
6407 be surrounded by braces. */
6408 if (GET_CODE (x) != REG)
6411 PRINT_OPERAND (file, x, 0);
6419 PRINT_OPERAND (file, x, 0);
6424 if (ASSEMBLER_DIALECT == ASM_ATT)
6429 if (ASSEMBLER_DIALECT == ASM_ATT)
6434 if (ASSEMBLER_DIALECT == ASM_ATT)
6439 if (ASSEMBLER_DIALECT == ASM_ATT)
6444 if (ASSEMBLER_DIALECT == ASM_ATT)
6449 if (ASSEMBLER_DIALECT == ASM_ATT)
6454 /* 387 opcodes don't get size suffixes if the operands are registers. */
6456 if (STACK_REG_P (x))
6459 /* Likewise if using Intel opcodes. */
6460 if (ASSEMBLER_DIALECT == ASM_INTEL)
6463 /* Derive the size of the op from the size of the operand. */
6464 switch (GET_MODE_SIZE (GET_MODE (x)))
6467 #ifdef HAVE_GAS_FILDS_FISTS
6473 if (GET_MODE (x) == SFmode)
6488 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6490 #ifdef GAS_MNEMONICS
6516 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6518 PRINT_OPERAND (file, x, 0);
6524 /* A little bit of braindamage here. The SSE compare instructions
6525 use completely different names for the comparisons than the
6526 fp conditional moves do. */
6527 switch (GET_CODE (x))
6542 fputs ("unord", file);
6546 fputs ("neq", file);
6550 fputs ("nlt", file);
6554 fputs ("nle", file);
6557 fputs ("ord", file);
6565 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6566 if (ASSEMBLER_DIALECT == ASM_ATT)
6568 switch (GET_MODE (x))
6570 case HImode: putc ('w', file); break;
6572 case SFmode: putc ('l', file); break;
6574 case DFmode: putc ('q', file); break;
6582 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6585 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6586 if (ASSEMBLER_DIALECT == ASM_ATT)
6589 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6592 /* Like above, but print the reversed condition. */
6594 /* Check to see if argument to %c is really a constant
6595 and not a condition code which needs to be reversed. */
6596 if (!COMPARISON_P (x))
6598 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6601 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6604 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6605 if (ASSEMBLER_DIALECT == ASM_ATT)
6608 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6612 /* It doesn't actually matter what mode we use here, as we're
6613 only going to use this for printing. */
6614 x = adjust_address_nv (x, DImode, 8);
6621 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6624 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6627 int pred_val = INTVAL (XEXP (x, 0));
6629 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6630 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6632 int taken = pred_val > REG_BR_PROB_BASE / 2;
6633 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6635 /* Emit hints only in the case where the default branch prediction
6636 heuristics would fail. */
6637 if (taken != cputaken)
6639 /* We use 3e (DS) prefix for taken branches and
6640 2e (CS) prefix for not taken branches. */
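/* (Background, stated for clarity: these are the 0x3e and 0x2e
   segment-override prefix bytes, which NetBurst-era CPUs interpret as
   static taken/not-taken hints on conditional jumps; other CPUs simply
   ignore them.) */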
6642 fputs ("ds ; ", file);
6644 fputs ("cs ; ", file);
6651 output_operand_lossage ("invalid operand code '%c'", code);
6655 if (GET_CODE (x) == REG)
6656 print_reg (x, code, file);
6658 else if (GET_CODE (x) == MEM)
6660 /* No `byte ptr' prefix for call instructions. */
6661 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6664 switch (GET_MODE_SIZE (GET_MODE (x)))
6666 case 1: size = "BYTE"; break;
6667 case 2: size = "WORD"; break;
6668 case 4: size = "DWORD"; break;
6669 case 8: size = "QWORD"; break;
6670 case 12: size = "XWORD"; break;
6671 case 16: size = "XMMWORD"; break;
6676 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6679 else if (code == 'w')
6681 else if (code == 'k')
6685 fputs (" PTR ", file);
6689 /* Avoid (%rip) for call operands. */
6690 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6691 && GET_CODE (x) != CONST_INT)
6692 output_addr_const (file, x);
6693 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6694 output_operand_lossage ("invalid constraints for operand");
6699 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6704 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6705 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6707 if (ASSEMBLER_DIALECT == ASM_ATT)
6709 fprintf (file, "0x%08lx", l);
6712 /* These float cases don't actually occur as immediate operands. */
6713 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6717 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6718 fprintf (file, "%s", dstr);
6721 else if (GET_CODE (x) == CONST_DOUBLE
6722 && GET_MODE (x) == XFmode)
6726 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6727 fprintf (file, "%s", dstr);
6732 /* We have patterns that allow zero sets of memory, for instance.
6733 In 64-bit mode, we should probably support all 8-byte vectors,
6734 since we can in fact encode that into an immediate. */
6735 if (GET_CODE (x) == CONST_VECTOR)
6737 if (x == CONST0_RTX (GET_MODE (x)))
6745 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6747 if (ASSEMBLER_DIALECT == ASM_ATT)
6750 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6751 || GET_CODE (x) == LABEL_REF)
6753 if (ASSEMBLER_DIALECT == ASM_ATT)
6756 fputs ("OFFSET FLAT:", file);
6759 if (GET_CODE (x) == CONST_INT)
6760 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6762 output_pic_addr_const (file, x, code);
6764 output_addr_const (file, x);
6768 /* Print a memory operand whose address is ADDR. */
6771 print_operand_address (FILE *file, rtx addr)
6773 struct ix86_address parts;
6774 rtx base, index, disp;
6777 if (! ix86_decompose_address (addr, &parts))
6781 index = parts.index;
6783 scale = parts.scale;
6791 if (USER_LABEL_PREFIX[0] == 0)
6793 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6799 if (!base && !index)
6801 /* A displacement-only address requires special attention. */
6803 if (GET_CODE (disp) == CONST_INT)
6805 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6807 if (USER_LABEL_PREFIX[0] == 0)
6809 fputs ("ds:", file);
6811 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6814 output_pic_addr_const (file, disp, 0);
6816 output_addr_const (file, disp);
6818 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
6820 && ((GET_CODE (disp) == SYMBOL_REF
6821 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6822 || GET_CODE (disp) == LABEL_REF
6823 || (GET_CODE (disp) == CONST
6824 && GET_CODE (XEXP (disp, 0)) == PLUS
6825 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6826 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6827 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6828 fputs ("(%rip)", file);
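/* A sketch of why this is shorter: in 64-bit mode a bare disp32 address
   needs a SIB byte (mod=00, r/m=100, base=101), while symbol(%rip)
   encodes as mod=00, r/m=101 with no SIB, saving one byte. */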
6832 if (ASSEMBLER_DIALECT == ASM_ATT)
6837 output_pic_addr_const (file, disp, 0);
6838 else if (GET_CODE (disp) == LABEL_REF)
6839 output_asm_label (disp);
6841 output_addr_const (file, disp);
6846 print_reg (base, 0, file);
6850 print_reg (index, 0, file);
6852 fprintf (file, ",%d", scale);
6858 rtx offset = NULL_RTX;
6862 /* Pull out the offset of a symbol; print any symbol itself. */
6863 if (GET_CODE (disp) == CONST
6864 && GET_CODE (XEXP (disp, 0)) == PLUS
6865 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6867 offset = XEXP (XEXP (disp, 0), 1);
6868 disp = gen_rtx_CONST (VOIDmode,
6869 XEXP (XEXP (disp, 0), 0));
6873 output_pic_addr_const (file, disp, 0);
6874 else if (GET_CODE (disp) == LABEL_REF)
6875 output_asm_label (disp);
6876 else if (GET_CODE (disp) == CONST_INT)
6879 output_addr_const (file, disp);
6885 print_reg (base, 0, file);
6888 if (INTVAL (offset) >= 0)
6890 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6894 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6901 print_reg (index, 0, file);
6903 fprintf (file, "*%d", scale);
6911 output_addr_const_extra (FILE *file, rtx x)
6915 if (GET_CODE (x) != UNSPEC)
6918 op = XVECEXP (x, 0, 0);
6919 switch (XINT (x, 1))
6921 case UNSPEC_GOTTPOFF:
6922 output_addr_const (file, op);
6923 /* FIXME: This might be @TPOFF in Sun ld. */
6924 fputs ("@GOTTPOFF", file);
6927 output_addr_const (file, op);
6928 fputs ("@TPOFF", file);
6931 output_addr_const (file, op);
6933 fputs ("@TPOFF", file);
6935 fputs ("@NTPOFF", file);
6938 output_addr_const (file, op);
6939 fputs ("@DTPOFF", file);
6941 case UNSPEC_GOTNTPOFF:
6942 output_addr_const (file, op);
6944 fputs ("@GOTTPOFF(%rip)", file);
6946 fputs ("@GOTNTPOFF", file);
6948 case UNSPEC_INDNTPOFF:
6949 output_addr_const (file, op);
6950 fputs ("@INDNTPOFF", file);
6960 /* Split one or more DImode RTL references into pairs of SImode
6961 references. The RTL can be REG, offsettable MEM, integer constant, or
6962 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6963 split and "num" is its length. lo_half and hi_half are output arrays
6964 that parallel "operands". */
6967 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6971 rtx op = operands[num];
6973 /* simplify_subreg refuses to split volatile memory addresses,
6974 but we still have to handle them. */
6975 if (GET_CODE (op) == MEM)
6977 lo_half[num] = adjust_address (op, SImode, 0);
6978 hi_half[num] = adjust_address (op, SImode, 4);
6982 lo_half[num] = simplify_gen_subreg (SImode, op,
6983 GET_MODE (op) == VOIDmode
6984 ? DImode : GET_MODE (op), 0);
6985 hi_half[num] = simplify_gen_subreg (SImode, op,
6986 GET_MODE (op) == VOIDmode
6987 ? DImode : GET_MODE (op), 4);
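/* For illustration: splitting the DImode constant 0x100000002 yields
   lo_half = (const_int 2) and hi_half = (const_int 1), matching the
   little-endian layout that adjust_address exposes in the MEM case. */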
6991 /* Split one or more TImode RTL references into pairs of DImode
6992 references. The RTL can be REG, offsettable MEM, integer constant, or
6993 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6994 split and "num" is its length. lo_half and hi_half are output arrays
6995 that parallel "operands". */
6998 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7002 rtx op = operands[num];
7004 /* simplify_subreg refuses to split volatile memory addresses, but we
7005 still have to handle them. */
7006 if (GET_CODE (op) == MEM)
7008 lo_half[num] = adjust_address (op, DImode, 0);
7009 hi_half[num] = adjust_address (op, DImode, 8);
7013 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7014 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7019 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7020 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7021 is the expression of the binary operation. The output may either be
7022 emitted here, or returned to the caller, like all output_* functions.
7024 There is no guarantee that the operands are the same mode, as they
7025 might be within FLOAT or FLOAT_EXTEND expressions. */
7027 #ifndef SYSV386_COMPAT
7028 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7029 wants to fix the assemblers because that causes incompatibility
7030 with gcc. No-one wants to fix gcc because that causes
7031 incompatibility with assemblers... You can use the option of
7032 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7033 #define SYSV386_COMPAT 1
7037 output_387_binary_op (rtx insn, rtx *operands)
7039 static char buf[30];
7042 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7044 #ifdef ENABLE_CHECKING
7045 /* Even if we do not want to check the inputs, this documents the input
7046 constraints, which helps in understanding the following code. */
7047 if (STACK_REG_P (operands[0])
7048 && ((REG_P (operands[1])
7049 && REGNO (operands[0]) == REGNO (operands[1])
7050 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7051 || (REG_P (operands[2])
7052 && REGNO (operands[0]) == REGNO (operands[2])
7053 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7054 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7060 switch (GET_CODE (operands[3]))
7063 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7064 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7072 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7073 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7081 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7082 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7090 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7091 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7105 if (GET_MODE (operands[0]) == SFmode)
7106 strcat (buf, "ss\t{%2, %0|%0, %2}");
7108 strcat (buf, "sd\t{%2, %0|%0, %2}");
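/* Note on the template syntax used throughout this function: the
   {att|intel} braces are output_asm_insn dialect alternatives -- the
   text before '|' is emitted for ASM_ATT, the text after it for
   ASM_INTEL. */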
7113 switch (GET_CODE (operands[3]))
7117 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7119 rtx temp = operands[2];
7120 operands[2] = operands[1];
7124 /* We know operands[0] == operands[1]. */
7126 if (GET_CODE (operands[2]) == MEM)
7132 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7134 if (STACK_TOP_P (operands[0]))
7135 /* How is it that we are storing to a dead operand[2]?
7136 Well, presumably operands[1] is dead too. We can't
7137 store the result to st(0) as st(0) gets popped on this
7138 instruction. Instead store to operands[2] (which I
7139 think has to be st(1)). st(1) will be popped later.
7140 gcc <= 2.8.1 didn't have this check and generated
7141 assembly code that the Unixware assembler rejected. */
7142 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7144 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7148 if (STACK_TOP_P (operands[0]))
7149 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7151 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7156 if (GET_CODE (operands[1]) == MEM)
7162 if (GET_CODE (operands[2]) == MEM)
7168 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7171 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7172 derived assemblers, confusingly reverse the direction of
7173 the operation for fsub{r} and fdiv{r} when the
7174 destination register is not st(0). The Intel assembler
7175 doesn't have this brain damage. Read !SYSV386_COMPAT to
7176 figure out what the hardware really does. */
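/* In other words: those assemblers swap the meaning of fsub/fsubr
   (and fdiv/fdivr) whenever the destination is a register other than
   %st(0), so the templates below deliberately pick opposite spellings
   per dialect. */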
7177 if (STACK_TOP_P (operands[0]))
7178 p = "{p\t%0, %2|rp\t%2, %0}";
7180 p = "{rp\t%2, %0|p\t%0, %2}";
7182 if (STACK_TOP_P (operands[0]))
7183 /* As above for fmul/fadd, we can't store to st(0). */
7184 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7186 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7191 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7194 if (STACK_TOP_P (operands[0]))
7195 p = "{rp\t%0, %1|p\t%1, %0}";
7197 p = "{p\t%1, %0|rp\t%0, %1}";
7199 if (STACK_TOP_P (operands[0]))
7200 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7202 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7207 if (STACK_TOP_P (operands[0]))
7209 if (STACK_TOP_P (operands[1]))
7210 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7212 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7215 else if (STACK_TOP_P (operands[1]))
7218 p = "{\t%1, %0|r\t%0, %1}";
7220 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7226 p = "{r\t%2, %0|\t%0, %2}";
7228 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7241 /* Output code to initialize control word copies used by trunc?f?i and
7242 rounding patterns. CURRENT_MODE is set to the current control word,
7243 while NEW_MODE is set to the new control word. */
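/* Background (x87 facts, stated for clarity): bits 10-11 of the control
   word select rounding -- 00 nearest, 01 down, 10 up, 11 truncate --
   hence the 0x0400/0x0800/0x0c00 masks below; bit 5 (0x0020) is the
   precision-exception mask used for nearbyint(). */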
7246 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7248 rtx reg = gen_reg_rtx (HImode);
7250 emit_insn (gen_x86_fnstcw_1 (current_mode));
7251 emit_move_insn (reg, current_mode);
7253 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7259 /* round down toward -oo */
7260 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7264 /* round up toward +oo */
7265 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7269 /* round toward zero (truncate) */
7270 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7273 case I387_CW_MASK_PM:
7274 /* mask precision exception for nearbyint() */
7275 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7287 /* round down toward -oo */
7288 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7289 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7293 /* round up toward +oo */
7294 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7295 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7299 /* round toward zero (truncate) */
7300 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7303 case I387_CW_MASK_PM:
7304 /* mask precision exception for nearbyint() */
7305 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7313 emit_move_insn (new_mode, reg);
7316 /* Output code for INSN to convert a float to a signed int. OPERANDS
7317 are the insn operands. The output may be [HSD]Imode and the input
7318 operand may be [SDX]Fmode. */
7321 output_fix_trunc (rtx insn, rtx *operands)
7323 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7324 int dimode_p = GET_MODE (operands[0]) == DImode;
7326 /* Jump through a hoop or two for DImode, since the hardware has no
7327 non-popping instruction. We used to do this a different way, but
7328 that was somewhat fragile and broke with post-reload splitters. */
7329 if (dimode_p && !stack_top_dies)
7330 output_asm_insn ("fld\t%y1", operands);
7332 if (!STACK_TOP_P (operands[1]))
7335 if (GET_CODE (operands[0]) != MEM)
7338 output_asm_insn ("fldcw\t%3", operands);
7339 if (stack_top_dies || dimode_p)
7340 output_asm_insn ("fistp%z0\t%0", operands);
7342 output_asm_insn ("fist%z0\t%0", operands);
7343 output_asm_insn ("fldcw\t%2", operands);
7348 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7349 should be used. UNORDERED_P is true when fucom should be used. */
7352 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7355 rtx cmp_op0, cmp_op1;
7356 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7360 cmp_op0 = operands[0];
7361 cmp_op1 = operands[1];
7365 cmp_op0 = operands[1];
7366 cmp_op1 = operands[2];
7371 if (GET_MODE (operands[0]) == SFmode)
7373 return "ucomiss\t{%1, %0|%0, %1}";
7375 return "comiss\t{%1, %0|%0, %1}";
7378 return "ucomisd\t{%1, %0|%0, %1}";
7380 return "comisd\t{%1, %0|%0, %1}";
7383 if (! STACK_TOP_P (cmp_op0))
7386 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7388 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7392 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7393 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7396 return "ftst\n\tfnstsw\t%0";
7399 if (STACK_REG_P (cmp_op1)
7401 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7402 && REGNO (cmp_op1) != FIRST_STACK_REG)
7404 /* If both the top of the 387 stack and the other operand (also a
7405 stack register) die, then this must be a `fcompp' float
7406 compare. */
7410 /* There is no double-popping fcomi variant. Fortunately,
7411 eflags is immune to the fstp's cc clobbering. */
7413 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7415 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7416 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7421 return "fucompp\n\tfnstsw\t%0";
7423 return "fcompp\n\tfnstsw\t%0";
7428 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7430 static const char * const alt[16] =
7432 "fcom%z2\t%y2\n\tfnstsw\t%0",
7433 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7434 "fucom%z2\t%y2\n\tfnstsw\t%0",
7435 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7437 "ficom%z2\t%y2\n\tfnstsw\t%0",
7438 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7442 "fcomi\t{%y1, %0|%0, %y1}",
7443 "fcomip\t{%y1, %0|%0, %y1}",
7444 "fucomi\t{%y1, %0|%0, %y1}",
7445 "fucomip\t{%y1, %0|%0, %y1}",
7456 mask = eflags_p << 3;
7457 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7458 mask |= unordered_p << 1;
7459 mask |= stack_top_dies;
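/* Worked example (derived from the encoding above): fucomip has
   eflags_p = 1, an fp operand, unordered_p = 1, and a dying stack top,
   giving mask 8|0|2|1 = 11, which indeed selects
   "fucomip\t{%y1, %0|%0, %y1}" from alt[]. */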
7472 ix86_output_addr_vec_elt (FILE *file, int value)
7474 const char *directive = ASM_LONG;
7479 directive = ASM_QUAD;
7485 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7489 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7492 fprintf (file, "%s%s%d-%s%d\n",
7493 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7494 else if (HAVE_AS_GOTOFF_IN_DATA)
7495 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7497 else if (TARGET_MACHO)
7499 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7500 machopic_output_function_base_name (file);
7501 fprintf(file, "\n");
7505 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7506 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7509 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target. */
7513 ix86_expand_clear (rtx dest)
7517 /* We play register width games, which are only valid after reload. */
7518 if (!reload_completed)
7521 /* Avoid HImode and its attendant prefix byte. */
7522 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7523 dest = gen_rtx_REG (SImode, REGNO (dest));
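/* E.g. clearing %al is widened to xorl %eax, %eax: same effect on the
   low byte, shorter encoding, and no partial-register stall. */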
7525 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7527 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7528 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7530 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7531 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7537 /* X is an unchanging MEM. If it is a constant pool reference, return
7538 the constant pool rtx, else NULL. */
7541 maybe_get_pool_constant (rtx x)
7543 x = ix86_delegitimize_address (XEXP (x, 0));
7545 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7546 return get_pool_constant (x);
7552 ix86_expand_move (enum machine_mode mode, rtx operands[])
7554 int strict = (reload_in_progress || reload_completed);
7556 enum tls_model model;
7561 if (GET_CODE (op1) == SYMBOL_REF)
7563 model = SYMBOL_REF_TLS_MODEL (op1);
7566 op1 = legitimize_tls_address (op1, model, true);
7567 op1 = force_operand (op1, op0);
7572 else if (GET_CODE (op1) == CONST
7573 && GET_CODE (XEXP (op1, 0)) == PLUS
7574 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7576 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7579 rtx addend = XEXP (XEXP (op1, 0), 1);
7580 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7581 op1 = force_operand (op1, NULL);
7582 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7583 op0, 1, OPTAB_DIRECT);
7589 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7594 rtx temp = ((reload_in_progress
7595 || ((op0 && GET_CODE (op0) == REG)
7597 ? op0 : gen_reg_rtx (Pmode));
7598 op1 = machopic_indirect_data_reference (op1, temp);
7599 op1 = machopic_legitimize_pic_address (op1, mode,
7600 temp == op1 ? 0 : temp);
7602 else if (MACHOPIC_INDIRECT)
7603 op1 = machopic_indirect_data_reference (op1, 0);
7607 if (GET_CODE (op0) == MEM)
7608 op1 = force_reg (Pmode, op1);
7610 op1 = legitimize_address (op1, op1, Pmode);
7611 #endif /* TARGET_MACHO */
7615 if (GET_CODE (op0) == MEM
7616 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7617 || !push_operand (op0, mode))
7618 && GET_CODE (op1) == MEM)
7619 op1 = force_reg (mode, op1);
7621 if (push_operand (op0, mode)
7622 && ! general_no_elim_operand (op1, mode))
7623 op1 = copy_to_mode_reg (mode, op1);
7625 /* Force large constants in 64-bit compilation into a register
7626 to get them CSEed. */
7627 if (TARGET_64BIT && mode == DImode
7628 && immediate_operand (op1, mode)
7629 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7630 && !register_operand (op0, mode)
7631 && optimize && !reload_completed && !reload_in_progress)
7632 op1 = copy_to_mode_reg (mode, op1);
7634 if (FLOAT_MODE_P (mode))
7636 /* If we are loading a floating point constant to a register,
7637 force the value to memory now, since we'll get better code
7638 out of the back end. */
7642 else if (GET_CODE (op1) == CONST_DOUBLE)
7644 op1 = validize_mem (force_const_mem (mode, op1));
7645 if (!register_operand (op0, mode))
7647 rtx temp = gen_reg_rtx (mode);
7648 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7649 emit_move_insn (op0, temp);
7656 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7660 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7662 rtx op0 = operands[0], op1 = operands[1];
7664 /* Force constants other than zero into memory. We do not know how
7665 the instructions used to build constants modify the upper 64 bits
7666 of the register; once we have that information we may be able
7667 to handle some of them more efficiently. */
7668 if ((reload_in_progress | reload_completed) == 0
7669 && register_operand (op0, mode)
7670 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7671 op1 = validize_mem (force_const_mem (mode, op1));
7673 /* Make operand1 a register if it isn't already. */
7675 && !register_operand (op0, mode)
7676 && !register_operand (op1, mode))
7678 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7682 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7685 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7686 straight to ix86_expand_vector_move. */
7689 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7698 /* If we're optimizing for size, movups is the smallest. */
7701 op0 = gen_lowpart (V4SFmode, op0);
7702 op1 = gen_lowpart (V4SFmode, op1);
7703 emit_insn (gen_sse_movups (op0, op1));
7707 /* ??? If we have typed data, then it would appear that using
7708 movdqu is the only way to get unaligned data loaded with integer type. */
7710 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7712 op0 = gen_lowpart (V16QImode, op0);
7713 op1 = gen_lowpart (V16QImode, op1);
7714 emit_insn (gen_sse2_movdqu (op0, op1));
7718 if (TARGET_SSE2 && mode == V2DFmode)
7722 /* When SSE registers are split into halves, we can avoid
7723 writing to the top half twice. */
7724 if (TARGET_SSE_SPLIT_REGS)
7726 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7731 /* ??? Not sure about the best option for the Intel chips.
7732 The following would seem to satisfy; the register is
7733 entirely cleared, breaking the dependency chain. We
7734 then store to the upper half, with a dependency depth
7735 of one. A rumor has it that Intel recommends two movsd
7736 followed by an unpacklpd, but this is unconfirmed. And
7737 given that the dependency depth of the unpacklpd would
7738 still be one, I'm not sure why this would be better. */
7739 zero = CONST0_RTX (V2DFmode);
7742 m = adjust_address (op1, DFmode, 0);
7743 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7744 m = adjust_address (op1, DFmode, 8);
7745 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7749 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7750 emit_move_insn (op0, CONST0_RTX (mode));
7752 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7754 if (mode != V4SFmode)
7755 op0 = gen_lowpart (V4SFmode, op0);
7756 m = adjust_address (op1, V2SFmode, 0);
7757 emit_insn (gen_sse_loadlps (op0, op0, m));
7758 m = adjust_address (op1, V2SFmode, 8);
7759 emit_insn (gen_sse_loadhps (op0, op0, m));
7762 else if (MEM_P (op0))
7764 /* If we're optimizing for size, movups is the smallest. */
7767 op0 = gen_lowpart (V4SFmode, op0);
7768 op1 = gen_lowpart (V4SFmode, op1);
7769 emit_insn (gen_sse_movups (op0, op1));
7773 /* ??? Similar to above, only less clear because of quote
7774 typeless stores unquote. */
7775 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7776 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7778 op0 = gen_lowpart (V16QImode, op0);
7779 op1 = gen_lowpart (V16QImode, op1);
7780 emit_insn (gen_sse2_movdqu (op0, op1));
7784 if (TARGET_SSE2 && mode == V2DFmode)
7786 m = adjust_address (op0, DFmode, 0);
7787 emit_insn (gen_sse2_storelpd (m, op1));
7788 m = adjust_address (op0, DFmode, 8);
7789 emit_insn (gen_sse2_storehpd (m, op1));
7793 if (mode != V4SFmode)
7794 op1 = gen_lowpart (V4SFmode, op1);
7795 m = adjust_address (op0, V2SFmode, 0);
7796 emit_insn (gen_sse_storelps (m, op1));
7797 m = adjust_address (op0, V2SFmode, 8);
7798 emit_insn (gen_sse_storehps (m, op1));
7805 /* Expand a push in MODE. This is some mode for which we do not support
7806 proper push instructions, at least from the registers that we expect
7807 the value to live in. */
7810 ix86_expand_push (enum machine_mode mode, rtx x)
7814 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7815 GEN_INT (-GET_MODE_SIZE (mode)),
7816 stack_pointer_rtx, 1, OPTAB_DIRECT);
7817 if (tmp != stack_pointer_rtx)
7818 emit_move_insn (stack_pointer_rtx, tmp);
7820 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7821 emit_move_insn (tmp, x);
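/* A rough sketch of the expansion, assuming a 4-byte mode on ia32:

       subl $4, %esp
       movl x, (%esp)

   i.e. an explicit stack-pointer adjustment followed by a plain store,
   instead of a real push instruction. */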
7824 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7825 destination to use for the operation. If different from the true
7826 destination in operands[0], a copy operation will be required. */
7829 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7832 int matching_memory;
7833 rtx src1, src2, dst;
7839 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7840 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7841 && (rtx_equal_p (dst, src2)
7842 || immediate_operand (src1, mode)))
7849 /* If the destination is memory, and we do not have matching source
7850 operands, do things in registers. */
7851 matching_memory = 0;
7852 if (GET_CODE (dst) == MEM)
7854 if (rtx_equal_p (dst, src1))
7855 matching_memory = 1;
7856 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7857 && rtx_equal_p (dst, src2))
7858 matching_memory = 2;
7860 dst = gen_reg_rtx (mode);
7863 /* Both source operands cannot be in memory. */
7864 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7866 if (matching_memory != 2)
7867 src2 = force_reg (mode, src2);
7869 src1 = force_reg (mode, src1);
7872 /* If the operation is not commutative, source 1 cannot be a constant
7873 or non-matching memory. */
7874 if ((CONSTANT_P (src1)
7875 || (!matching_memory && GET_CODE (src1) == MEM))
7876 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7877 src1 = force_reg (mode, src1);
7879 /* If optimizing, copy to regs to improve CSE */
7880 if (optimize && ! no_new_pseudos)
7882 if (GET_CODE (dst) == MEM)
7883 dst = gen_reg_rtx (mode);
7884 if (GET_CODE (src1) == MEM)
7885 src1 = force_reg (mode, src1);
7886 if (GET_CODE (src2) == MEM)
7887 src2 = force_reg (mode, src2);
7890 operands[1] = src1;
7891 operands[2] = src2;
7895 /* Similarly, but assume that the destination has already been
7899 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7900 enum machine_mode mode, rtx operands[])
7902 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7903 gcc_assert (dst == operands[0]);
7906 /* Attempt to expand a binary operator. Make the expansion closer to the
7907 actual machine than just general_operand, which will allow 3 separate
7908 memory references (one output, two input) in a single insn. */
7911 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7914 rtx src1, src2, dst, op, clob;
7916 dst = ix86_fixup_binary_operands (code, mode, operands);
7920 /* Emit the instruction. */
7922 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7923 if (reload_in_progress)
7925 /* Reload doesn't know about the flags register, and doesn't know that
7926 it doesn't want to clobber it. We can only do this with PLUS. */
7933 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7934 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7937 /* Fix up the destination if needed. */
7938 if (dst != operands[0])
7939 emit_move_insn (operands[0], dst);
7942 /* Return TRUE or FALSE depending on whether the binary operator meets the
7943 appropriate constraints. */
7946 ix86_binary_operator_ok (enum rtx_code code,
7947 enum machine_mode mode ATTRIBUTE_UNUSED,
7950 /* Both source operands cannot be in memory. */
7951 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7953 /* If the operation is not commutative, source 1 cannot be a constant. */
7954 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7956 /* If the destination is memory, we must have a matching source operand. */
7957 if (GET_CODE (operands[0]) == MEM
7958 && ! (rtx_equal_p (operands[0], operands[1])
7959 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7960 && rtx_equal_p (operands[0], operands[2]))))
7962 /* If the operation is not commutative and source 1 is memory, we must
7963 have a matching destination. */
7964 if (GET_CODE (operands[1]) == MEM
7965 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7966 && ! rtx_equal_p (operands[0], operands[1]))
7971 /* Attempt to expand a unary operator. Make the expansion closer to the
7972 actual machine than just general_operand, which will allow 2 separate
7973 memory references (one output, one input) in a single insn. */
7976 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7979 int matching_memory;
7980 rtx src, dst, op, clob;
7985 /* If the destination is memory, and we do not have matching source
7986 operands, do things in registers. */
7987 matching_memory = 0;
7990 if (rtx_equal_p (dst, src))
7991 matching_memory = 1;
7993 dst = gen_reg_rtx (mode);
7996 /* When source operand is memory, destination must match. */
7997 if (MEM_P (src) && !matching_memory)
7998 src = force_reg (mode, src);
8000 /* If optimizing, copy to regs to improve CSE. */
8001 if (optimize && ! no_new_pseudos)
8003 if (GET_CODE (dst) == MEM)
8004 dst = gen_reg_rtx (mode);
8005 if (GET_CODE (src) == MEM)
8006 src = force_reg (mode, src);
8009 /* Emit the instruction. */
8011 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8012 if (reload_in_progress || code == NOT)
8014 /* Reload doesn't know about the flags register, and doesn't know that
8015 it doesn't want to clobber it. */
8022 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8023 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8026 /* Fix up the destination if needed. */
8027 if (dst != operands[0])
8028 emit_move_insn (operands[0], dst);
8031 /* Return TRUE or FALSE depending on whether the unary operator meets the
8032 appropriate constraints. */
8035 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8036 enum machine_mode mode ATTRIBUTE_UNUSED,
8037 rtx operands[2] ATTRIBUTE_UNUSED)
8039 /* If one of the operands is memory, source and destination must match. */
8040 if ((GET_CODE (operands[0]) == MEM
8041 || GET_CODE (operands[1]) == MEM)
8042 && ! rtx_equal_p (operands[0], operands[1]))
8047 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8048 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8049 true, then replicate the mask for all elements of the vector register.
8050 If INVERT is true, then create a mask excluding the sign bit. */
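/* For example (values follow from the IEEE layout): for SFmode the mask
   is 0x80000000 (0x7fffffff when INVERT), for DFmode it is the sign bit
   1 << 63; with VECT set it is replicated into all V4SF/V2DF lanes. */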
8053 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8055 enum machine_mode vec_mode;
8056 HOST_WIDE_INT hi, lo;
8061 /* Find the sign bit, sign extended to 2*HWI. */
8063 lo = 0x80000000, hi = lo < 0;
8064 else if (HOST_BITS_PER_WIDE_INT >= 64)
8065 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8067 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8072 /* Force this value into the low part of a fp vector constant. */
8073 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8074 mask = gen_lowpart (mode, mask);
8079 v = gen_rtvec (4, mask, mask, mask, mask);
8081 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8082 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8083 vec_mode = V4SFmode;
8088 v = gen_rtvec (2, mask, mask);
8090 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8091 vec_mode = V2DFmode;
8094 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8097 /* Generate code for floating point ABS or NEG. */
8100 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8103 rtx mask, set, use, clob, dst, src;
8104 bool matching_memory;
8105 bool use_sse = false;
8106 bool vector_mode = VECTOR_MODE_P (mode);
8107 enum machine_mode elt_mode = mode;
8111 elt_mode = GET_MODE_INNER (mode);
8114 else if (TARGET_SSE_MATH)
8115 use_sse = SSE_FLOAT_MODE_P (mode);
8117 /* NEG and ABS performed with SSE use bitwise mask operations.
8118 Create the appropriate mask now. */
8120 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8123 /* When not using SSE, we don't use the mask, but prefer to keep the
8124 same general form of the insn pattern to reduce duplication when
8125 it comes time to split. */
8132 /* If the destination is memory, and we don't have matching source
8133 operands, do things in registers. */
8134 matching_memory = false;
8137 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8138 matching_memory = true;
8140 dst = gen_reg_rtx (mode);
8142 if (MEM_P (src) && !matching_memory)
8143 src = force_reg (mode, src);
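/* The SSE forms reduce to bitwise sign-bit operations:
   NEG is dst = src ^ sign_mask, ABS is dst = src & ~sign_mask.
   The mask built above is already inverted for ABS, so both cases can
   use it directly. */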
8147 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8148 set = gen_rtx_SET (VOIDmode, dst, set);
8153 set = gen_rtx_fmt_e (code, mode, src);
8154 set = gen_rtx_SET (VOIDmode, dst, set);
8155 use = gen_rtx_USE (VOIDmode, mask);
8156 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8157 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8160 if (dst != operands[0])
8161 emit_move_insn (operands[0], dst);
8164 /* Expand a copysign operation. Special case operand 0 being a constant. */
8167 ix86_expand_copysign (rtx operands[])
8169 enum machine_mode mode, vmode;
8170 rtx dest, op0, op1, mask, nmask;
8176 mode = GET_MODE (dest);
8177 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8179 if (GET_CODE (op0) == CONST_DOUBLE)
8183 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8184 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8186 if (op0 == CONST0_RTX (mode))
8187 op0 = CONST0_RTX (vmode);
8191 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8192 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8194 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8195 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8198 mask = ix86_build_signbit_mask (mode, 0, 0);
8201 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8203 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8207 nmask = ix86_build_signbit_mask (mode, 0, 1);
8208 mask = ix86_build_signbit_mask (mode, 0, 0);
8211 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8213 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8217 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8218 be a constant, and so has already been expanded into a vector constant. */
8221 ix86_split_copysign_const (rtx operands[])
8223 enum machine_mode mode, vmode;
8224 rtx dest, op0, op1, mask, x;
8231 mode = GET_MODE (dest);
8232 vmode = GET_MODE (mask);
8234 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8235 x = gen_rtx_AND (vmode, dest, mask);
8236 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8238 if (op0 != CONST0_RTX (vmode))
8240 x = gen_rtx_IOR (vmode, dest, op0);
8241 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8245 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8246 so we have to do two masks. */
8249 ix86_split_copysign_var (rtx operands[])
8251 enum machine_mode mode, vmode;
8252 rtx dest, scratch, op0, op1, mask, nmask, x;
8255 scratch = operands[1];
8258 nmask = operands[4];
8261 mode = GET_MODE (dest);
8262 vmode = GET_MODE (mask);
8264 if (rtx_equal_p (op0, op1))
8266 /* Shouldn't happen often (it's useless, obviously), but when it does
8267 we'd generate incorrect code if we continue below. */
8268 emit_move_insn (dest, op0);
8272 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8274 gcc_assert (REGNO (op1) == REGNO (scratch));
8276 x = gen_rtx_AND (vmode, scratch, mask);
8277 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8280 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8281 x = gen_rtx_NOT (vmode, dest);
8282 x = gen_rtx_AND (vmode, x, op0);
8283 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8287 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8289 x = gen_rtx_AND (vmode, scratch, mask);
8291 else /* alternative 2,4 */
8293 gcc_assert (REGNO (mask) == REGNO (scratch));
8294 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8295 x = gen_rtx_AND (vmode, scratch, op1);
8297 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8299 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8301 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8302 x = gen_rtx_AND (vmode, dest, nmask);
8304 else /* alternative 3,4 */
8306 gcc_assert (REGNO (nmask) == REGNO (dest));
8308 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8309 x = gen_rtx_AND (vmode, dest, op0);
8311 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8314 x = gen_rtx_IOR (vmode, dest, scratch);
8315 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8318 /* Return TRUE or FALSE depending on whether the first SET in INSN
8319 has source and destination with matching CC modes, and that the
8320 CC mode is at least as constrained as REQ_MODE. */
8323 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8326 enum machine_mode set_mode;
8328 set = PATTERN (insn);
8329 if (GET_CODE (set) == PARALLEL)
8330 set = XVECEXP (set, 0, 0);
8331 if (GET_CODE (set) != SET)
8333 if (GET_CODE (SET_SRC (set)) != COMPARE)
8336 set_mode = GET_MODE (SET_DEST (set));
8340 if (req_mode != CCNOmode
8341 && (req_mode != CCmode
8342 || XEXP (SET_SRC (set), 1) != const0_rtx))
8346 if (req_mode == CCGCmode)
8350 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8354 if (req_mode == CCZmode)
8364 return (GET_MODE (SET_SRC (set)) == set_mode);
8367 /* Generate insn patterns to do an integer compare of OPERANDS. */
8370 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8372 enum machine_mode cmpmode;
8375 cmpmode = SELECT_CC_MODE (code, op0, op1);
8376 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8378 /* This is very simple, but making the interface the same as in the
8379 FP case makes the rest of the code easier. */
8380 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8381 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8383 /* Return the test that should be put into the flags user, i.e.
8384 the bcc, scc, or cmov instruction. */
8385 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8388 /* Figure out whether to use ordered or unordered fp comparisons.
8389 Return the appropriate mode to use. */
8392 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8394 /* ??? In order to make all comparisons reversible, we do all comparisons
8395 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8396 all forms of trapping and nontrapping comparisons, we can make inequality
8397 comparisons trapping again, since it results in better code when using
8398 FCOM-based compares. */
8399 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8403 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8405 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8406 return ix86_fp_compare_mode (code);
8409 /* Only zero flag is needed. */
8411 case NE: /* ZF!=0 */
8413 /* Codes needing carry flag. */
8414 case GEU: /* CF=0 */
8415 case GTU: /* CF=0 & ZF=0 */
8416 case LTU: /* CF=1 */
8417 case LEU: /* CF=1 | ZF=1 */
8419 /* Codes possibly doable only with sign flag when
8420 comparing against zero. */
8421 case GE: /* SF=OF or SF=0 */
8422 case LT: /* SF<>OF or SF=1 */
8423 if (op1 == const0_rtx)
8426 /* For other cases Carry flag is not required. */
8428 /* Codes doable only with the sign flag when comparing
8429 against zero, but we lack a jump instruction for them,
8430 so we need to use relational tests against overflow,
8431 which thus needs to be zero. */
8432 case GT: /* ZF=0 & SF=OF */
8433 case LE: /* ZF=1 | SF<>OF */
8434 if (op1 == const0_rtx)
8438 /* The strcmp pattern does (use flags), and combine may ask us for a proper comparison. */
8447 /* Return the fixed registers used for condition codes. */
8450 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8457 /* If two condition code modes are compatible, return a condition code
8458 mode which is compatible with both. Otherwise, return
8461 static enum machine_mode
8462 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8467 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8470 if ((m1 == CCGCmode && m2 == CCGOCmode)
8471 || (m1 == CCGOCmode && m2 == CCGCmode))
8499 /* These are only compatible with themselves, which we already know. */
8505 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8508 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8510 enum rtx_code swapped_code = swap_condition (code);
8511 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8512 || (ix86_fp_comparison_cost (swapped_code)
8513 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8516 /* Swap, force into registers, or otherwise massage the two operands
8517 to a fp comparison. The operands are updated in place; the new
8518 comparison code is returned. */
8520 static enum rtx_code
8521 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8523 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8524 rtx op0 = *pop0, op1 = *pop1;
8525 enum machine_mode op_mode = GET_MODE (op0);
8526 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8528 /* All of the unordered compare instructions only work on registers.
8529 The same is true of the fcomi compare instructions. The same is
8530 true of the XFmode compare instructions if not comparing with
8531 zero (ftst insn is used in this case). */
8534 && (fpcmp_mode == CCFPUmode
8535 || (op_mode == XFmode
8536 && ! (standard_80387_constant_p (op0) == 1
8537 || standard_80387_constant_p (op1) == 1))
8538 || ix86_use_fcomi_compare (code)))
8540 op0 = force_reg (op_mode, op0);
8541 op1 = force_reg (op_mode, op1);
8545 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8546 things around if they appear profitable, otherwise force op0 into a register. */
8549 if (standard_80387_constant_p (op0) == 0
8550 || (GET_CODE (op0) == MEM
8551 && ! (standard_80387_constant_p (op1) == 0
8552 || GET_CODE (op1) == MEM)))
8555 tmp = op0, op0 = op1, op1 = tmp;
8556 code = swap_condition (code);
8559 if (GET_CODE (op0) != REG)
8560 op0 = force_reg (op_mode, op0);
8562 if (CONSTANT_P (op1))
8564 int tmp = standard_80387_constant_p (op1);
8566 op1 = validize_mem (force_const_mem (op_mode, op1));
8570 op1 = force_reg (op_mode, op1);
8573 op1 = force_reg (op_mode, op1);
8577 /* Try to rearrange the comparison to make it cheaper. */
8578 if (ix86_fp_comparison_cost (code)
8579 > ix86_fp_comparison_cost (swap_condition (code))
8580 && (GET_CODE (op1) == REG || !no_new_pseudos))
8583 tmp = op0, op0 = op1, op1 = tmp;
8584 code = swap_condition (code);
8585 if (GET_CODE (op0) != REG)
8586 op0 = force_reg (op_mode, op0);
8594 /* Convert comparison codes we use to represent FP comparison to integer
8595 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
8599 ix86_fp_compare_code_to_integer (enum rtx_code code)
8628 /* Split comparison code CODE into comparisons we can do using branch
8629 instructions. BYPASS_CODE is the comparison code for the branch that will
8630 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8631 is not required, its value is set to UNKNOWN.
8632 We never require more than two branches. */
8635 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8636 enum rtx_code *first_code,
8637 enum rtx_code *second_code)
8640 *bypass_code = UNKNOWN;
8641 *second_code = UNKNOWN;
8643 /* The fcomi comparison sets flags as follows:

        cmp        ZF PF CF
        op1 > op2   0  0  0
        op1 < op2   0  0  1
        op1 = op2   1  0  0
        unordered   1  1  1 */
8653 case GT: /* GTU - CF=0 & ZF=0 */
8654 case GE: /* GEU - CF=0 */
8655 case ORDERED: /* PF=0 */
8656 case UNORDERED: /* PF=1 */
8657 case UNEQ: /* EQ - ZF=1 */
8658 case UNLT: /* LTU - CF=1 */
8659 case UNLE: /* LEU - CF=1 | ZF=1 */
8660 case LTGT: /* EQ - ZF=0 */
8662 case LT: /* LTU - CF=1 - fails on unordered */
8664 *bypass_code = UNORDERED;
8666 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8668 *bypass_code = UNORDERED;
8670 case EQ: /* EQ - ZF=1 - fails on unordered */
8672 *bypass_code = UNORDERED;
8674 case NE: /* NE - ZF=0 - fails on unordered */
8676 *second_code = UNORDERED;
8678 case UNGE: /* GEU - CF=0 - fails on unordered */
8680 *second_code = UNORDERED;
8682 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8684 *second_code = UNORDERED;
8689 if (!TARGET_IEEE_FP)
8691 *second_code = UNKNOWN;
8692 *bypass_code = UNKNOWN;
8696 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
8697 All following functions use the number of instructions as a cost metric.
8698 In the future this should be tweaked to compute bytes for optimize_size and
8699 take into account the performance of various instructions on various CPUs. */
8701 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8703 if (!TARGET_IEEE_FP)
8705 /* The cost of code output by ix86_expand_fp_compare. */
8733 /* Return cost of comparison done using fcomi operation.
8734 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8736 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8738 enum rtx_code bypass_code, first_code, second_code;
8739 /* Return an arbitrarily high cost when the instruction is not supported -
8740 this prevents gcc from using it. */
8743 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8744 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8747 /* Return cost of comparison done using sahf operation.
8748 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8750 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8752 enum rtx_code bypass_code, first_code, second_code;
8753 /* Return an arbitrarily high cost when the instruction is not preferred -
8754 this prevents gcc from using it. */
8755 if (!TARGET_USE_SAHF && !optimize_size)
8757 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8758 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8761 /* Compute cost of the comparison done using any method.
8762 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8764 ix86_fp_comparison_cost (enum rtx_code code)
8766 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8769 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8770 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8772 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8773 if (min > sahf_cost)
8775 if (min > fcomi_cost)
8780 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8783 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8784 rtx *second_test, rtx *bypass_test)
8786 enum machine_mode fpcmp_mode, intcmp_mode;
8788 int cost = ix86_fp_comparison_cost (code);
8789 enum rtx_code bypass_code, first_code, second_code;
8791 fpcmp_mode = ix86_fp_compare_mode (code);
8792 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8795 *second_test = NULL_RTX;
8797 *bypass_test = NULL_RTX;
8799 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8801 /* Do fcomi/sahf based test when profitable. */
8802 if ((bypass_code == UNKNOWN || bypass_test)
8803 && (second_code == UNKNOWN || second_test)
8804 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8808 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8809 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8815 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8816 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8818 scratch = gen_reg_rtx (HImode);
8819 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8820 emit_insn (gen_x86_sahf_1 (scratch));
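/* sahf copies AH into the low byte of EFLAGS (SF, ZF, AF, PF, CF),
   turning the fnstsw result into directly testable flags. */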
8823 /* The FP codes work out to act like unsigned. */
8824 intcmp_mode = fpcmp_mode;
8826 if (bypass_code != UNKNOWN)
8827 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8828 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8830 if (second_code != UNKNOWN)
8831 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8832 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8837 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8838 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8839 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8841 scratch = gen_reg_rtx (HImode);
8842 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8844 /* In the unordered case, we have to check C2 for NaNs, which
8845 doesn't happen to work out to anything nice combination-wise.
8846 So do some bit twiddling on the value we've got in AH to come
8847 up with an appropriate set of condition codes. */
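/* Key to the masks used below (x87 status word as seen in AH after
   fnstsw): C0 lands in bit 0 (0x01), C2 in bit 2 (0x04), C3 in bit 6
   (0x40); 0x45 tests all three. An unordered result sets C0, C2 and C3
   simultaneously. */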
8849 intcmp_mode = CCNOmode;
8854 if (code == GT || !TARGET_IEEE_FP)
8856 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8861 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8862 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8863 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8864 intcmp_mode = CCmode;
8870 if (code == LT && TARGET_IEEE_FP)
8872 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8873 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8874 intcmp_mode = CCmode;
8879 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8885 if (code == GE || !TARGET_IEEE_FP)
8887 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8892 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8893 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8900 if (code == LE && TARGET_IEEE_FP)
8902 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8903 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8904 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8905 intcmp_mode = CCmode;
8910 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8916 if (code == EQ && TARGET_IEEE_FP)
8918 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8919 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8920 intcmp_mode = CCmode;
8925 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8932 if (code == NE && TARGET_IEEE_FP)
8934 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8935 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8941 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8947 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8951 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8960 /* Return the test that should be put into the flags user, i.e.
8961 the bcc, scc, or cmov instruction. */
8962 return gen_rtx_fmt_ee (code, VOIDmode,
8963 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8968 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8971 op0 = ix86_compare_op0;
8972 op1 = ix86_compare_op1;
8975 *second_test = NULL_RTX;
8977 *bypass_test = NULL_RTX;
8979 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8980 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8981 second_test, bypass_test);
8983 ret = ix86_expand_int_compare (code, op0, op1);
8988 /* Return true if the CODE will result in a nontrivial jump sequence. */
8990 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8992 enum rtx_code bypass_code, first_code, second_code;
8995 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8996 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9000 ix86_expand_branch (enum rtx_code code, rtx label)
9004 switch (GET_MODE (ix86_compare_op0))
9010 tmp = ix86_expand_compare (code, NULL, NULL);
9011 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9012 gen_rtx_LABEL_REF (VOIDmode, label),
9014 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9023 enum rtx_code bypass_code, first_code, second_code;
9025 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9028 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9030 /* Check whether we will use the natural sequence with one jump. If
9031 so, we can expand the jump early. Otherwise delay expansion by
9032 creating a compound insn so as not to confuse the optimizers. */
9033 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9036 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9037 gen_rtx_LABEL_REF (VOIDmode, label),
9038 pc_rtx, NULL_RTX, NULL_RTX);
9042 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9043 ix86_compare_op0, ix86_compare_op1);
9044 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9045 gen_rtx_LABEL_REF (VOIDmode, label),
9047 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9049 use_fcomi = ix86_use_fcomi_compare (code);
9050 vec = rtvec_alloc (3 + !use_fcomi);
9051 RTVEC_ELT (vec, 0) = tmp;
9053 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9055 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9058 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9060 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9068 /* Expand DImode branch into multiple compare+branch. */
9070 rtx lo[2], hi[2], label2;
9071 enum rtx_code code1, code2, code3;
9073 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9075 tmp = ix86_compare_op0;
9076 ix86_compare_op0 = ix86_compare_op1;
9077 ix86_compare_op1 = tmp;
9078 code = swap_condition (code);
9080 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9081 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9083 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9084 avoid two branches. This costs one extra insn, so disable when
9085 optimizing for size. */
9087 if ((code == EQ || code == NE)
9089 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9094 if (hi[1] != const0_rtx)
9095 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9096 NULL_RTX, 0, OPTAB_WIDEN);
9099 if (lo[1] != const0_rtx)
9100 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9101 NULL_RTX, 0, OPTAB_WIDEN);
9103 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9104 NULL_RTX, 0, OPTAB_WIDEN);
9106 ix86_compare_op0 = tmp;
9107 ix86_compare_op1 = const0_rtx;
9108 ix86_expand_branch (code, label);
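/* Illustration (not from the GCC sources): in plain C terms, the
   equality trick above folds both word halves into one test, so a
   64-bit ==/!= on a 32-bit target needs only a single compare and jump:

     int di_equal (unsigned lo0, unsigned hi0, unsigned lo1, unsigned hi1)
     {
       return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
     }
*/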
9112 /* Otherwise, if we are doing a less-than or greater-or-equal-than
9113 comparison, op1 is a constant, and the low word is zero, then we can
9114 just examine the high word. */
9116 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9119 case LT: case LTU: case GE: case GEU:
9120 ix86_compare_op0 = hi[0];
9121 ix86_compare_op1 = hi[1];
9122 ix86_expand_branch (code, label);
9128 /* Otherwise, we need two or three jumps. */
9130 label2 = gen_label_rtx ();
9133 code2 = swap_condition (code);
9134 code3 = unsigned_condition (code);
9138 case LT: case GT: case LTU: case GTU:
9141 case LE: code1 = LT; code2 = GT; break;
9142 case GE: code1 = GT; code2 = LT; break;
9143 case LEU: code1 = LTU; code2 = GTU; break;
9144 case GEU: code1 = GTU; code2 = LTU; break;
9146 case EQ: code1 = UNKNOWN; code2 = NE; break;
9147 case NE: code2 = UNKNOWN; break;
9155 * if (hi(a) < hi(b)) goto true;
9156 * if (hi(a) > hi(b)) goto false;
9157 * if (lo(a) < lo(b)) goto true;
9161 ix86_compare_op0 = hi[0];
9162 ix86_compare_op1 = hi[1];
9164 if (code1 != UNKNOWN)
9165 ix86_expand_branch (code1, label);
9166 if (code2 != UNKNOWN)
9167 ix86_expand_branch (code2, label2);
9169 ix86_compare_op0 = lo[0];
9170 ix86_compare_op1 = lo[1];
9171 ix86_expand_branch (code3, label);
9173 if (code2 != UNKNOWN)
9174 emit_label (label2);
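/* Standalone sketch (not part of GCC) of the two/three-jump sequence
   expanded above for a signed 64-bit a < b on a 32-bit target; code1,
   code2 and code3 correspond to the three branches:

     int di_less (int hi_a, unsigned lo_a, int hi_b, unsigned lo_b)
     {
       if (hi_a < hi_b) return 1;  /* code1: signed high-word compare */
       if (hi_a > hi_b) return 0;  /* code2: jump to the "false" label */
       return lo_a < lo_b;         /* code3: unsigned low-word compare */
     }
*/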
9183 /* Split branch based on floating point condition. */
9185 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9186 rtx target1, rtx target2, rtx tmp, rtx pushed)
9189 rtx label = NULL_RTX;
9191 int bypass_probability = -1, second_probability = -1, probability = -1;
9194 if (target2 != pc_rtx)
9197 code = reverse_condition_maybe_unordered (code);
9202 condition = ix86_expand_fp_compare (code, op1, op2,
9203 tmp, &second, &bypass);
9205 /* Remove pushed operand from stack. */
9207 ix86_free_from_memory (GET_MODE (pushed));
9209 if (split_branch_probability >= 0)
9211 /* Distribute the probabilities across the jumps.
9212 Assume that BYPASS and SECOND always test
9213 for UNORDERED. */
9214 probability = split_branch_probability;
9216 /* A value of 1 is low enough that the probability does not need
9217 to be updated. Later we may run some experiments and see
9218 whether unordered values are more frequent in practice. */
9220 bypass_probability = 1;
9222 second_probability = 1;
9224 if (bypass != NULL_RTX)
9226 label = gen_label_rtx ();
9227 i = emit_jump_insn (gen_rtx_SET
9229 gen_rtx_IF_THEN_ELSE (VOIDmode,
9231 gen_rtx_LABEL_REF (VOIDmode,
9234 if (bypass_probability >= 0)
9236 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9237 GEN_INT (bypass_probability),
9240 i = emit_jump_insn (gen_rtx_SET
9242 gen_rtx_IF_THEN_ELSE (VOIDmode,
9243 condition, target1, target2)));
9244 if (probability >= 0)
9246 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9247 GEN_INT (probability),
9249 if (second != NULL_RTX)
9251 i = emit_jump_insn (gen_rtx_SET
9253 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9255 if (second_probability >= 0)
9257 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9258 GEN_INT (second_probability),
9261 if (label != NULL_RTX)
9266 ix86_expand_setcc (enum rtx_code code, rtx dest)
9268 rtx ret, tmp, tmpreg, equiv;
9269 rtx second_test, bypass_test;
9271 if (GET_MODE (ix86_compare_op0) == DImode
9273 return 0; /* FAIL */
9275 if (GET_MODE (dest) != QImode)
9278 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9279 PUT_MODE (ret, QImode);
9284 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9285 if (bypass_test || second_test)
9287 rtx test = second_test;
9289 rtx tmp2 = gen_reg_rtx (QImode);
9296 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9298 PUT_MODE (test, QImode);
9299 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9302 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9304 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9307 /* Attach a REG_EQUAL note describing the comparison result. */
9308 equiv = simplify_gen_relational (code, QImode,
9309 GET_MODE (ix86_compare_op0),
9310 ix86_compare_op0, ix86_compare_op1);
9311 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9313 return 1; /* DONE */
9316 /* Expand a comparison setting or clearing the carry flag. Return true
9317 when successful, and set *POP to the comparison to use. */
9319 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9321 enum machine_mode mode =
9322 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9324 /* Do not handle DImode compares that go through a special path. Also we
9325 can't deal with FP compares yet. This would be possible to add. */
9326 if ((mode == DImode && !TARGET_64BIT))
9328 if (FLOAT_MODE_P (mode))
9330 rtx second_test = NULL, bypass_test = NULL;
9331 rtx compare_op, compare_seq;
9333 /* Shortcut: the following common codes never translate into carry flag compares. */
9334 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9335 || code == ORDERED || code == UNORDERED)
9338 /* These comparisons require the zero flag; swap the operands so they won't need it. */
9339 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9345 code = swap_condition (code);
9348 /* Try to expand the comparison and verify that we end up with a carry flag
9349 based comparison. This fails to be true only when we decide to expand the
9350 comparison using arithmetic, which is not a common scenario. */
9352 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9353 &second_test, &bypass_test);
9354 compare_seq = get_insns ();
9357 if (second_test || bypass_test)
9359 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9360 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9361 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9363 code = GET_CODE (compare_op);
9364 if (code != LTU && code != GEU)
9366 emit_insn (compare_seq);
9370 if (!INTEGRAL_MODE_P (mode))
9378 /* Convert a==0 into (unsigned)a<1. */
9381 if (op1 != const0_rtx)
9384 code = (code == EQ ? LTU : GEU);
9387 /* Convert a>b into b<a or a>=b-1. */
9390 if (GET_CODE (op1) == CONST_INT)
9392 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9393 /* Bail out on overflow. We can still swap the operands, but that
9394 would force loading the constant into a register. */
9395 if (op1 == const0_rtx
9396 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9398 code = (code == GTU ? GEU : LTU);
9405 code = (code == GTU ? LTU : GEU);
9409 /* Convert a>=0 into (unsigned)a<0x80000000. */
9412 if (mode == DImode || op1 != const0_rtx)
9414 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9415 code = (code == LT ? GEU : LTU);
9419 if (mode == DImode || op1 != constm1_rtx)
9421 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9422 code = (code == LE ? GEU : LTU);
9428 /* Swapping operands may cause a constant to appear as the first operand. */
9429 if (!nonimmediate_operand (op0, VOIDmode))
9433 op0 = force_reg (mode, op0);
9435 ix86_compare_op0 = op0;
9436 ix86_compare_op1 = op1;
9437 *pop = ix86_expand_compare (code, NULL, NULL);
9438 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
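/* The rewrites performed above, stated directly (an illustration, not
   part of GCC); each turns the test into LTU or GEU so that the result
   lands in the carry flag:

     a == 0   becomes   (unsigned) a < 1
     a >  b   becomes   b < a, or a >= b+1 when b is a constant
     a >= 0   becomes   (unsigned) a < 0x80000000

   For example, for 32-bit operands:

     int ge_zero (int a) { return (unsigned) a < 0x80000000u; }
*/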
9444 ix86_expand_int_movcc (rtx operands[])
9446 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9447 rtx compare_seq, compare_op;
9448 rtx second_test, bypass_test;
9449 enum machine_mode mode = GET_MODE (operands[0]);
9450 bool sign_bit_compare_p = false;
9453 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9454 compare_seq = get_insns ();
9457 compare_code = GET_CODE (compare_op);
9459 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9460 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9461 sign_bit_compare_p = true;
9463 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9464 HImode insns, we'd be swallowed in word prefix ops. */
9466 if ((mode != HImode || TARGET_FAST_PREFIX)
9467 && (mode != DImode || TARGET_64BIT)
9468 && GET_CODE (operands[2]) == CONST_INT
9469 && GET_CODE (operands[3]) == CONST_INT)
9471 rtx out = operands[0];
9472 HOST_WIDE_INT ct = INTVAL (operands[2]);
9473 HOST_WIDE_INT cf = INTVAL (operands[3]);
9477 /* Sign bit compares are better done using shifts than we do by using
9478 sbb. */
9479 if (sign_bit_compare_p
9480 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9481 ix86_compare_op1, &compare_op))
9483 /* Detect overlap between destination and compare sources. */
9486 if (!sign_bit_compare_p)
9490 compare_code = GET_CODE (compare_op);
9492 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9493 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9496 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9499 /* To simplify rest of code, restrict to the GEU case. */
9500 if (compare_code == LTU)
9502 HOST_WIDE_INT tmp = ct;
9505 compare_code = reverse_condition (compare_code);
9506 code = reverse_condition (code);
9511 PUT_CODE (compare_op,
9512 reverse_condition_maybe_unordered
9513 (GET_CODE (compare_op)));
9515 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9519 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9520 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9521 tmp = gen_reg_rtx (mode);
9524 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9526 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9530 if (code == GT || code == GE)
9531 code = reverse_condition (code);
9534 HOST_WIDE_INT tmp = ct;
9539 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9540 ix86_compare_op1, VOIDmode, 0, -1);
9553 tmp = expand_simple_binop (mode, PLUS,
9555 copy_rtx (tmp), 1, OPTAB_DIRECT);
9566 tmp = expand_simple_binop (mode, IOR,
9568 copy_rtx (tmp), 1, OPTAB_DIRECT);
9570 else if (diff == -1 && ct)
9580 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9582 tmp = expand_simple_binop (mode, PLUS,
9583 copy_rtx (tmp), GEN_INT (cf),
9584 copy_rtx (tmp), 1, OPTAB_DIRECT);
9589 * cmpl op1,op2
9590 * sbbl dest,dest
9591 * [notl dest]
9592 * andl cf - ct, dest
9593 * [addl dest, ct]
9595 * Size 8 - 11.
9602 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9605 tmp = expand_simple_binop (mode, AND,
9607 gen_int_mode (cf - ct, mode),
9608 copy_rtx (tmp), 1, OPTAB_DIRECT);
9610 tmp = expand_simple_binop (mode, PLUS,
9611 copy_rtx (tmp), GEN_INT (ct),
9612 copy_rtx (tmp), 1, OPTAB_DIRECT);
9615 if (!rtx_equal_p (tmp, out))
9616 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9618 return 1; /* DONE */
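/* Standalone sketch (not part of GCC) of the sbb trick used above: the
   compare leaves carry = (op0 < op1) unsigned, sbb smears it into an
   all-ones/all-zeros mask, and masked arithmetic selects between the
   two constants with no branch. For 32-bit operands:

     unsigned sel_sbb (unsigned a, unsigned b, unsigned ct, unsigned cf)
     {
       unsigned mask = -(unsigned) (a < b);  /* sbbl dest,dest */
       return (mask & (ct - cf)) + cf;       /* andl; addl */
     }
*/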
9624 tmp = ct, ct = cf, cf = tmp;
9626 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9628 /* We may be reversing an unordered compare to a normal compare, which
9629 is not valid in general (we may convert a non-trapping condition
9630 into a trapping one); however, on i386 we currently emit all
9631 comparisons unordered. */
9632 compare_code = reverse_condition_maybe_unordered (compare_code);
9633 code = reverse_condition_maybe_unordered (code);
9637 compare_code = reverse_condition (compare_code);
9638 code = reverse_condition (code);
9642 compare_code = UNKNOWN;
9643 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9644 && GET_CODE (ix86_compare_op1) == CONST_INT)
9646 if (ix86_compare_op1 == const0_rtx
9647 && (code == LT || code == GE))
9648 compare_code = code;
9649 else if (ix86_compare_op1 == constm1_rtx)
9653 else if (code == GT)
9658 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9659 if (compare_code != UNKNOWN
9660 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9661 && (cf == -1 || ct == -1))
9663 /* If the lea code below could be used, only optimize
9664 if it results in a 2-insn sequence. */
9666 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9667 || diff == 3 || diff == 5 || diff == 9)
9668 || (compare_code == LT && ct == -1)
9669 || (compare_code == GE && cf == -1))
9672 * notl op1 (if necessary)
9673 * sarl $31, op1
9674 * orl cf, op1
9680 code = reverse_condition (code);
9683 out = emit_store_flag (out, code, ix86_compare_op0,
9684 ix86_compare_op1, VOIDmode, 0, -1);
9686 out = expand_simple_binop (mode, IOR,
9688 out, 1, OPTAB_DIRECT);
9689 if (out != operands[0])
9690 emit_move_insn (operands[0], out);
9692 return 1; /* DONE */
9697 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9698 || diff == 3 || diff == 5 || diff == 9)
9699 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9701 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9705 * cmpl op1,op2
9706 * setcc dest
9707 * lea cf(dest*(ct-cf)),dest
9709 * Size 14.
9711 * This also catches the degenerate setcc-only case.
9717 out = emit_store_flag (out, code, ix86_compare_op0,
9718 ix86_compare_op1, VOIDmode, 0, 1);
9721 /* On x86_64 the lea instruction operates on Pmode, so we need
9722 to get the arithmetic done in the proper mode to match. */
9724 tmp = copy_rtx (out);
9728 out1 = copy_rtx (out);
9729 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9733 tmp = gen_rtx_PLUS (mode, tmp, out1);
9739 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9742 if (!rtx_equal_p (tmp, out))
9745 out = force_operand (tmp, copy_rtx (out));
9747 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9749 if (!rtx_equal_p (out, operands[0]))
9750 emit_move_insn (operands[0], copy_rtx (out));
9752 return 1; /* DONE */
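/* Sketch (not part of GCC) of the setcc + lea form used above when
   ct - cf is one of 1, 2, 3, 4, 5, 8 or 9; the scaled-index address
   computation does the multiply and the add in a single insn:

     unsigned sel_lea (unsigned a, unsigned b, unsigned ct, unsigned cf)
     {
       unsigned t = (a < b);        /* setcc: t is 0 or 1 */
       return cf + t * (ct - cf);   /* lea cf(dest*(ct-cf)),dest */
     }
*/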
9756 * General case:                  Jumpful:
9757 *   xorl dest,dest               cmpl op1, op2
9758 *   cmpl op1, op2                movl ct, dest
9759 *   setcc dest                   jcc 1f
9760 *   decl dest                    movl cf, dest
9761 *   andl (cf-ct),dest            1:
9762 *   addl ct,dest
9764 * Size 20.                       Size 14.
9766 * This is reasonably steep, but branch mispredict costs are
9767 * high on modern cpus, so consider failing only if optimizing
9768 * for space.
9771 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9772 && BRANCH_COST >= 2)
9778 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9779 /* We may be reversing an unordered compare to a normal compare,
9780 which is not valid in general (we may convert a non-trapping
9781 condition into a trapping one); however, on i386 we currently
9782 emit all comparisons unordered. */
9783 code = reverse_condition_maybe_unordered (code);
9786 code = reverse_condition (code);
9787 if (compare_code != UNKNOWN)
9788 compare_code = reverse_condition (compare_code);
9792 if (compare_code != UNKNOWN)
9794 /* notl op1 (if needed)
9795 sarl $31, op1
9796 orl cf, op1
9799 For x < 0 (resp. x <= -1) there will be no notl,
9800 so if possible swap the constants to get rid of the
9801 complement.
9802 True/false will be -1/0 while the code below (store flag
9803 followed by decrement) is 0/-1, so the constants need
9804 to be exchanged once more. */
9806 if (compare_code == GE || !cf)
9808 code = reverse_condition (code);
9813 HOST_WIDE_INT tmp = cf;
9818 out = emit_store_flag (out, code, ix86_compare_op0,
9819 ix86_compare_op1, VOIDmode, 0, -1);
9823 out = emit_store_flag (out, code, ix86_compare_op0,
9824 ix86_compare_op1, VOIDmode, 0, 1);
9826 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9827 copy_rtx (out), 1, OPTAB_DIRECT);
9830 out = expand_simple_binop (mode, AND, copy_rtx (out),
9831 gen_int_mode (cf - ct, mode),
9832 copy_rtx (out), 1, OPTAB_DIRECT);
9834 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9835 copy_rtx (out), 1, OPTAB_DIRECT);
9836 if (!rtx_equal_p (out, operands[0]))
9837 emit_move_insn (operands[0], copy_rtx (out));
9839 return 1; /* DONE */
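/* The jumpless general case above, written out in C (an illustration,
   not part of GCC); it matches the xorl/setcc/decl/andl/addl column of
   the size comparison earlier in this function:

     unsigned sel_noj (unsigned a, unsigned b, unsigned ct, unsigned cf)
     {
       unsigned t = (a < b);  /* setcc: 0 or 1 */
       t -= 1;                /* decl: 1 -> 0, 0 -> all ones */
       t &= cf - ct;          /* andl (cf-ct),dest */
       return t + ct;         /* addl ct,dest */
     }
*/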
9843 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9845 /* Try a few more things with specific constants and a variable. */
9848 rtx var, orig_out, out, tmp;
9850 if (BRANCH_COST <= 2)
9851 return 0; /* FAIL */
9853 /* If one of the two operands is an interesting constant, load a
9854 constant with the above and mask it in with a logical operation. */
9856 if (GET_CODE (operands[2]) == CONST_INT)
9859 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9860 operands[3] = constm1_rtx, op = and_optab;
9861 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9862 operands[3] = const0_rtx, op = ior_optab;
9864 return 0; /* FAIL */
9866 else if (GET_CODE (operands[3]) == CONST_INT)
9869 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9870 operands[2] = constm1_rtx, op = and_optab;
9871 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9872 operands[2] = const0_rtx, op = ior_optab;
9874 return 0; /* FAIL */
9877 return 0; /* FAIL */
9879 orig_out = operands[0];
9880 tmp = gen_reg_rtx (mode);
9883 /* Recurse to get the constant loaded. */
9884 if (ix86_expand_int_movcc (operands) == 0)
9885 return 0; /* FAIL */
9887 /* Mask in the interesting variable. */
9888 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9890 if (!rtx_equal_p (out, orig_out))
9891 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9893 return 1; /* DONE */
9897 * For comparison with above,
9907 if (! nonimmediate_operand (operands[2], mode))
9908 operands[2] = force_reg (mode, operands[2]);
9909 if (! nonimmediate_operand (operands[3], mode))
9910 operands[3] = force_reg (mode, operands[3]);
9912 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9914 rtx tmp = gen_reg_rtx (mode);
9915 emit_move_insn (tmp, operands[3]);
9918 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9920 rtx tmp = gen_reg_rtx (mode);
9921 emit_move_insn (tmp, operands[2]);
9925 if (! register_operand (operands[2], VOIDmode)
9927 || ! register_operand (operands[3], VOIDmode)))
9928 operands[2] = force_reg (mode, operands[2]);
9931 && ! register_operand (operands[3], VOIDmode))
9932 operands[3] = force_reg (mode, operands[3]);
9934 emit_insn (compare_seq);
9935 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9936 gen_rtx_IF_THEN_ELSE (mode,
9937 compare_op, operands[2],
9940 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9941 gen_rtx_IF_THEN_ELSE (mode,
9943 copy_rtx (operands[3]),
9944 copy_rtx (operands[0]))));
9946 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9947 gen_rtx_IF_THEN_ELSE (mode,
9949 copy_rtx (operands[2]),
9950 copy_rtx (operands[0]))));
9952 return 1; /* DONE */
9956 ix86_expand_fp_movcc (rtx operands[])
9958 enum machine_mode mode = GET_MODE (operands[0]);
9959 enum rtx_code code = GET_CODE (operands[1]);
9960 rtx tmp, compare_op, second_test, bypass_test;
9962 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
9964 rtx cmp_op0, cmp_op1, if_true, if_false;
9966 enum machine_mode vmode, cmode;
9967 bool is_minmax = false;
9969 cmp_op0 = ix86_compare_op0;
9970 cmp_op1 = ix86_compare_op1;
9971 if_true = operands[2];
9972 if_false = operands[3];
9974 /* Since we have no cmove for SSE registers, don't force bad register
9975 allocation just to gain access to it. Deny movcc when the
9976 comparison mode doesn't match the move mode. */
9977 cmode = GET_MODE (cmp_op0);
9978 if (cmode == VOIDmode)
9979 cmode = GET_MODE (cmp_op1);
9983 /* We have no LTGT as an operator. We could implement it with
9984 NE & ORDERED, but this requires an extra temporary. It's
9985 not clear that it's worth it. */
9986 if (code == LTGT || code == UNEQ)
9989 /* Massage condition to satisfy sse_comparison_operator. Try
9990 to canonicalize the destination operand to be first in the
9991 comparison - this helps reload to avoid extra moves. */
9992 if (!sse_comparison_operator (operands[1], VOIDmode)
9993 || (COMMUTATIVE_P (operands[1])
9994 && rtx_equal_p (operands[0], cmp_op1)))
9999 code = swap_condition (code);
10002 /* Detect conditional moves that exactly match min/max operational
10003 semantics. Note that this is IEEE safe, as long as we don't
10004 interchange the operands, which is why we keep this in the form
10005 of an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
10006 if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
10008 if (((cmp_op0 == if_true && cmp_op1 == if_false)
10009 || (cmp_op0 == if_false && cmp_op1 == if_true)))
10016 if_true = if_false;
10022 if (mode == SFmode)
10024 else if (mode == DFmode)
10027 gcc_unreachable ();
10029 cmp_op0 = force_reg (mode, cmp_op0);
10030 if (!nonimmediate_operand (cmp_op1, mode))
10031 cmp_op1 = force_reg (mode, cmp_op1);
10033 tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10034 gcc_assert (sse_comparison_operator (tmp, VOIDmode));
10036 tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
10037 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
10041 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
10042 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10049 /* The floating point conditional move instructions don't directly
10050 support conditions resulting from a signed integer comparison. */
10052 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10057 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10059 if (second_test != NULL || bypass_test != NULL)
10061 tmp = gen_reg_rtx (QImode);
10062 ix86_expand_setcc (code, tmp);
10064 ix86_compare_op0 = tmp;
10065 ix86_compare_op1 = const0_rtx;
10066 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10068 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10070 tmp = gen_reg_rtx (mode);
10071 emit_move_insn (tmp, operands[3]);
10074 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10076 tmp = gen_reg_rtx (mode);
10077 emit_move_insn (tmp, operands[2]);
10081 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10082 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10083 operands[2], operands[3])));
10085 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10086 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10087 operands[3], operands[0])));
10089 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10090 gen_rtx_IF_THEN_ELSE (mode, second_test,
10091 operands[2], operands[0])));
10097 ix86_split_sse_movcc (rtx operands[])
10099 rtx dest, scratch, cmp, op_true, op_false, x;
10100 enum machine_mode mode, vmode;
10102 /* Note that the operator CMP has been set up with matching constraints
10103 such that dest is valid for the comparison. Unless one of the true
10104 or false operands is zero, the true operand has already been placed
10105 in SCRATCH. */
10106 dest = operands[0];
10107 scratch = operands[1];
10108 op_true = operands[2];
10109 op_false = operands[3];
10112 mode = GET_MODE (dest);
10113 vmode = GET_MODE (scratch);
10115 /* We need to make sure that the TRUE and FALSE operands are out of the
10116 way of the destination. Marking the destination earlyclobber doesn't
10117 work, since we want matching constraints for the actual comparison, so
10118 at some point we always wind up having to do a copy ourselves here.
10119 We very much prefer the TRUE value to be in SCRATCH. If it turns out
10120 that FALSE overlaps DEST, then we invert the comparison so that we
10121 still only have to do one move. */
10122 if (rtx_equal_p (op_false, dest))
10124 enum rtx_code code;
10126 if (rtx_equal_p (op_true, dest))
10128 /* ??? Really ought not happen. It means some optimizer managed
10129 to prove the operands were identical, but failed to fold the
10130 conditional move to a straight move. Do so here, because
10131 otherwise we'll generate incorrect code. And since they're
10132 both already in the destination register, nothing to do. */
10136 x = gen_rtx_REG (mode, REGNO (scratch));
10137 emit_move_insn (x, op_false);
10138 op_false = op_true;
10141 code = GET_CODE (cmp);
10142 code = reverse_condition_maybe_unordered (code);
10143 cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
10145 else if (op_true == CONST0_RTX (mode))
10147 else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
10151 x = gen_rtx_REG (mode, REGNO (scratch));
10152 emit_move_insn (x, op_true);
10156 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
10157 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10159 if (op_false == CONST0_RTX (mode))
10161 op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
10162 x = gen_rtx_AND (vmode, dest, op_true);
10163 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10167 op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
10169 if (op_true == CONST0_RTX (mode))
10171 x = gen_rtx_NOT (vmode, dest);
10172 x = gen_rtx_AND (vmode, x, op_false);
10173 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10177 x = gen_rtx_AND (vmode, scratch, dest);
10178 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10180 x = gen_rtx_NOT (vmode, dest);
10181 x = gen_rtx_AND (vmode, x, op_false);
10182 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10184 x = gen_rtx_IOR (vmode, dest, scratch);
10185 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
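/* Standalone sketch (not part of GCC) of the mask-based select built
   above: the SSE compare produces an all-ones or all-zeros bit pattern,
   and the AND/ANDN/IOR triple picks one of the two values. In scalar
   form, for 32-bit bit patterns:

     unsigned sse_sel (unsigned mask, unsigned t, unsigned f)
     {
       return (mask & t) | (~mask & f);  /* andps; andnps; orps */
     }

   The special cases above drop one of the ANDs when the corresponding
   operand is zero. */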
10190 /* Expand conditional increment or decrement using adc/sbb instructions.
10191 The default case using setcc followed by the conditional move can be
10192 done by generic code. */
10194 ix86_expand_int_addcc (rtx operands[])
10196 enum rtx_code code = GET_CODE (operands[1]);
10198 rtx val = const0_rtx;
10199 bool fpcmp = false;
10200 enum machine_mode mode = GET_MODE (operands[0]);
10202 if (operands[3] != const1_rtx
10203 && operands[3] != constm1_rtx)
10205 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10206 ix86_compare_op1, &compare_op))
10208 code = GET_CODE (compare_op);
10210 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10211 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10214 code = ix86_fp_compare_code_to_integer (code);
10221 PUT_CODE (compare_op,
10222 reverse_condition_maybe_unordered
10223 (GET_CODE (compare_op)));
10225 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10227 PUT_MODE (compare_op, mode);
10229 /* Construct either adc or sbb insn. */
10230 if ((code == LTU) == (operands[3] == constm1_rtx))
10232 switch (GET_MODE (operands[0]))
10235 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10238 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10241 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10244 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10252 switch (GET_MODE (operands[0]))
10255 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10258 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10261 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10264 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10270 return 1; /* DONE */
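/* Sketch (not part of GCC) of the carry-based conditional increment:
   after a compare that leaves carry = (a < b) unsigned, adc (or sbb)
   adds or subtracts that single bit without any branch:

     unsigned cond_inc (unsigned x, unsigned a, unsigned b)
     {
       return x + (a < b);   /* cmpl b,a ; adcl $0,x */
     }
*/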
10274 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10275 works for floating point parameters and non-offsettable memories.
10276 For pushes, it returns just stack offsets; the values will be saved
10277 in the right order. At most three parts are generated. */
10280 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10285 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10287 size = (GET_MODE_SIZE (mode) + 4) / 8;
10289 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10291 if (size < 2 || size > 3)
10294 /* Optimize constant pool references to immediates. This is used by fp
10295 moves, which force all constants to memory to allow combining. */
10296 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10298 rtx tmp = maybe_get_pool_constant (operand);
10303 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10305 /* The only non-offsettable memories we handle are pushes. */
10306 if (! push_operand (operand, VOIDmode))
10309 operand = copy_rtx (operand);
10310 PUT_MODE (operand, Pmode);
10311 parts[0] = parts[1] = parts[2] = operand;
10315 if (GET_CODE (operand) == CONST_VECTOR)
10317 enum machine_mode imode = int_mode_for_mode (mode);
10318 operand = simplify_subreg (imode, operand, mode, 0);
10319 gcc_assert (operand != NULL);
10325 if (mode == DImode)
10326 split_di (&operand, 1, &parts[0], &parts[1]);
10329 if (REG_P (operand))
10331 if (!reload_completed)
10333 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10334 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10336 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10338 else if (offsettable_memref_p (operand))
10340 operand = adjust_address (operand, SImode, 0);
10341 parts[0] = operand;
10342 parts[1] = adjust_address (operand, SImode, 4);
10344 parts[2] = adjust_address (operand, SImode, 8);
10346 else if (GET_CODE (operand) == CONST_DOUBLE)
10351 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10355 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10356 parts[2] = gen_int_mode (l[2], SImode);
10359 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10364 parts[1] = gen_int_mode (l[1], SImode);
10365 parts[0] = gen_int_mode (l[0], SImode);
10373 if (mode == TImode)
10374 split_ti (&operand, 1, &parts[0], &parts[1]);
10375 if (mode == XFmode || mode == TFmode)
10377 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
10378 if (REG_P (operand))
10380 if (!reload_completed)
10382 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10383 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10385 else if (offsettable_memref_p (operand))
10387 operand = adjust_address (operand, DImode, 0);
10388 parts[0] = operand;
10389 parts[1] = adjust_address (operand, upper_mode, 8);
10391 else if (GET_CODE (operand) == CONST_DOUBLE)
10396 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10397 real_to_target (l, &r, mode);
10399 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10400 if (HOST_BITS_PER_WIDE_INT >= 64)
10403 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10404 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10407 parts[0] = immed_double_const (l[0], l[1], DImode);
10409 if (upper_mode == SImode)
10410 parts[1] = gen_int_mode (l[2], SImode);
10411 else if (HOST_BITS_PER_WIDE_INT >= 64)
10414 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10415 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10418 parts[1] = immed_double_const (l[2], l[3], DImode);
10428 /* Emit insns to perform a move or push of DI, DF, and XF values.
10429 Return false when normal moves are needed; true when all required
10430 insns have been emitted. Operands 2-4 contain the input values
10431 in the correct order; operands 5-7 contain the output values. */
10434 ix86_split_long_move (rtx operands[])
10439 int collisions = 0;
10440 enum machine_mode mode = GET_MODE (operands[0]);
10442 /* The DFmode expanders may ask us to move a double.
10443 For a 64-bit target this is a single move. By hiding the fact
10444 here we simplify the i386.md splitters. */
10445 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10447 /* Optimize constant pool references to immediates. This is used by
10448 fp moves, which force all constants to memory to allow combining. */
10450 if (GET_CODE (operands[1]) == MEM
10451 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10452 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10453 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10454 if (push_operand (operands[0], VOIDmode))
10456 operands[0] = copy_rtx (operands[0]);
10457 PUT_MODE (operands[0], Pmode);
10460 operands[0] = gen_lowpart (DImode, operands[0]);
10461 operands[1] = gen_lowpart (DImode, operands[1]);
10462 emit_move_insn (operands[0], operands[1]);
10466 /* The only non-offsettable memory we handle is push. */
10467 if (push_operand (operands[0], VOIDmode))
10469 else if (GET_CODE (operands[0]) == MEM
10470 && ! offsettable_memref_p (operands[0]))
10473 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10474 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10476 /* When emitting a push, take care of source operands on the stack. */
10477 if (push && GET_CODE (operands[1]) == MEM
10478 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10481 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10482 XEXP (part[1][2], 0));
10483 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10484 XEXP (part[1][1], 0));
10487 /* We need to do the copy in the right order in case an address register
10488 of the source overlaps the destination. */
10489 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10491 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10493 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10496 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10499 /* Collision in the middle part can be handled by reordering. */
10500 if (collisions == 1 && nparts == 3
10501 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10504 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10505 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10508 /* If there are more collisions, we can't handle it by reordering.
10509 Do an lea to the last part and use only one colliding move. */
10510 else if (collisions > 1)
10516 base = part[0][nparts - 1];
10518 /* Handle the case when the last part isn't valid for lea.
10519 Happens in 64-bit mode storing the 12-byte XFmode. */
10520 if (GET_MODE (base) != Pmode)
10521 base = gen_rtx_REG (Pmode, REGNO (base));
10523 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10524 part[1][0] = replace_equiv_address (part[1][0], base);
10525 part[1][1] = replace_equiv_address (part[1][1],
10526 plus_constant (base, UNITS_PER_WORD));
10528 part[1][2] = replace_equiv_address (part[1][2],
10529 plus_constant (base, 8));
10539 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10540 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10541 emit_move_insn (part[0][2], part[1][2]);
10546 /* In 64-bit mode we don't have a 32-bit push available. If the operand
10547 is a register, that is OK; we will just use the larger counterpart.
10548 We also retype memory; these come from the attempt to avoid a REX
10549 prefix when moving the second half of a TFmode value. */
10550 if (GET_MODE (part[1][1]) == SImode)
10552 if (GET_CODE (part[1][1]) == MEM)
10553 part[1][1] = adjust_address (part[1][1], DImode, 0);
10554 else if (REG_P (part[1][1]))
10555 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10558 if (GET_MODE (part[1][0]) == SImode)
10559 part[1][0] = part[1][1];
10562 emit_move_insn (part[0][1], part[1][1]);
10563 emit_move_insn (part[0][0], part[1][0]);
10567 /* Choose the correct order so we do not overwrite the source before it is copied. */
10568 if ((REG_P (part[0][0])
10569 && REG_P (part[1][1])
10570 && (REGNO (part[0][0]) == REGNO (part[1][1])
10572 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10574 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10578 operands[2] = part[0][2];
10579 operands[3] = part[0][1];
10580 operands[4] = part[0][0];
10581 operands[5] = part[1][2];
10582 operands[6] = part[1][1];
10583 operands[7] = part[1][0];
10587 operands[2] = part[0][1];
10588 operands[3] = part[0][0];
10589 operands[5] = part[1][1];
10590 operands[6] = part[1][0];
10597 operands[2] = part[0][0];
10598 operands[3] = part[0][1];
10599 operands[4] = part[0][2];
10600 operands[5] = part[1][0];
10601 operands[6] = part[1][1];
10602 operands[7] = part[1][2];
10606 operands[2] = part[0][0];
10607 operands[3] = part[0][1];
10608 operands[5] = part[1][0];
10609 operands[6] = part[1][1];
10613 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10616 if (GET_CODE (operands[5]) == CONST_INT
10617 && operands[5] != const0_rtx
10618 && REG_P (operands[2]))
10620 if (GET_CODE (operands[6]) == CONST_INT
10621 && INTVAL (operands[6]) == INTVAL (operands[5]))
10622 operands[6] = operands[2];
10625 && GET_CODE (operands[7]) == CONST_INT
10626 && INTVAL (operands[7]) == INTVAL (operands[5]))
10627 operands[7] = operands[2];
10631 && GET_CODE (operands[6]) == CONST_INT
10632 && operands[6] != const0_rtx
10633 && REG_P (operands[3])
10634 && GET_CODE (operands[7]) == CONST_INT
10635 && INTVAL (operands[7]) == INTVAL (operands[6]))
10636 operands[7] = operands[3];
10639 emit_move_insn (operands[2], operands[5]);
10640 emit_move_insn (operands[3], operands[6]);
10642 emit_move_insn (operands[4], operands[7]);
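/* Illustration (not part of GCC) of the ordering constraint handled
   above: when a multi-word move overlaps itself, the part that would
   clobber a still-needed source must be copied last. A two-word sketch:

     void move_pair (unsigned *d0, unsigned *d1,
                     const unsigned *s0, const unsigned *s1)
     {
       if (d0 == s1)        /* low destination aliases high source */
         {
           *d1 = *s1;       /* so emit the high move first */
           *d0 = *s0;
         }
       else
         {
           *d0 = *s0;
           *d1 = *s1;
         }
     }
*/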
10647 /* Helper function of ix86_split_ashldi used to generate an SImode
10648 left shift by a constant, either using a single shift or
10649 a sequence of add instructions. */
10652 ix86_expand_ashlsi3_const (rtx operand, int count)
10655 emit_insn (gen_addsi3 (operand, operand, operand));
10656 else if (!optimize_size
10657 && count * ix86_cost->add <= ix86_cost->shift_const)
10660 for (i=0; i<count; i++)
10661 emit_insn (gen_addsi3 (operand, operand, operand));
10664 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10668 ix86_split_ashldi (rtx *operands, rtx scratch)
10670 rtx low[2], high[2];
10673 if (GET_CODE (operands[2]) == CONST_INT)
10675 split_di (operands, 2, low, high);
10676 count = INTVAL (operands[2]) & 63;
10680 emit_move_insn (high[0], low[1]);
10681 emit_move_insn (low[0], const0_rtx);
10684 ix86_expand_ashlsi3_const (high[0], count - 32);
10688 if (!rtx_equal_p (operands[0], operands[1]))
10689 emit_move_insn (operands[0], operands[1]);
10690 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10691 ix86_expand_ashlsi3_const (low[0], count);
10696 split_di (operands, 1, low, high);
10698 if (operands[1] == const1_rtx)
10700 /* Assuming we've chosen QImode-capable registers, 1LL << N
10701 can be done with two 32-bit shifts, no branches, no cmoves. */
10702 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10704 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10706 ix86_expand_clear (low[0]);
10707 ix86_expand_clear (high[0]);
10708 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10710 d = gen_lowpart (QImode, low[0]);
10711 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10712 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10713 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10715 d = gen_lowpart (QImode, high[0]);
10716 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10717 s = gen_rtx_NE (QImode, flags, const0_rtx);
10718 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10721 /* Otherwise, we can get the same results by manually performing
10722 a bit extract operation on bit 5, and then performing the two
10723 shifts. The two methods of getting 0/1 into low/high are exactly
10724 the same size. Avoiding the shift in the bit extract case helps
10725 pentium4 a bit; no one else seems to care much either way. */
10730 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10731 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10733 x = gen_lowpart (SImode, operands[2]);
10734 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10736 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10737 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10738 emit_move_insn (low[0], high[0]);
10739 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10742 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10743 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10747 if (operands[1] == constm1_rtx)
10749 /* For -1LL << N, we can avoid the shld instruction, because we
10750 know that we're shifting 0...31 ones into a -1. */
10751 emit_move_insn (low[0], constm1_rtx);
10753 emit_move_insn (high[0], low[0]);
10755 emit_move_insn (high[0], constm1_rtx);
10759 if (!rtx_equal_p (operands[0], operands[1]))
10760 emit_move_insn (operands[0], operands[1]);
10762 split_di (operands, 1, low, high);
10763 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10766 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10768 if (TARGET_CMOVE && scratch)
10770 ix86_expand_clear (scratch);
10771 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10774 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
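/* Standalone sketch (not part of GCC) of the double-word left shift
   expanded above for a variable count. The x86 shift insns mask the
   count to 5 bits, hence the separate >= 32 adjustment step:

     void ashl64 (unsigned *lo, unsigned *hi, unsigned count)
     {
       count &= 63;
       if (count >= 32)
         {
           *hi = *lo << (count - 32);
           *lo = 0;
         }
       else if (count > 0)
         {
           *hi = (*hi << count) | (*lo >> (32 - count));  /* shld */
           *lo <<= count;
         }
     }
*/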
10778 ix86_split_ashrdi (rtx *operands, rtx scratch)
10780 rtx low[2], high[2];
10783 if (GET_CODE (operands[2]) == CONST_INT)
10785 split_di (operands, 2, low, high);
10786 count = INTVAL (operands[2]) & 63;
10790 emit_move_insn (high[0], high[1]);
10791 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10792 emit_move_insn (low[0], high[0]);
10795 else if (count >= 32)
10797 emit_move_insn (low[0], high[1]);
10798 emit_move_insn (high[0], low[0]);
10799 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10801 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10805 if (!rtx_equal_p (operands[0], operands[1]))
10806 emit_move_insn (operands[0], operands[1]);
10807 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10808 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10813 if (!rtx_equal_p (operands[0], operands[1]))
10814 emit_move_insn (operands[0], operands[1]);
10816 split_di (operands, 1, low, high);
10818 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10819 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10821 if (TARGET_CMOVE && scratch)
10823 emit_move_insn (scratch, high[0]);
10824 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10825 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10829 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
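/* The matching arithmetic right shift, as a sketch (not part of GCC);
   this assumes >> on a signed int is an arithmetic shift, as on x86:

     void ashr64 (unsigned *lo, int *hi, unsigned count)
     {
       count &= 63;
       if (count >= 32)
         {
           *lo = (unsigned) (*hi >> (count - 32));
           *hi >>= 31;                 /* sarl $31: sign fill, 0 or -1 */
         }
       else if (count > 0)
         {
           *lo = (*lo >> count) | ((unsigned) *hi << (32 - count));  /* shrd */
           *hi >>= count;
         }
     }
*/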
10834 ix86_split_lshrdi (rtx *operands, rtx scratch)
10836 rtx low[2], high[2];
10839 if (GET_CODE (operands[2]) == CONST_INT)
10841 split_di (operands, 2, low, high);
10842 count = INTVAL (operands[2]) & 63;
10846 emit_move_insn (low[0], high[1]);
10847 ix86_expand_clear (high[0]);
10850 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10854 if (!rtx_equal_p (operands[0], operands[1]))
10855 emit_move_insn (operands[0], operands[1]);
10856 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10857 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10862 if (!rtx_equal_p (operands[0], operands[1]))
10863 emit_move_insn (operands[0], operands[1]);
10865 split_di (operands, 1, low, high);
10867 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10868 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10870 /* Heh. By reversing the arguments, we can reuse this pattern. */
10871 if (TARGET_CMOVE && scratch)
10873 ix86_expand_clear (scratch);
10874 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10878 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10882 /* Helper function for the string operations below. Test whether VARIABLE
10883 is aligned to VALUE bytes. If so, jump to the label. */
10885 ix86_expand_aligntest (rtx variable, int value)
10887 rtx label = gen_label_rtx ();
10888 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10889 if (GET_MODE (variable) == DImode)
10890 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10892 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10893 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10898 /* Adjust COUNTER by the VALUE. */
10900 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10902 if (GET_MODE (countreg) == DImode)
10903 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10905 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10908 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
10910 ix86_zero_extend_to_Pmode (rtx exp)
10913 if (GET_MODE (exp) == VOIDmode)
10914 return force_reg (Pmode, exp);
10915 if (GET_MODE (exp) == Pmode)
10916 return copy_to_mode_reg (Pmode, exp);
10917 r = gen_reg_rtx (Pmode);
10918 emit_insn (gen_zero_extendsidi2 (r, exp));
10922 /* Expand string move (memcpy) operation. Use i386 string operations when
10923 profitable. expand_clrmem contains similar code. */
10925 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10927 rtx srcreg, destreg, countreg, srcexp, destexp;
10928 enum machine_mode counter_mode;
10929 HOST_WIDE_INT align = 0;
10930 unsigned HOST_WIDE_INT count = 0;
10932 if (GET_CODE (align_exp) == CONST_INT)
10933 align = INTVAL (align_exp);
10935 /* Can't use any of this if the user has appropriated esi or edi. */
10936 if (global_regs[4] || global_regs[5])
10939 /* This simple hack avoids all inlining code and simplifies code below. */
10940 if (!TARGET_ALIGN_STRINGOPS)
10943 if (GET_CODE (count_exp) == CONST_INT)
10945 count = INTVAL (count_exp);
10946 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10950 /* Figure out the proper mode for the counter. For 32 bits it is always
10951 SImode; for 64 bits use SImode when possible, otherwise DImode.
10952 Set count to the number of bytes copied when known at compile time. */
10954 || GET_MODE (count_exp) == SImode
10955 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10956 counter_mode = SImode;
10958 counter_mode = DImode;
10960 if (counter_mode != SImode && counter_mode != DImode)
10963 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10964 if (destreg != XEXP (dst, 0))
10965 dst = replace_equiv_address_nv (dst, destreg);
10966 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10967 if (srcreg != XEXP (src, 0))
10968 src = replace_equiv_address_nv (src, srcreg);
10970 /* When optimizing for size, emit the simple rep ; movsb instruction for
10971 counts not divisible by 4. */
10973 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10975 emit_insn (gen_cld ());
10976 countreg = ix86_zero_extend_to_Pmode (count_exp);
10977 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10978 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10979 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10983 /* For constant aligned (or small unaligned) copies use rep movsl
10984 followed by code copying the rest. For PentiumPro ensure 8 byte
10985 alignment to allow rep movsl acceleration. */
10987 else if (count != 0
10989 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10990 || optimize_size || count < (unsigned int) 64))
10992 unsigned HOST_WIDE_INT offset = 0;
10993 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10994 rtx srcmem, dstmem;
10996 emit_insn (gen_cld ());
10997 if (count & ~(size - 1))
10999 countreg = copy_to_mode_reg (counter_mode,
11000 GEN_INT ((count >> (size == 4 ? 2 : 3))
11001 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11002 countreg = ix86_zero_extend_to_Pmode (countreg);
11004 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11005 GEN_INT (size == 4 ? 2 : 3));
11006 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11007 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11009 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11010 countreg, destexp, srcexp));
11011 offset = count & ~(size - 1);
11013 if (size == 8 && (count & 0x04))
11015 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11017 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11019 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11024 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11026 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11028 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11033 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11035 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11037 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11040 /* The generic code based on the glibc implementation:
11041 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11042 allowing accelerated copying there)
11043 - copy the data using rep movsl
11044 - copy the rest. */
11049 rtx srcmem, dstmem;
11050 int desired_alignment = (TARGET_PENTIUMPRO
11051 && (count == 0 || count >= (unsigned int) 260)
11052 ? 8 : UNITS_PER_WORD);
11053 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11054 dst = change_address (dst, BLKmode, destreg);
11055 src = change_address (src, BLKmode, srcreg);
11057 /* In case we don't know anything about the alignment, default to the
11058 library version, since it is usually equally fast and results in
11059 shorter code.
11061 Also emit the call when we know that the count is large and call overhead
11062 will not be important. */
11063 if (!TARGET_INLINE_ALL_STRINGOPS
11064 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11067 if (TARGET_SINGLE_STRINGOP)
11068 emit_insn (gen_cld ());
11070 countreg2 = gen_reg_rtx (Pmode);
11071 countreg = copy_to_mode_reg (counter_mode, count_exp);
11073 /* We don't use loops to align destination and to copy parts smaller
11074 than 4 bytes, because gcc is able to optimize such code better (in
11075 case the destination or the count really is aligned, gcc is often
11076 able to predict the branches) and it is also friendlier to the
11077 hardware branch prediction.
11079 Using loops is beneficial for the generic case, because we can
11080 handle small counts using the loops. Many CPUs (such as Athlon)
11081 have large REP prefix setup costs.
11083 This is quite costly. Maybe we can revisit this decision later or
11084 add some customizability to this code. */
11086 if (count == 0 && align < desired_alignment)
11088 label = gen_label_rtx ();
11089 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11090 LEU, 0, counter_mode, 1, label);
11094 rtx label = ix86_expand_aligntest (destreg, 1);
11095 srcmem = change_address (src, QImode, srcreg);
11096 dstmem = change_address (dst, QImode, destreg);
11097 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11098 ix86_adjust_counter (countreg, 1);
11099 emit_label (label);
11100 LABEL_NUSES (label) = 1;
11104 rtx label = ix86_expand_aligntest (destreg, 2);
11105 srcmem = change_address (src, HImode, srcreg);
11106 dstmem = change_address (dst, HImode, destreg);
11107 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11108 ix86_adjust_counter (countreg, 2);
11109 emit_label (label);
11110 LABEL_NUSES (label) = 1;
11112 if (align <= 4 && desired_alignment > 4)
11114 rtx label = ix86_expand_aligntest (destreg, 4);
11115 srcmem = change_address (src, SImode, srcreg);
11116 dstmem = change_address (dst, SImode, destreg);
11117 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11118 ix86_adjust_counter (countreg, 4);
11119 emit_label (label);
11120 LABEL_NUSES (label) = 1;
11123 if (label && desired_alignment > 4 && !TARGET_64BIT)
11125 emit_label (label);
11126 LABEL_NUSES (label) = 1;
11129 if (!TARGET_SINGLE_STRINGOP)
11130 emit_insn (gen_cld ());
11133 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11135 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11139 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11140 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11142 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11143 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11144 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11145 countreg2, destexp, srcexp));
11149 emit_label (label);
11150 LABEL_NUSES (label) = 1;
11152 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11154 srcmem = change_address (src, SImode, srcreg);
11155 dstmem = change_address (dst, SImode, destreg);
11156 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11158 if ((align <= 4 || count == 0) && TARGET_64BIT)
11160 rtx label = ix86_expand_aligntest (countreg, 4);
11161 srcmem = change_address (src, SImode, srcreg);
11162 dstmem = change_address (dst, SImode, destreg);
11163 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11164 emit_label (label);
11165 LABEL_NUSES (label) = 1;
11167 if (align > 2 && count != 0 && (count & 2))
11169 srcmem = change_address (src, HImode, srcreg);
11170 dstmem = change_address (dst, HImode, destreg);
11171 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11173 if (align <= 2 || count == 0)
11175 rtx label = ix86_expand_aligntest (countreg, 2);
11176 srcmem = change_address (src, HImode, srcreg);
11177 dstmem = change_address (dst, HImode, destreg);
11178 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11179 emit_label (label);
11180 LABEL_NUSES (label) = 1;
11182 if (align > 1 && count != 0 && (count & 1))
11184 srcmem = change_address (src, QImode, srcreg);
11185 dstmem = change_address (dst, QImode, destreg);
11186 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11188 if (align <= 1 || count == 0)
11190 rtx label = ix86_expand_aligntest (countreg, 1);
11191 srcmem = change_address (src, QImode, srcreg);
11192 dstmem = change_address (dst, QImode, destreg);
11193 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11194 emit_label (label);
11195 LABEL_NUSES (label) = 1;
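/* Sketch (not part of GCC) of the overall inline memcpy strategy
   expanded above: align the destination a byte at a time, bulk-copy
   with word-sized moves (the rep movsl body), then mop up the tail:

     void copy (unsigned char *dst, const unsigned char *src,
                unsigned long n)
     {
       while (n && ((unsigned long) dst & 3))   /* align destination */
         *dst++ = *src++, n--;
       for (; n >= 4; n -= 4, dst += 4, src += 4)
         __builtin_memcpy (dst, src, 4);        /* one movsl each */
       while (n--)                              /* tail bytes */
         *dst++ = *src++;
     }
*/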
11202 /* Expand string clear operation (bzero). Use i386 string operations when
11203 profitable. expand_movmem contains similar code. */
11205 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11207 rtx destreg, zeroreg, countreg, destexp;
11208 enum machine_mode counter_mode;
11209 HOST_WIDE_INT align = 0;
11210 unsigned HOST_WIDE_INT count = 0;
11212 if (GET_CODE (align_exp) == CONST_INT)
11213 align = INTVAL (align_exp);
11215 /* Can't use any of this if the user has appropriated esi. */
11216 if (global_regs[4])
11219 /* This simple hack avoids all inlining code and simplifies code below. */
11220 if (!TARGET_ALIGN_STRINGOPS)
11223 if (GET_CODE (count_exp) == CONST_INT)
11225 count = INTVAL (count_exp);
11226 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11229 /* Figure out the proper mode for the counter. For 32 bits it is always
11230 SImode; for 64 bits use SImode when possible, otherwise DImode.
11231 Set count to the number of bytes to clear when known at compile time. */
11233 || GET_MODE (count_exp) == SImode
11234 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11235 counter_mode = SImode;
11237 counter_mode = DImode;
11239 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11240 if (destreg != XEXP (dst, 0))
11241 dst = replace_equiv_address_nv (dst, destreg);
11244 /* When optimizing for size, emit the simple rep ; stosb instruction for
11245 counts not divisible by 4. The movl $N, %ecx; rep; stosb
11246 sequence is 7 bytes long, so if optimizing for size and count is
11247 small enough that some stosl, stosw and stosb instructions without
11248 rep are shorter, fall back into the next if. */
11250 if ((!optimize || optimize_size)
11253 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11255 emit_insn (gen_cld ());
11257 countreg = ix86_zero_extend_to_Pmode (count_exp);
11258 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11259 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11260 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11262 else if (count != 0
11264 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11265 || optimize_size || count < (unsigned int) 64))
11267 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11268 unsigned HOST_WIDE_INT offset = 0;
11270 emit_insn (gen_cld ());
11272 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11273 if (count & ~(size - 1))
11275 unsigned HOST_WIDE_INT repcount;
11276 unsigned int max_nonrep;
11278 repcount = count >> (size == 4 ? 2 : 3);
11280 repcount &= 0x3fffffff;
11282 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11283 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11284 bytes. In both cases the latter seems to be faster for small
11285 values of N. */
11286 max_nonrep = size == 4 ? 7 : 4;
11287 if (!optimize_size)
11290 case PROCESSOR_PENTIUM4:
11291 case PROCESSOR_NOCONA:
11298 if (repcount <= max_nonrep)
11299 while (repcount-- > 0)
11301 rtx mem = adjust_automodify_address_nv (dst,
11302 GET_MODE (zeroreg),
11304 emit_insn (gen_strset (destreg, mem, zeroreg));
11309 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11310 countreg = ix86_zero_extend_to_Pmode (countreg);
11311 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11312 GEN_INT (size == 4 ? 2 : 3));
11313 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11314 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11316 offset = count & ~(size - 1);
11319 if (size == 8 && (count & 0x04))
11321 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11323 emit_insn (gen_strset (destreg, mem,
11324 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11329 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11331 emit_insn (gen_strset (destreg, mem,
11332 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11337 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11339 emit_insn (gen_strset (destreg, mem,
11340 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11347 /* Compute desired alignment of the string operation. */
11348 int desired_alignment = (TARGET_PENTIUMPRO
11349 && (count == 0 || count >= (unsigned int) 260)
11350 ? 8 : UNITS_PER_WORD);
/* In case we don't know anything about the alignment, default to the
   library version, since it is usually equally fast and results in
   shorter code.

   Also emit a call when we know that the count is large and call overhead
   will not be important.  */
11358 if (!TARGET_INLINE_ALL_STRINGOPS
11359 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11362 if (TARGET_SINGLE_STRINGOP)
11363 emit_insn (gen_cld ());
11365 countreg2 = gen_reg_rtx (Pmode);
11366 countreg = copy_to_mode_reg (counter_mode, count_exp);
11367 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11368 /* Get rid of MEM_OFFSET, it won't be accurate. */
11369 dst = change_address (dst, BLKmode, destreg);
11371 if (count == 0 && align < desired_alignment)
11373 label = gen_label_rtx ();
11374 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11375 LEU, 0, counter_mode, 1, label);
11379 rtx label = ix86_expand_aligntest (destreg, 1);
11380 emit_insn (gen_strset (destreg, dst,
11381 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11382 ix86_adjust_counter (countreg, 1);
11383 emit_label (label);
11384 LABEL_NUSES (label) = 1;
11388 rtx label = ix86_expand_aligntest (destreg, 2);
11389 emit_insn (gen_strset (destreg, dst,
11390 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11391 ix86_adjust_counter (countreg, 2);
11392 emit_label (label);
11393 LABEL_NUSES (label) = 1;
11395 if (align <= 4 && desired_alignment > 4)
11397 rtx label = ix86_expand_aligntest (destreg, 4);
11398 emit_insn (gen_strset (destreg, dst,
11400 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11402 ix86_adjust_counter (countreg, 4);
11403 emit_label (label);
11404 LABEL_NUSES (label) = 1;
11407 if (label && desired_alignment > 4 && !TARGET_64BIT)
11409 emit_label (label);
11410 LABEL_NUSES (label) = 1;
11414 if (!TARGET_SINGLE_STRINGOP)
11415 emit_insn (gen_cld ());
11418 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11420 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11424 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11425 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11427 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11428 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11432 emit_label (label);
11433 LABEL_NUSES (label) = 1;
11436 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11437 emit_insn (gen_strset (destreg, dst,
11438 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11439 if (TARGET_64BIT && (align <= 4 || count == 0))
11441 rtx label = ix86_expand_aligntest (countreg, 4);
11442 emit_insn (gen_strset (destreg, dst,
11443 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11444 emit_label (label);
11445 LABEL_NUSES (label) = 1;
11447 if (align > 2 && count != 0 && (count & 2))
11448 emit_insn (gen_strset (destreg, dst,
11449 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11450 if (align <= 2 || count == 0)
11452 rtx label = ix86_expand_aligntest (countreg, 2);
11453 emit_insn (gen_strset (destreg, dst,
11454 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11455 emit_label (label);
11456 LABEL_NUSES (label) = 1;
11458 if (align > 1 && count != 0 && (count & 1))
11459 emit_insn (gen_strset (destreg, dst,
11460 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11461 if (align <= 1 || count == 0)
11463 rtx label = ix86_expand_aligntest (countreg, 1);
11464 emit_insn (gen_strset (destreg, dst,
11465 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11466 emit_label (label);
11467 LABEL_NUSES (label) = 1;
11473 /* Expand strlen. */
11475 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11477 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of the strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS.  */
11482 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11483 && !TARGET_INLINE_ALL_STRINGOPS
11485 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11488 addr = force_reg (Pmode, XEXP (src, 0));
11489 scratch1 = gen_reg_rtx (Pmode);
11491 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
/* Well, it seems that some optimizer does not combine a call like
   foo (strlen (bar), strlen (bar));
   when the move and the subtraction are done here.  It does calculate
   the length just once when these instructions are done inside
   output_strlen_unroll ().  But I think, since &bar[strlen (bar)] is
   often used and I use one fewer register for the lifetime of
   output_strlen_unroll (), this is better.  */
11502 emit_move_insn (out, addr);
11504 ix86_expand_strlensi_unroll_1 (out, src, align);
11506 /* strlensi_unroll_1 returns the address of the zero at the end of
11507 the string, like memchr(), so compute the length by subtracting
11508 the start address. */
11510 emit_insn (gen_subdi3 (out, out, addr));
11512 emit_insn (gen_subsi3 (out, out, addr));
11517 scratch2 = gen_reg_rtx (Pmode);
11518 scratch3 = gen_reg_rtx (Pmode);
11519 scratch4 = force_reg (Pmode, constm1_rtx);
11521 emit_move_insn (scratch3, addr);
11522 eoschar = force_reg (QImode, eoschar);
11524 emit_insn (gen_cld ());
11525 src = replace_equiv_address_nv (src, scratch3);
11527 /* If .md starts supporting :P, this can be done in .md. */
11528 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11529 scratch4), UNSPEC_SCAS);
11530 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11533 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11534 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11538 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11539 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
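/* A sketch of the arithmetic just emitted: repnz scasb decrements the
   count register once per byte scanned, terminator included, so starting
   from -1 it ends at -len - 2.  The one's complement of that is len + 1,
   and adding -1 yields len.  Worked check: len == 3 scans 4 bytes,
   -1 - 4 == -5, ~(-5) == 4, 4 - 1 == 3.  */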
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb
11548 out = result, initialized with the start address
11549 align_rtx = alignment of the address.
scratch = scratch register, initialized with the start address when
11551 not aligned, otherwise undefined
11553 This is just the body. It needs the initializations mentioned above and
11554 some address computing at the end. These things are done in i386.md. */
11557 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11561 rtx align_2_label = NULL_RTX;
11562 rtx align_3_label = NULL_RTX;
11563 rtx align_4_label = gen_label_rtx ();
11564 rtx end_0_label = gen_label_rtx ();
11566 rtx tmpreg = gen_reg_rtx (SImode);
11567 rtx scratch = gen_reg_rtx (SImode);
11571 if (GET_CODE (align_rtx) == CONST_INT)
11572 align = INTVAL (align_rtx);
11574 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11576 /* Is there a known alignment and is it less than 4? */
11579 rtx scratch1 = gen_reg_rtx (Pmode);
11580 emit_move_insn (scratch1, out);
11581 /* Is there a known alignment and is it not 2? */
11584 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11585 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
/* Leave just the two lower bits.  */
11588 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11589 NULL_RTX, 0, OPTAB_WIDEN);
11591 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11592 Pmode, 1, align_4_label);
11593 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11594 Pmode, 1, align_2_label);
11595 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11596 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether it is aligned to a 4-byte boundary.  */
11603 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11604 NULL_RTX, 0, OPTAB_WIDEN);
11606 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11607 Pmode, 1, align_4_label);
11610 mem = change_address (src, QImode, out);
11612 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11615 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11616 QImode, 1, end_0_label);
11618 /* Increment the address. */
11620 emit_insn (gen_adddi3 (out, out, const1_rtx));
11622 emit_insn (gen_addsi3 (out, out, const1_rtx));
11624 /* Not needed with an alignment of 2 */
11627 emit_label (align_2_label);
11629 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11633 emit_insn (gen_adddi3 (out, out, const1_rtx));
11635 emit_insn (gen_addsi3 (out, out, const1_rtx));
11637 emit_label (align_3_label);
11640 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11644 emit_insn (gen_adddi3 (out, out, const1_rtx));
11646 emit_insn (gen_addsi3 (out, out, const1_rtx));
/* Generate the loop to check 4 bytes at a time.  It is not a good idea
   to align this loop: it only makes the program huge and does not help
   performance.  */
11652 emit_label (align_4_label);
11654 mem = change_address (src, SImode, out);
11655 emit_move_insn (scratch, mem);
11657 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11659 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
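/* A worked example with illustrative values: for scratch == 0x12003456,
   scratch - 0x01010101 == 0x10ff3355 and ~scratch == 0xedffcba9; their
   AND is 0x00ff0301, and masking with 0x80808080 leaves 0x00800000,
   nonzero because byte 2 of scratch is zero.  If no byte were zero, no
   byte of the difference could have its top bit set while the same bit
   is clear in scratch, so the masked result would be zero.  */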
11664 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11665 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11666 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11667 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11668 gen_int_mode (0x80808080, SImode)));
11669 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11674 rtx reg = gen_reg_rtx (SImode);
11675 rtx reg2 = gen_reg_rtx (Pmode);
11676 emit_move_insn (reg, tmpreg);
11677 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11679 /* If zero is not in the first two bytes, move two bytes forward. */
11680 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11681 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11682 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11683 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11684 gen_rtx_IF_THEN_ELSE (SImode, tmp,
/* Emit the lea manually to avoid clobbering the flags.  */
11688 emit_insn (gen_rtx_SET (SImode, reg2,
11689 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11691 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11692 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11693 emit_insn (gen_rtx_SET (VOIDmode, out,
11694 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11701 rtx end_2_label = gen_label_rtx ();
11702 /* Is zero in the first two bytes? */
11704 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11705 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11706 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11707 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11708 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11710 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11711 JUMP_LABEL (tmp) = end_2_label;
11713 /* Not in the first two. Move two bytes forward. */
11714 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11716 emit_insn (gen_adddi3 (out, out, const2_rtx));
11718 emit_insn (gen_addsi3 (out, out, const2_rtx));
11720 emit_label (end_2_label);
11724 /* Avoid branch in fixing the byte. */
11725 tmpreg = gen_lowpart (QImode, tmpreg);
11726 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11727 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11729 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11731 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
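/* The fixup above is branchless on purpose: OUT now points 3 or 4 bytes
   past the terminating zero, and bit 7 of the low byte of TMPREG is set
   exactly when the zero is the earlier byte of its pair.  Adding the
   byte to itself copies that bit into the carry flag, so the
   subtract-with-borrow of 3 removes 4 when the carry is set and 3
   otherwise.  */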
11733 emit_label (end_0_label);
11737 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11738 rtx callarg2 ATTRIBUTE_UNUSED,
11739 rtx pop, int sibcall)
11741 rtx use = NULL, call;
11743 if (pop == const0_rtx)
11745 if (TARGET_64BIT && pop)
11749 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11750 fnaddr = machopic_indirect_call_target (fnaddr);
11752 /* Static functions and indirect calls don't need the pic register. */
11753 if (! TARGET_64BIT && flag_pic
11754 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11755 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11756 use_reg (&use, pic_offset_table_rtx);
11758 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11760 rtx al = gen_rtx_REG (QImode, 0);
11761 emit_move_insn (al, callarg2);
11762 use_reg (&use, al);
11764 #endif /* TARGET_MACHO */
11766 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11768 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11769 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11771 if (sibcall && TARGET_64BIT
11772 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11775 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11776 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11777 emit_move_insn (fnaddr, addr);
11778 fnaddr = gen_rtx_MEM (QImode, fnaddr);
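/* R11 is a deliberate choice: in the x86-64 ABI it is call-clobbered and
   never used to pass arguments, so loading the sibcall target into it
   cannot disturb the outgoing argument registers.  */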
11781 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11783 call = gen_rtx_SET (VOIDmode, retval, call);
11786 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11787 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11788 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11791 call = emit_call_insn (call);
11793 CALL_INSN_FUNCTION_USAGE (call) = use;
11797 /* Clear stack slot assignments remembered from previous functions.
This is called from INIT_EXPANDERS once before RTL is emitted for each
function.  */
11801 static struct machine_function *
11802 ix86_init_machine_status (void)
11804 struct machine_function *f;
11806 f = ggc_alloc_cleared (sizeof (struct machine_function));
11807 f->use_fast_prologue_epilogue_nregs = -1;
11812 /* Return a MEM corresponding to a stack slot with mode MODE.
11813 Allocate a new slot if necessary.
11815 The RTL for a function can have several slots available: N is
11816 which slot to use. */
11819 assign_386_stack_local (enum machine_mode mode, int n)
11821 struct stack_local_entry *s;
11823 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11826 for (s = ix86_stack_locals; s; s = s->next)
11827 if (s->mode == mode && s->n == n)
11830 s = (struct stack_local_entry *)
11831 ggc_alloc (sizeof (struct stack_local_entry));
11834 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11836 s->next = ix86_stack_locals;
11837 ix86_stack_locals = s;
11841 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11843 static GTY(()) rtx ix86_tls_symbol;
11845 ix86_tls_get_addr (void)
11848 if (!ix86_tls_symbol)
11850 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11851 (TARGET_GNU_TLS && !TARGET_64BIT)
11852 ? "___tls_get_addr"
11853 : "__tls_get_addr");
11856 return ix86_tls_symbol;
11859 /* Calculate the length of the memory address in the instruction
11860 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11863 memory_address_length (rtx addr)
11865 struct ix86_address parts;
11866 rtx base, index, disp;
11869 if (GET_CODE (addr) == PRE_DEC
11870 || GET_CODE (addr) == POST_INC
11871 || GET_CODE (addr) == PRE_MODIFY
11872 || GET_CODE (addr) == POST_MODIFY)
11875 if (! ix86_decompose_address (addr, &parts))
11878 if (parts.base && GET_CODE (parts.base) == SUBREG)
11879 parts.base = SUBREG_REG (parts.base);
11880 if (parts.index && GET_CODE (parts.index) == SUBREG)
11881 parts.index = SUBREG_REG (parts.index);
11884 index = parts.index;
11889 - esp as the base always wants an index,
11890 - ebp as the base always wants a displacement. */
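/* Concrete encodings as an illustration: movl %eax, (%ebx) is the two
   bytes 89 03, but movl %eax, (%esp) needs a SIB byte, 89 04 24, and
   movl %eax, (%ebp) is only encodable with a displacement, 89 45 00.  */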
11892 /* Register Indirect. */
11893 if (base && !index && !disp)
11895 /* esp (for its index) and ebp (for its displacement) need
11896 the two-byte modrm form. */
11897 if (addr == stack_pointer_rtx
11898 || addr == arg_pointer_rtx
11899 || addr == frame_pointer_rtx
11900 || addr == hard_frame_pointer_rtx)
11904 /* Direct Addressing. */
11905 else if (disp && !base && !index)
11910 /* Find the length of the displacement constant. */
11913 if (GET_CODE (disp) == CONST_INT
11914 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11920 /* ebp always wants a displacement. */
11921 else if (base == hard_frame_pointer_rtx)
11924 /* An index requires the two-byte modrm form.... */
11926 /* ...like esp, which always wants an index. */
11927 || base == stack_pointer_rtx
11928 || base == arg_pointer_rtx
11929 || base == frame_pointer_rtx)
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
11939 ix86_attr_length_immediate_default (rtx insn, int shortform)
11943 extract_insn_cached (insn);
11944 for (i = recog_data.n_operands - 1; i >= 0; --i)
11945 if (CONSTANT_P (recog_data.operand[i]))
11950 && GET_CODE (recog_data.operand[i]) == CONST_INT
11951 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11955 switch (get_attr_mode (insn))
/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
11971 fatal_insn ("unknown insn mode", insn);
11977 /* Compute default value for "length_address" attribute. */
11979 ix86_attr_length_address_default (rtx insn)
11983 if (get_attr_type (insn) == TYPE_LEA)
11985 rtx set = PATTERN (insn);
11986 if (GET_CODE (set) == SET)
11988 else if (GET_CODE (set) == PARALLEL
11989 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11990 set = XVECEXP (set, 0, 0);
11993 #ifdef ENABLE_CHECKING
11999 return memory_address_length (SET_SRC (set));
12002 extract_insn_cached (insn);
12003 for (i = recog_data.n_operands - 1; i >= 0; --i)
12004 if (GET_CODE (recog_data.operand[i]) == MEM)
12006 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Return the maximum number of instructions a CPU can issue.  */
12015 ix86_issue_rate (void)
12019 case PROCESSOR_PENTIUM:
12023 case PROCESSOR_PENTIUMPRO:
12024 case PROCESSOR_PENTIUM4:
12025 case PROCESSOR_ATHLON:
12027 case PROCESSOR_NOCONA:
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing else set by DEP_INSN.  */
12039 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12043 /* Simplify the test for uninteresting insns. */
12044 if (insn_type != TYPE_SETCC
12045 && insn_type != TYPE_ICMOV
12046 && insn_type != TYPE_FCMOV
12047 && insn_type != TYPE_IBR)
12050 if ((set = single_set (dep_insn)) != 0)
12052 set = SET_DEST (set);
12055 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12056 && XVECLEN (PATTERN (dep_insn), 0) == 2
12057 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12058 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12060 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12066 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12069 /* This test is true if the dependent insn reads the flags but
12070 not any other potentially set register. */
12071 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12074 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12080 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12081 address with operands set by DEP_INSN. */
12084 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12088 if (insn_type == TYPE_LEA
12091 addr = PATTERN (insn);
12092 if (GET_CODE (addr) == SET)
12094 else if (GET_CODE (addr) == PARALLEL
12095 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12096 addr = XVECEXP (addr, 0, 0);
12099 addr = SET_SRC (addr);
12104 extract_insn_cached (insn);
12105 for (i = recog_data.n_operands - 1; i >= 0; --i)
12106 if (GET_CODE (recog_data.operand[i]) == MEM)
12108 addr = XEXP (recog_data.operand[i], 0);
12115 return modified_in_p (addr, dep_insn);
12119 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12121 enum attr_type insn_type, dep_insn_type;
12122 enum attr_memory memory;
12124 int dep_insn_code_number;
12126 /* Anti and output dependencies have zero cost on all CPUs. */
12127 if (REG_NOTE_KIND (link) != 0)
12130 dep_insn_code_number = recog_memoized (dep_insn);
12132 /* If we can't recognize the insns, we can't really do anything. */
12133 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12136 insn_type = get_attr_type (insn);
12137 dep_insn_type = get_attr_type (dep_insn);
12141 case PROCESSOR_PENTIUM:
12142 /* Address Generation Interlock adds a cycle of latency. */
12143 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12146 /* ??? Compares pair with jump/setcc. */
12147 if (ix86_flags_dependant (insn, dep_insn, insn_type))
/* Floating point stores require the value to be ready one cycle earlier.  */
12151 if (insn_type == TYPE_FMOV
12152 && get_attr_memory (insn) == MEMORY_STORE
12153 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12157 case PROCESSOR_PENTIUMPRO:
12158 memory = get_attr_memory (insn);
12160 /* INT->FP conversion is expensive. */
12161 if (get_attr_fp_int_src (dep_insn))
/* There is one extra cycle of latency between an FP op and a store.  */
12165 if (insn_type == TYPE_FMOV
12166 && (set = single_set (dep_insn)) != NULL_RTX
12167 && (set2 = single_set (insn)) != NULL_RTX
12168 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12169 && GET_CODE (SET_DEST (set2)) == MEM)
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction when the
   previous instruction is not needed to compute the address.  */
12175 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12176 && !ix86_agi_dependant (insn, dep_insn, insn_type))
/* Claim moves to take one cycle, as the core can issue one load at a
   time and the next load can start a cycle later.  */
12180 if (dep_insn_type == TYPE_IMOV
12181 || dep_insn_type == TYPE_FMOV)
12189 memory = get_attr_memory (insn);
/* The esp dependency is resolved before the instruction is really
   finished.  */
12193 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12194 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12197 /* INT->FP conversion is expensive. */
12198 if (get_attr_fp_int_src (dep_insn))
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction when the
   previous instruction is not needed to compute the address.  */
12204 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12205 && !ix86_agi_dependant (insn, dep_insn, insn_type))
/* Claim moves to take one cycle, as the core can issue one load at a
   time and the next load can start a cycle later.  */
12209 if (dep_insn_type == TYPE_IMOV
12210 || dep_insn_type == TYPE_FMOV)
12219 case PROCESSOR_ATHLON:
12221 memory = get_attr_memory (insn);
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction when the
   previous instruction is not needed to compute the address.  */
12226 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12227 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12229 enum attr_unit unit = get_attr_unit (insn);
/* Because of the difference between the lengths of the integer and
   floating point unit pipeline preparation stages, the memory operands
   for floating point are cheaper.

   ??? For Athlon the difference is most probably 2.  */
12237 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12240 loadcost = TARGET_ATHLON ? 2 : 0;
12242 if (cost >= loadcost)
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
12260 ia32_multipass_dfa_lookahead (void)
12262 if (ix86_tune == PROCESSOR_PENTIUM)
12265 if (ix86_tune == PROCESSOR_PENTIUMPRO
12266 || ix86_tune == PROCESSOR_K6)
12274 /* Compute the alignment given to a constant that is being placed in memory.
12275 EXP is the constant and ALIGN is the alignment that the object would
12277 The value of this function is used instead of that alignment to align
12281 ix86_constant_alignment (tree exp, int align)
12283 if (TREE_CODE (exp) == REAL_CST)
12285 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12287 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12290 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12291 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12292 return BITS_PER_WORD;
12297 /* Compute the alignment for a static variable.
12298 TYPE is the data type, and ALIGN is the alignment that
12299 the object would ordinarily have. The value of this function is used
12300 instead of that alignment to align the object. */
12303 ix86_data_alignment (tree type, int align)
12305 if (AGGREGATE_TYPE_P (type)
12306 && TYPE_SIZE (type)
12307 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12308 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12309 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
   to a 16-byte boundary.  */
12316 if (AGGREGATE_TYPE_P (type)
12317 && TYPE_SIZE (type)
12318 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12319 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12320 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12324 if (TREE_CODE (type) == ARRAY_TYPE)
12326 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12328 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12331 else if (TREE_CODE (type) == COMPLEX_TYPE)
12334 if (TYPE_MODE (type) == DCmode && align < 64)
12336 if (TYPE_MODE (type) == XCmode && align < 128)
12339 else if ((TREE_CODE (type) == RECORD_TYPE
12340 || TREE_CODE (type) == UNION_TYPE
12341 || TREE_CODE (type) == QUAL_UNION_TYPE)
12342 && TYPE_FIELDS (type))
12344 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12346 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12349 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12350 || TREE_CODE (type) == INTEGER_TYPE)
12352 if (TYPE_MODE (type) == DFmode && align < 64)
12354 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12361 /* Compute the alignment for a local variable.
12362 TYPE is the data type, and ALIGN is the alignment that
12363 the object would ordinarily have. The value of this macro is used
12364 instead of that alignment to align the object. */
12367 ix86_local_alignment (tree type, int align)
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
   to a 16-byte boundary.  */
12373 if (AGGREGATE_TYPE_P (type)
12374 && TYPE_SIZE (type)
12375 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12376 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12377 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12380 if (TREE_CODE (type) == ARRAY_TYPE)
12382 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12384 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12387 else if (TREE_CODE (type) == COMPLEX_TYPE)
12389 if (TYPE_MODE (type) == DCmode && align < 64)
12391 if (TYPE_MODE (type) == XCmode && align < 128)
12394 else if ((TREE_CODE (type) == RECORD_TYPE
12395 || TREE_CODE (type) == UNION_TYPE
12396 || TREE_CODE (type) == QUAL_UNION_TYPE)
12397 && TYPE_FIELDS (type))
12399 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12401 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12404 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12405 || TREE_CODE (type) == INTEGER_TYPE)
12408 if (TYPE_MODE (type) == DFmode && align < 64)
12410 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12416 /* Emit RTL insns to initialize the variable parts of a trampoline.
12417 FNADDR is an RTX for the address of the function's pure code.
12418 CXT is an RTX for the static chain value for the function. */
12420 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12424 /* Compute offset from the end of the jmp to the target function. */
12425 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12426 plus_constant (tramp, 10),
12427 NULL_RTX, 1, OPTAB_DIRECT);
12428 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12429 gen_int_mode (0xb9, QImode));
12430 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12431 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12432 gen_int_mode (0xe9, QImode));
12433 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
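/* As a decoding aid, the ten bytes just written are:
     b9 <cxt:4>    movl $CXT, %ecx
     e9 <disp:4>   jmp  FNADDR   (disp == FNADDR - (TRAMP + 10))  */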
/* Try to load the address using the shorter movl instead of movabs.
   We may want to support movq for kernel mode, but the kernel does not
   use trampolines at the moment.  */
12441 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12443 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12444 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12445 gen_int_mode (0xbb41, HImode));
12446 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12447 gen_lowpart (SImode, fnaddr));
12452 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12453 gen_int_mode (0xbb49, HImode));
12454 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12458 /* Load static chain using movabs to r10. */
12459 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12460 gen_int_mode (0xba49, HImode));
12461 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
/* Jump to r11.  */
12465 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12466 gen_int_mode (0xff49, HImode));
12467 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12468 gen_int_mode (0xe3, QImode));
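/* Decoding aid for the magic constants above: the HImode stores are
   little-endian, so 0xbb41 emits the bytes 41 bb (movl $imm32, %r11d),
   0xbb49 emits 49 bb (movabs $imm64, %r11), 0xba49 emits 49 ba (movabs
   $imm64, %r10), and 0xff49 followed by 0xe3 emits 49 ff e3
   (jmpq *%r11).  */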
12470 if (offset > TRAMPOLINE_SIZE)
12474 #ifdef ENABLE_EXECUTE_STACK
12475 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12476 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12480 /* Codes for all the SSE/MMX builtins. */
12483 IX86_BUILTIN_ADDPS,
12484 IX86_BUILTIN_ADDSS,
12485 IX86_BUILTIN_DIVPS,
12486 IX86_BUILTIN_DIVSS,
12487 IX86_BUILTIN_MULPS,
12488 IX86_BUILTIN_MULSS,
12489 IX86_BUILTIN_SUBPS,
12490 IX86_BUILTIN_SUBSS,
12492 IX86_BUILTIN_CMPEQPS,
12493 IX86_BUILTIN_CMPLTPS,
12494 IX86_BUILTIN_CMPLEPS,
12495 IX86_BUILTIN_CMPGTPS,
12496 IX86_BUILTIN_CMPGEPS,
12497 IX86_BUILTIN_CMPNEQPS,
12498 IX86_BUILTIN_CMPNLTPS,
12499 IX86_BUILTIN_CMPNLEPS,
12500 IX86_BUILTIN_CMPNGTPS,
12501 IX86_BUILTIN_CMPNGEPS,
12502 IX86_BUILTIN_CMPORDPS,
12503 IX86_BUILTIN_CMPUNORDPS,
12504 IX86_BUILTIN_CMPNEPS,
12505 IX86_BUILTIN_CMPEQSS,
12506 IX86_BUILTIN_CMPLTSS,
12507 IX86_BUILTIN_CMPLESS,
12508 IX86_BUILTIN_CMPNEQSS,
12509 IX86_BUILTIN_CMPNLTSS,
12510 IX86_BUILTIN_CMPNLESS,
12511 IX86_BUILTIN_CMPNGTSS,
12512 IX86_BUILTIN_CMPNGESS,
12513 IX86_BUILTIN_CMPORDSS,
12514 IX86_BUILTIN_CMPUNORDSS,
12515 IX86_BUILTIN_CMPNESS,
12517 IX86_BUILTIN_COMIEQSS,
12518 IX86_BUILTIN_COMILTSS,
12519 IX86_BUILTIN_COMILESS,
12520 IX86_BUILTIN_COMIGTSS,
12521 IX86_BUILTIN_COMIGESS,
12522 IX86_BUILTIN_COMINEQSS,
12523 IX86_BUILTIN_UCOMIEQSS,
12524 IX86_BUILTIN_UCOMILTSS,
12525 IX86_BUILTIN_UCOMILESS,
12526 IX86_BUILTIN_UCOMIGTSS,
12527 IX86_BUILTIN_UCOMIGESS,
12528 IX86_BUILTIN_UCOMINEQSS,
12530 IX86_BUILTIN_CVTPI2PS,
12531 IX86_BUILTIN_CVTPS2PI,
12532 IX86_BUILTIN_CVTSI2SS,
12533 IX86_BUILTIN_CVTSI642SS,
12534 IX86_BUILTIN_CVTSS2SI,
12535 IX86_BUILTIN_CVTSS2SI64,
12536 IX86_BUILTIN_CVTTPS2PI,
12537 IX86_BUILTIN_CVTTSS2SI,
12538 IX86_BUILTIN_CVTTSS2SI64,
12540 IX86_BUILTIN_MAXPS,
12541 IX86_BUILTIN_MAXSS,
12542 IX86_BUILTIN_MINPS,
12543 IX86_BUILTIN_MINSS,
12545 IX86_BUILTIN_LOADUPS,
12546 IX86_BUILTIN_STOREUPS,
12547 IX86_BUILTIN_MOVSS,
12549 IX86_BUILTIN_MOVHLPS,
12550 IX86_BUILTIN_MOVLHPS,
12551 IX86_BUILTIN_LOADHPS,
12552 IX86_BUILTIN_LOADLPS,
12553 IX86_BUILTIN_STOREHPS,
12554 IX86_BUILTIN_STORELPS,
12556 IX86_BUILTIN_MASKMOVQ,
12557 IX86_BUILTIN_MOVMSKPS,
12558 IX86_BUILTIN_PMOVMSKB,
12560 IX86_BUILTIN_MOVNTPS,
12561 IX86_BUILTIN_MOVNTQ,
12563 IX86_BUILTIN_LOADDQU,
12564 IX86_BUILTIN_STOREDQU,
12566 IX86_BUILTIN_PACKSSWB,
12567 IX86_BUILTIN_PACKSSDW,
12568 IX86_BUILTIN_PACKUSWB,
12570 IX86_BUILTIN_PADDB,
12571 IX86_BUILTIN_PADDW,
12572 IX86_BUILTIN_PADDD,
12573 IX86_BUILTIN_PADDQ,
12574 IX86_BUILTIN_PADDSB,
12575 IX86_BUILTIN_PADDSW,
12576 IX86_BUILTIN_PADDUSB,
12577 IX86_BUILTIN_PADDUSW,
12578 IX86_BUILTIN_PSUBB,
12579 IX86_BUILTIN_PSUBW,
12580 IX86_BUILTIN_PSUBD,
12581 IX86_BUILTIN_PSUBQ,
12582 IX86_BUILTIN_PSUBSB,
12583 IX86_BUILTIN_PSUBSW,
12584 IX86_BUILTIN_PSUBUSB,
12585 IX86_BUILTIN_PSUBUSW,
12588 IX86_BUILTIN_PANDN,
12592 IX86_BUILTIN_PAVGB,
12593 IX86_BUILTIN_PAVGW,
12595 IX86_BUILTIN_PCMPEQB,
12596 IX86_BUILTIN_PCMPEQW,
12597 IX86_BUILTIN_PCMPEQD,
12598 IX86_BUILTIN_PCMPGTB,
12599 IX86_BUILTIN_PCMPGTW,
12600 IX86_BUILTIN_PCMPGTD,
12602 IX86_BUILTIN_PMADDWD,
12604 IX86_BUILTIN_PMAXSW,
12605 IX86_BUILTIN_PMAXUB,
12606 IX86_BUILTIN_PMINSW,
12607 IX86_BUILTIN_PMINUB,
12609 IX86_BUILTIN_PMULHUW,
12610 IX86_BUILTIN_PMULHW,
12611 IX86_BUILTIN_PMULLW,
12613 IX86_BUILTIN_PSADBW,
12614 IX86_BUILTIN_PSHUFW,
12616 IX86_BUILTIN_PSLLW,
12617 IX86_BUILTIN_PSLLD,
12618 IX86_BUILTIN_PSLLQ,
12619 IX86_BUILTIN_PSRAW,
12620 IX86_BUILTIN_PSRAD,
12621 IX86_BUILTIN_PSRLW,
12622 IX86_BUILTIN_PSRLD,
12623 IX86_BUILTIN_PSRLQ,
12624 IX86_BUILTIN_PSLLWI,
12625 IX86_BUILTIN_PSLLDI,
12626 IX86_BUILTIN_PSLLQI,
12627 IX86_BUILTIN_PSRAWI,
12628 IX86_BUILTIN_PSRADI,
12629 IX86_BUILTIN_PSRLWI,
12630 IX86_BUILTIN_PSRLDI,
12631 IX86_BUILTIN_PSRLQI,
12633 IX86_BUILTIN_PUNPCKHBW,
12634 IX86_BUILTIN_PUNPCKHWD,
12635 IX86_BUILTIN_PUNPCKHDQ,
12636 IX86_BUILTIN_PUNPCKLBW,
12637 IX86_BUILTIN_PUNPCKLWD,
12638 IX86_BUILTIN_PUNPCKLDQ,
12640 IX86_BUILTIN_SHUFPS,
12642 IX86_BUILTIN_RCPPS,
12643 IX86_BUILTIN_RCPSS,
12644 IX86_BUILTIN_RSQRTPS,
12645 IX86_BUILTIN_RSQRTSS,
12646 IX86_BUILTIN_SQRTPS,
12647 IX86_BUILTIN_SQRTSS,
12649 IX86_BUILTIN_UNPCKHPS,
12650 IX86_BUILTIN_UNPCKLPS,
12652 IX86_BUILTIN_ANDPS,
12653 IX86_BUILTIN_ANDNPS,
12655 IX86_BUILTIN_XORPS,
12658 IX86_BUILTIN_LDMXCSR,
12659 IX86_BUILTIN_STMXCSR,
12660 IX86_BUILTIN_SFENCE,
12662 /* 3DNow! Original */
12663 IX86_BUILTIN_FEMMS,
12664 IX86_BUILTIN_PAVGUSB,
12665 IX86_BUILTIN_PF2ID,
12666 IX86_BUILTIN_PFACC,
12667 IX86_BUILTIN_PFADD,
12668 IX86_BUILTIN_PFCMPEQ,
12669 IX86_BUILTIN_PFCMPGE,
12670 IX86_BUILTIN_PFCMPGT,
12671 IX86_BUILTIN_PFMAX,
12672 IX86_BUILTIN_PFMIN,
12673 IX86_BUILTIN_PFMUL,
12674 IX86_BUILTIN_PFRCP,
12675 IX86_BUILTIN_PFRCPIT1,
12676 IX86_BUILTIN_PFRCPIT2,
12677 IX86_BUILTIN_PFRSQIT1,
12678 IX86_BUILTIN_PFRSQRT,
12679 IX86_BUILTIN_PFSUB,
12680 IX86_BUILTIN_PFSUBR,
12681 IX86_BUILTIN_PI2FD,
12682 IX86_BUILTIN_PMULHRW,
12684 /* 3DNow! Athlon Extensions */
12685 IX86_BUILTIN_PF2IW,
12686 IX86_BUILTIN_PFNACC,
12687 IX86_BUILTIN_PFPNACC,
12688 IX86_BUILTIN_PI2FW,
12689 IX86_BUILTIN_PSWAPDSI,
12690 IX86_BUILTIN_PSWAPDSF,
12693 IX86_BUILTIN_ADDPD,
12694 IX86_BUILTIN_ADDSD,
12695 IX86_BUILTIN_DIVPD,
12696 IX86_BUILTIN_DIVSD,
12697 IX86_BUILTIN_MULPD,
12698 IX86_BUILTIN_MULSD,
12699 IX86_BUILTIN_SUBPD,
12700 IX86_BUILTIN_SUBSD,
12702 IX86_BUILTIN_CMPEQPD,
12703 IX86_BUILTIN_CMPLTPD,
12704 IX86_BUILTIN_CMPLEPD,
12705 IX86_BUILTIN_CMPGTPD,
12706 IX86_BUILTIN_CMPGEPD,
12707 IX86_BUILTIN_CMPNEQPD,
12708 IX86_BUILTIN_CMPNLTPD,
12709 IX86_BUILTIN_CMPNLEPD,
12710 IX86_BUILTIN_CMPNGTPD,
12711 IX86_BUILTIN_CMPNGEPD,
12712 IX86_BUILTIN_CMPORDPD,
12713 IX86_BUILTIN_CMPUNORDPD,
12714 IX86_BUILTIN_CMPNEPD,
12715 IX86_BUILTIN_CMPEQSD,
12716 IX86_BUILTIN_CMPLTSD,
12717 IX86_BUILTIN_CMPLESD,
12718 IX86_BUILTIN_CMPNEQSD,
12719 IX86_BUILTIN_CMPNLTSD,
12720 IX86_BUILTIN_CMPNLESD,
12721 IX86_BUILTIN_CMPORDSD,
12722 IX86_BUILTIN_CMPUNORDSD,
12723 IX86_BUILTIN_CMPNESD,
12725 IX86_BUILTIN_COMIEQSD,
12726 IX86_BUILTIN_COMILTSD,
12727 IX86_BUILTIN_COMILESD,
12728 IX86_BUILTIN_COMIGTSD,
12729 IX86_BUILTIN_COMIGESD,
12730 IX86_BUILTIN_COMINEQSD,
12731 IX86_BUILTIN_UCOMIEQSD,
12732 IX86_BUILTIN_UCOMILTSD,
12733 IX86_BUILTIN_UCOMILESD,
12734 IX86_BUILTIN_UCOMIGTSD,
12735 IX86_BUILTIN_UCOMIGESD,
12736 IX86_BUILTIN_UCOMINEQSD,
12738 IX86_BUILTIN_MAXPD,
12739 IX86_BUILTIN_MAXSD,
12740 IX86_BUILTIN_MINPD,
12741 IX86_BUILTIN_MINSD,
12743 IX86_BUILTIN_ANDPD,
12744 IX86_BUILTIN_ANDNPD,
12746 IX86_BUILTIN_XORPD,
12748 IX86_BUILTIN_SQRTPD,
12749 IX86_BUILTIN_SQRTSD,
12751 IX86_BUILTIN_UNPCKHPD,
12752 IX86_BUILTIN_UNPCKLPD,
12754 IX86_BUILTIN_SHUFPD,
12756 IX86_BUILTIN_LOADUPD,
12757 IX86_BUILTIN_STOREUPD,
12758 IX86_BUILTIN_MOVSD,
12760 IX86_BUILTIN_LOADHPD,
12761 IX86_BUILTIN_LOADLPD,
12763 IX86_BUILTIN_CVTDQ2PD,
12764 IX86_BUILTIN_CVTDQ2PS,
12766 IX86_BUILTIN_CVTPD2DQ,
12767 IX86_BUILTIN_CVTPD2PI,
12768 IX86_BUILTIN_CVTPD2PS,
12769 IX86_BUILTIN_CVTTPD2DQ,
12770 IX86_BUILTIN_CVTTPD2PI,
12772 IX86_BUILTIN_CVTPI2PD,
12773 IX86_BUILTIN_CVTSI2SD,
12774 IX86_BUILTIN_CVTSI642SD,
12776 IX86_BUILTIN_CVTSD2SI,
12777 IX86_BUILTIN_CVTSD2SI64,
12778 IX86_BUILTIN_CVTSD2SS,
12779 IX86_BUILTIN_CVTSS2SD,
12780 IX86_BUILTIN_CVTTSD2SI,
12781 IX86_BUILTIN_CVTTSD2SI64,
12783 IX86_BUILTIN_CVTPS2DQ,
12784 IX86_BUILTIN_CVTPS2PD,
12785 IX86_BUILTIN_CVTTPS2DQ,
12787 IX86_BUILTIN_MOVNTI,
12788 IX86_BUILTIN_MOVNTPD,
12789 IX86_BUILTIN_MOVNTDQ,
12792 IX86_BUILTIN_MASKMOVDQU,
12793 IX86_BUILTIN_MOVMSKPD,
12794 IX86_BUILTIN_PMOVMSKB128,
12796 IX86_BUILTIN_PACKSSWB128,
12797 IX86_BUILTIN_PACKSSDW128,
12798 IX86_BUILTIN_PACKUSWB128,
12800 IX86_BUILTIN_PADDB128,
12801 IX86_BUILTIN_PADDW128,
12802 IX86_BUILTIN_PADDD128,
12803 IX86_BUILTIN_PADDQ128,
12804 IX86_BUILTIN_PADDSB128,
12805 IX86_BUILTIN_PADDSW128,
12806 IX86_BUILTIN_PADDUSB128,
12807 IX86_BUILTIN_PADDUSW128,
12808 IX86_BUILTIN_PSUBB128,
12809 IX86_BUILTIN_PSUBW128,
12810 IX86_BUILTIN_PSUBD128,
12811 IX86_BUILTIN_PSUBQ128,
12812 IX86_BUILTIN_PSUBSB128,
12813 IX86_BUILTIN_PSUBSW128,
12814 IX86_BUILTIN_PSUBUSB128,
12815 IX86_BUILTIN_PSUBUSW128,
12817 IX86_BUILTIN_PAND128,
12818 IX86_BUILTIN_PANDN128,
12819 IX86_BUILTIN_POR128,
12820 IX86_BUILTIN_PXOR128,
12822 IX86_BUILTIN_PAVGB128,
12823 IX86_BUILTIN_PAVGW128,
12825 IX86_BUILTIN_PCMPEQB128,
12826 IX86_BUILTIN_PCMPEQW128,
12827 IX86_BUILTIN_PCMPEQD128,
12828 IX86_BUILTIN_PCMPGTB128,
12829 IX86_BUILTIN_PCMPGTW128,
12830 IX86_BUILTIN_PCMPGTD128,
12832 IX86_BUILTIN_PMADDWD128,
12834 IX86_BUILTIN_PMAXSW128,
12835 IX86_BUILTIN_PMAXUB128,
12836 IX86_BUILTIN_PMINSW128,
12837 IX86_BUILTIN_PMINUB128,
12839 IX86_BUILTIN_PMULUDQ,
12840 IX86_BUILTIN_PMULUDQ128,
12841 IX86_BUILTIN_PMULHUW128,
12842 IX86_BUILTIN_PMULHW128,
12843 IX86_BUILTIN_PMULLW128,
12845 IX86_BUILTIN_PSADBW128,
12846 IX86_BUILTIN_PSHUFHW,
12847 IX86_BUILTIN_PSHUFLW,
12848 IX86_BUILTIN_PSHUFD,
12850 IX86_BUILTIN_PSLLW128,
12851 IX86_BUILTIN_PSLLD128,
12852 IX86_BUILTIN_PSLLQ128,
12853 IX86_BUILTIN_PSRAW128,
12854 IX86_BUILTIN_PSRAD128,
12855 IX86_BUILTIN_PSRLW128,
12856 IX86_BUILTIN_PSRLD128,
12857 IX86_BUILTIN_PSRLQ128,
12858 IX86_BUILTIN_PSLLDQI128,
12859 IX86_BUILTIN_PSLLWI128,
12860 IX86_BUILTIN_PSLLDI128,
12861 IX86_BUILTIN_PSLLQI128,
12862 IX86_BUILTIN_PSRAWI128,
12863 IX86_BUILTIN_PSRADI128,
12864 IX86_BUILTIN_PSRLDQI128,
12865 IX86_BUILTIN_PSRLWI128,
12866 IX86_BUILTIN_PSRLDI128,
12867 IX86_BUILTIN_PSRLQI128,
12869 IX86_BUILTIN_PUNPCKHBW128,
12870 IX86_BUILTIN_PUNPCKHWD128,
12871 IX86_BUILTIN_PUNPCKHDQ128,
12872 IX86_BUILTIN_PUNPCKHQDQ128,
12873 IX86_BUILTIN_PUNPCKLBW128,
12874 IX86_BUILTIN_PUNPCKLWD128,
12875 IX86_BUILTIN_PUNPCKLDQ128,
12876 IX86_BUILTIN_PUNPCKLQDQ128,
12878 IX86_BUILTIN_CLFLUSH,
12879 IX86_BUILTIN_MFENCE,
12880 IX86_BUILTIN_LFENCE,
12882 /* Prescott New Instructions. */
12883 IX86_BUILTIN_ADDSUBPS,
12884 IX86_BUILTIN_HADDPS,
12885 IX86_BUILTIN_HSUBPS,
12886 IX86_BUILTIN_MOVSHDUP,
12887 IX86_BUILTIN_MOVSLDUP,
12888 IX86_BUILTIN_ADDSUBPD,
12889 IX86_BUILTIN_HADDPD,
12890 IX86_BUILTIN_HSUBPD,
12891 IX86_BUILTIN_LDDQU,
12893 IX86_BUILTIN_MONITOR,
12894 IX86_BUILTIN_MWAIT,
12896 IX86_BUILTIN_VEC_INIT_V2SI,
12897 IX86_BUILTIN_VEC_INIT_V4HI,
12898 IX86_BUILTIN_VEC_INIT_V8QI,
12899 IX86_BUILTIN_VEC_EXT_V2DF,
12900 IX86_BUILTIN_VEC_EXT_V2DI,
12901 IX86_BUILTIN_VEC_EXT_V4SF,
12902 IX86_BUILTIN_VEC_EXT_V4SI,
12903 IX86_BUILTIN_VEC_EXT_V8HI,
12904 IX86_BUILTIN_VEC_EXT_V2SI,
12905 IX86_BUILTIN_VEC_EXT_V4HI,
12906 IX86_BUILTIN_VEC_SET_V8HI,
12907 IX86_BUILTIN_VEC_SET_V4HI,
12912 #define def_builtin(MASK, NAME, TYPE, CODE) \
12914 if ((MASK) & target_flags \
12915 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12916 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12917 NULL, NULL_TREE); \
12920 /* Bits for builtin_description.flag. */
/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
12924 #define BUILTIN_DESC_SWAP_OPERANDS 1
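/* For example, SSE has no native GT compare; the tables below therefore
   describe __builtin_ia32_cmpgtps as the LT comparison with
   BUILTIN_DESC_SWAP_OPERANDS set.  */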
12926 struct builtin_description
12928 const unsigned int mask;
12929 const enum insn_code icode;
12930 const char *const name;
12931 const enum ix86_builtins code;
12932 const enum rtx_code comparison;
12933 const unsigned int flag;
12936 static const struct builtin_description bdesc_comi[] =
12938 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12939 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12940 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12941 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12942 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12943 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12944 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12945 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12946 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12947 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12948 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12949 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12950 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12951 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12952 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12953 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12954 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12955 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12956 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12957 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12958 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12959 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12960 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12961 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12964 static const struct builtin_description bdesc_2arg[] =
12967 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12968 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12969 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12970 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12971 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12972 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12973 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12974 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12976 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12977 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12978 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12979 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
12980 BUILTIN_DESC_SWAP_OPERANDS },
12981 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
12982 BUILTIN_DESC_SWAP_OPERANDS },
12983 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12984 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
12985 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
12986 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
12987 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
12988 BUILTIN_DESC_SWAP_OPERANDS },
12989 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
12990 BUILTIN_DESC_SWAP_OPERANDS },
12991 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
12992 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12993 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12994 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12995 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12996 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
12997 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
12998 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
12999 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13000 BUILTIN_DESC_SWAP_OPERANDS },
13001 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13002 BUILTIN_DESC_SWAP_OPERANDS },
13003 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
13005 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
13006 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
13007 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
13008 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
13010 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
13011 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
13012 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
13013 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
13015 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
13016 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
13017 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
13018 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
13019 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13022 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13023 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13024 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13025 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13026 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13027 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13028 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13029 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13031 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13032 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13033 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13034 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13035 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13036 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13037 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13038 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13040 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13041 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13042 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13044 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13045 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13046 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13047 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13049 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13050 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13052 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13053 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13054 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13055 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13056 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13057 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13059 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13060 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13061 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13062 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13064 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13065 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13066 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13067 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13068 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13069 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13072 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13073 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13074 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13076 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13077 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13078 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13080 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13081 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13082 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13083 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13084 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13085 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13087 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13088 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13089 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13090 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13091 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13092 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13094 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13095 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13096 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13097 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13099 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13100 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13104 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13105 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13113 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13114 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13115 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13116 BUILTIN_DESC_SWAP_OPERANDS },
13117 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13118 BUILTIN_DESC_SWAP_OPERANDS },
13119 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13120 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13121 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13122 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13123 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13124 BUILTIN_DESC_SWAP_OPERANDS },
13125 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13126 BUILTIN_DESC_SWAP_OPERANDS },
13127 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13128 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13129 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13130 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13131 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13132 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13133 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13134 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13135 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
13137 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13138 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13139 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13140 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13142 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13143 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13144 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13145 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13147 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13148 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13149 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13152 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13153 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13154 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13155 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13156 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13157 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13158 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13159 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13170 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13171 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13173 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13174 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13175 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13176 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13178 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13179 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13181 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13182 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13183 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13184 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13185 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13186 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13188 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13189 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13190 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13191 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13193 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13194 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13195 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13196 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13197 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13198 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13199 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13200 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13202 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13203 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13204 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13206 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13207 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13209 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
13210 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
13212 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13213 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13214 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13216 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13217 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13218 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13220 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13221 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13223 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13225 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13226 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13227 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13228 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13231 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13232 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13233 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13234 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13235 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
{ MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
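/* A worked example of how one row of the table above becomes a user-visible
   builtin (an illustrative sketch, not part of this table): the
   "__builtin_ia32_addpd" entry pairs that name with the insn pattern
   CODE_FOR_addv2df3 and gates it on MASK_SSE2, so with -msse2 the
   following is expected to expand to a single vector add:  */
#if 0
typedef double __v2df __attribute__ ((__vector_size__ (16)));

static __v2df
add2 (__v2df a, __v2df b)
{
  return __builtin_ia32_addpd (a, b);
}
#endif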
static const struct builtin_description bdesc_1arg[] =
{
13241 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13242 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13244 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13245 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13246 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13248 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13249 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13250 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13251 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13252 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13253 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13255 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13256 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13258 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13260 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13261 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13263 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13264 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13265 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13266 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13267 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13269 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13271 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13272 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13273 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13274 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13276 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13277 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13278 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13281 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
{ MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }
};
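/* Likewise for the one-operand table above: the IX86_BUILTIN_SQRTPS entry
   maps onto CODE_FOR_sqrtv4sf2.  An illustrative use, assuming -msse (not
   part of this file):  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

static __v4sf
sqrt4 (__v4sf x)
{
  return __builtin_ia32_sqrtps (x);
}
#endif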
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
13301 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13302 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13303 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13304 tree V2DI_type_node
13305 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13306 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13307 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13308 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13309 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13310 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13311 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13313 tree pchar_type_node = build_pointer_type (char_type_node);
13314 tree pcchar_type_node = build_pointer_type (
13315 build_type_variant (char_type_node, 1, 0));
13316 tree pfloat_type_node = build_pointer_type (float_type_node);
13317 tree pcfloat_type_node = build_pointer_type (
13318 build_type_variant (float_type_node, 1, 0));
13319 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13320 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13321 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13324 tree int_ftype_v4sf_v4sf
13325 = build_function_type_list (integer_type_node,
13326 V4SF_type_node, V4SF_type_node, NULL_TREE);
13327 tree v4si_ftype_v4sf_v4sf
13328 = build_function_type_list (V4SI_type_node,
13329 V4SF_type_node, V4SF_type_node, NULL_TREE);
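/* The _ftype_ nodes built here follow a RESULT_ftype_ARG1_ARG2 naming
   scheme; v4si_ftype_v4sf_v4sf, for instance, is the type of a function
   taking two V4SF vectors and returning a V4SI mask, roughly (sketch,
   not part of this file):  */
#if 0
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

typedef __v4si (*v4si_ftype_v4sf_v4sf) (__v4sf, __v4sf);
#endif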
13330 /* MMX/SSE/integer conversions. */
13331 tree int_ftype_v4sf
13332 = build_function_type_list (integer_type_node,
13333 V4SF_type_node, NULL_TREE);
13334 tree int64_ftype_v4sf
13335 = build_function_type_list (long_long_integer_type_node,
13336 V4SF_type_node, NULL_TREE);
13337 tree int_ftype_v8qi
13338 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13339 tree v4sf_ftype_v4sf_int
13340 = build_function_type_list (V4SF_type_node,
13341 V4SF_type_node, integer_type_node, NULL_TREE);
13342 tree v4sf_ftype_v4sf_int64
13343 = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
13346 tree v4sf_ftype_v4sf_v2si
13347 = build_function_type_list (V4SF_type_node,
13348 V4SF_type_node, V2SI_type_node, NULL_TREE);
13350 /* Miscellaneous. */
13351 tree v8qi_ftype_v4hi_v4hi
13352 = build_function_type_list (V8QI_type_node,
13353 V4HI_type_node, V4HI_type_node, NULL_TREE);
13354 tree v4hi_ftype_v2si_v2si
13355 = build_function_type_list (V4HI_type_node,
13356 V2SI_type_node, V2SI_type_node, NULL_TREE);
13357 tree v4sf_ftype_v4sf_v4sf_int
13358 = build_function_type_list (V4SF_type_node,
13359 V4SF_type_node, V4SF_type_node,
13360 integer_type_node, NULL_TREE);
13361 tree v2si_ftype_v4hi_v4hi
13362 = build_function_type_list (V2SI_type_node,
13363 V4HI_type_node, V4HI_type_node, NULL_TREE);
13364 tree v4hi_ftype_v4hi_int
13365 = build_function_type_list (V4HI_type_node,
13366 V4HI_type_node, integer_type_node, NULL_TREE);
13367 tree v4hi_ftype_v4hi_di
13368 = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
13371 tree v2si_ftype_v2si_di
13372 = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
13375 tree void_ftype_void
13376 = build_function_type (void_type_node, void_list_node);
13377 tree void_ftype_unsigned
13378 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13379 tree void_ftype_unsigned_unsigned
13380 = build_function_type_list (void_type_node, unsigned_type_node,
13381 unsigned_type_node, NULL_TREE);
13382 tree void_ftype_pcvoid_unsigned_unsigned
13383 = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
13386 tree unsigned_ftype_void
13387 = build_function_type (unsigned_type_node, void_list_node);
13388 tree v2si_ftype_v4sf
13389 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13390 /* Loads/stores. */
13391 tree void_ftype_v8qi_v8qi_pchar
13392 = build_function_type_list (void_type_node,
13393 V8QI_type_node, V8QI_type_node,
13394 pchar_type_node, NULL_TREE);
13395 tree v4sf_ftype_pcfloat
13396 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13397 /* @@@ the type is bogus */
13398 tree v4sf_ftype_v4sf_pv2si
13399 = build_function_type_list (V4SF_type_node,
13400 V4SF_type_node, pv2si_type_node, NULL_TREE);
13401 tree void_ftype_pv2si_v4sf
13402 = build_function_type_list (void_type_node,
13403 pv2si_type_node, V4SF_type_node, NULL_TREE);
13404 tree void_ftype_pfloat_v4sf
13405 = build_function_type_list (void_type_node,
13406 pfloat_type_node, V4SF_type_node, NULL_TREE);
13407 tree void_ftype_pdi_di
13408 = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
13411 tree void_ftype_pv2di_v2di
13412 = build_function_type_list (void_type_node,
13413 pv2di_type_node, V2DI_type_node, NULL_TREE);
13414 /* Normal vector unops. */
13415 tree v4sf_ftype_v4sf
13416 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13418 /* Normal vector binops. */
13419 tree v4sf_ftype_v4sf_v4sf
13420 = build_function_type_list (V4SF_type_node,
13421 V4SF_type_node, V4SF_type_node, NULL_TREE);
13422 tree v8qi_ftype_v8qi_v8qi
13423 = build_function_type_list (V8QI_type_node,
13424 V8QI_type_node, V8QI_type_node, NULL_TREE);
13425 tree v4hi_ftype_v4hi_v4hi
13426 = build_function_type_list (V4HI_type_node,
13427 V4HI_type_node, V4HI_type_node, NULL_TREE);
13428 tree v2si_ftype_v2si_v2si
13429 = build_function_type_list (V2SI_type_node,
13430 V2SI_type_node, V2SI_type_node, NULL_TREE);
13431 tree di_ftype_di_di
13432 = build_function_type_list (long_long_unsigned_type_node,
13433 long_long_unsigned_type_node,
13434 long_long_unsigned_type_node, NULL_TREE);
13436 tree v2si_ftype_v2sf
13437 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13438 tree v2sf_ftype_v2si
13439 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13440 tree v2si_ftype_v2si
13441 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13442 tree v2sf_ftype_v2sf
13443 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13444 tree v2sf_ftype_v2sf_v2sf
13445 = build_function_type_list (V2SF_type_node,
13446 V2SF_type_node, V2SF_type_node, NULL_TREE);
13447 tree v2si_ftype_v2sf_v2sf
13448 = build_function_type_list (V2SI_type_node,
13449 V2SF_type_node, V2SF_type_node, NULL_TREE);
13450 tree pint_type_node = build_pointer_type (integer_type_node);
13451 tree pdouble_type_node = build_pointer_type (double_type_node);
13452 tree pcdouble_type_node = build_pointer_type (
13453 build_type_variant (double_type_node, 1, 0));
13454 tree int_ftype_v2df_v2df
13455 = build_function_type_list (integer_type_node,
13456 V2DF_type_node, V2DF_type_node, NULL_TREE);
13458 tree ti_ftype_ti_ti
13459 = build_function_type_list (intTI_type_node,
13460 intTI_type_node, intTI_type_node, NULL_TREE);
13461 tree void_ftype_pcvoid
13462 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13463 tree v4sf_ftype_v4si
13464 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13465 tree v4si_ftype_v4sf
13466 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13467 tree v2df_ftype_v4si
13468 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13469 tree v4si_ftype_v2df
13470 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13471 tree v2si_ftype_v2df
13472 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13473 tree v4sf_ftype_v2df
13474 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13475 tree v2df_ftype_v2si
13476 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13477 tree v2df_ftype_v4sf
13478 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13479 tree int_ftype_v2df
13480 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13481 tree int64_ftype_v2df
13482 = build_function_type_list (long_long_integer_type_node,
13483 V2DF_type_node, NULL_TREE);
13484 tree v2df_ftype_v2df_int
13485 = build_function_type_list (V2DF_type_node,
13486 V2DF_type_node, integer_type_node, NULL_TREE);
13487 tree v2df_ftype_v2df_int64
13488 = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
13491 tree v4sf_ftype_v4sf_v2df
13492 = build_function_type_list (V4SF_type_node,
13493 V4SF_type_node, V2DF_type_node, NULL_TREE);
13494 tree v2df_ftype_v2df_v4sf
13495 = build_function_type_list (V2DF_type_node,
13496 V2DF_type_node, V4SF_type_node, NULL_TREE);
13497 tree v2df_ftype_v2df_v2df_int
13498 = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
13502 tree v2df_ftype_v2df_pcdouble
13503 = build_function_type_list (V2DF_type_node,
13504 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13505 tree void_ftype_pdouble_v2df
13506 = build_function_type_list (void_type_node,
13507 pdouble_type_node, V2DF_type_node, NULL_TREE);
13508 tree void_ftype_pint_int
13509 = build_function_type_list (void_type_node,
13510 pint_type_node, integer_type_node, NULL_TREE);
13511 tree void_ftype_v16qi_v16qi_pchar
13512 = build_function_type_list (void_type_node,
13513 V16QI_type_node, V16QI_type_node,
13514 pchar_type_node, NULL_TREE);
13515 tree v2df_ftype_pcdouble
13516 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13517 tree v2df_ftype_v2df_v2df
13518 = build_function_type_list (V2DF_type_node,
13519 V2DF_type_node, V2DF_type_node, NULL_TREE);
13520 tree v16qi_ftype_v16qi_v16qi
13521 = build_function_type_list (V16QI_type_node,
13522 V16QI_type_node, V16QI_type_node, NULL_TREE);
13523 tree v8hi_ftype_v8hi_v8hi
13524 = build_function_type_list (V8HI_type_node,
13525 V8HI_type_node, V8HI_type_node, NULL_TREE);
13526 tree v4si_ftype_v4si_v4si
13527 = build_function_type_list (V4SI_type_node,
13528 V4SI_type_node, V4SI_type_node, NULL_TREE);
13529 tree v2di_ftype_v2di_v2di
13530 = build_function_type_list (V2DI_type_node,
13531 V2DI_type_node, V2DI_type_node, NULL_TREE);
13532 tree v2di_ftype_v2df_v2df
13533 = build_function_type_list (V2DI_type_node,
13534 V2DF_type_node, V2DF_type_node, NULL_TREE);
13535 tree v2df_ftype_v2df
13536 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13537 tree v2di_ftype_v2di_int
13538 = build_function_type_list (V2DI_type_node,
13539 V2DI_type_node, integer_type_node, NULL_TREE);
13540 tree v4si_ftype_v4si_int
13541 = build_function_type_list (V4SI_type_node,
13542 V4SI_type_node, integer_type_node, NULL_TREE);
13543 tree v8hi_ftype_v8hi_int
13544 = build_function_type_list (V8HI_type_node,
13545 V8HI_type_node, integer_type_node, NULL_TREE);
13546 tree v8hi_ftype_v8hi_v2di
13547 = build_function_type_list (V8HI_type_node,
13548 V8HI_type_node, V2DI_type_node, NULL_TREE);
13549 tree v4si_ftype_v4si_v2di
13550 = build_function_type_list (V4SI_type_node,
13551 V4SI_type_node, V2DI_type_node, NULL_TREE);
13552 tree v4si_ftype_v8hi_v8hi
13553 = build_function_type_list (V4SI_type_node,
13554 V8HI_type_node, V8HI_type_node, NULL_TREE);
13555 tree di_ftype_v8qi_v8qi
13556 = build_function_type_list (long_long_unsigned_type_node,
13557 V8QI_type_node, V8QI_type_node, NULL_TREE);
13558 tree di_ftype_v2si_v2si
13559 = build_function_type_list (long_long_unsigned_type_node,
13560 V2SI_type_node, V2SI_type_node, NULL_TREE);
13561 tree v2di_ftype_v16qi_v16qi
13562 = build_function_type_list (V2DI_type_node,
13563 V16QI_type_node, V16QI_type_node, NULL_TREE);
13564 tree v2di_ftype_v4si_v4si
13565 = build_function_type_list (V2DI_type_node,
13566 V4SI_type_node, V4SI_type_node, NULL_TREE);
13567 tree int_ftype_v16qi
13568 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13569 tree v16qi_ftype_pcchar
13570 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13571 tree void_ftype_pchar_v16qi
13572 = build_function_type_list (void_type_node,
13573 pchar_type_node, V16QI_type_node, NULL_TREE);
  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }
13592 float128_type = make_node (REAL_TYPE);
13593 TYPE_PRECISION (float128_type) = 128;
13594 layout_type (float128_type);
13595 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
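/* After the two registrations above, user code can name these modes
   directly (illustrative only; how much arithmetic actually works
   depends on the rest of the backend and the support libraries):  */
#if 0
__float80 ext;      /* XFmode, 80-bit extended precision */
__float128 quad;    /* TFmode, 128-bit format */
#endif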
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
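  /* To make the mode dispatch above concrete (an illustrative trace, not
     extra code): for the "__builtin_ia32_addps" entry d->icode is
     CODE_FOR_addv4sf3, whose operand 1 has mode V4SFmode, so the switch
     picks v4sf_ftype_v4sf_v4sf and the builtin is registered with that
     signature under MASK_SSE.  Entries with a null name, such as the
     shift-by-immediate helpers, are skipped here and expanded
     case-by-case in ix86_expand_builtin instead.  */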
13662 /* Add the remaining MMX insns with somewhat more complicated types. */
13663 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13664 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13665 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13666 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13668 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13669 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13670 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13672 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13673 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13675 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13676 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13678 /* comi/ucomi insns. */
13679 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13680 if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13685 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13686 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13687 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13689 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13690 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13691 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13692 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13693 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13694 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13695 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13696 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13697 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13698 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13699 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13701 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13703 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13704 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13706 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13707 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13708 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13709 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13711 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13712 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13713 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13714 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13716 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13718 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13720 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13721 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13722 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13723 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13724 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13725 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13727 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13729 /* Original 3DNow! */
13730 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13731 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13732 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13733 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13734 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13735 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13736 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13737 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13738 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13739 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13740 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13741 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13742 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13743 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13744 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13745 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13746 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13747 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13748 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13749 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13751 /* 3DNow! extension as used in the Athlon CPU. */
13752 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13753 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13754 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13755 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13756 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13757 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13760 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13762 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13763 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13765 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
13766 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
13768 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13769 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13770 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13771 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13772 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13774 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13775 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13776 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13777 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13779 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13780 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13782 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13784 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13785 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13787 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13788 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13789 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13790 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13791 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13793 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13795 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13796 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13797 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13798 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13800 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13801 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13802 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13804 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13805 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13806 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13807 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13809 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13810 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13811 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13813 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13814 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13816 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13817 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13819 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13820 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13821 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13823 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13824 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13825 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13827 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13828 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13830 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13831 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13832 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13833 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13835 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13836 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13837 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13838 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13840 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13841 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13843 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13845 /* Prescott New Instructions. */
13846 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13847 void_ftype_pcvoid_unsigned_unsigned,
13848 IX86_BUILTIN_MONITOR);
13849 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13850 void_ftype_unsigned_unsigned,
13851 IX86_BUILTIN_MWAIT);
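  /* Illustrative use of the two SSE3 builtins just defined, assuming
     -msse3 (the pmmintrin.h intrinsics are thin wrappers of this shape;
     sketch, not part of this file):  */
#if 0
static void
wait_on (const void *addr)
{
  __builtin_ia32_monitor (addr, 0, 0);
  __builtin_ia32_mwait (0, 0);
}
#endif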
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
13858 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13859 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13861 /* Access to the vec_init patterns. */
13862 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
13863 integer_type_node, NULL_TREE);
13864 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
13865 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
13867 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
13868 short_integer_type_node,
13869 short_integer_type_node,
13870 short_integer_type_node, NULL_TREE);
13871 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
13872 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
13874 ftype = build_function_type_list (V8QI_type_node, char_type_node,
13875 char_type_node, char_type_node,
13876 char_type_node, char_type_node,
13877 char_type_node, char_type_node,
13878 char_type_node, NULL_TREE);
13879 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
13880 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
13882 /* Access to the vec_extract patterns. */
13883 ftype = build_function_type_list (double_type_node, V2DF_type_node,
13884 integer_type_node, NULL_TREE);
13885 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
13886 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
13888 ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
13891 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
13892 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
13894 ftype = build_function_type_list (float_type_node, V4SF_type_node,
13895 integer_type_node, NULL_TREE);
13896 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
13897 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
13899 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
13900 integer_type_node, NULL_TREE);
13901 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
13902 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
13904 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
13905 integer_type_node, NULL_TREE);
13906 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
13907 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
13909 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
13910 integer_type_node, NULL_TREE);
13911 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
13912 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
13914 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
13915 integer_type_node, NULL_TREE);
13916 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
13917 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
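  /* Sketch of how the vec_ext builtins above surface to users (assuming
     -msse2 for the V2DF case; illustrative, not part of this file):  */
#if 0
typedef double __v2df __attribute__ ((__vector_size__ (16)));

static double
low_element (__v2df x)
{
  return __builtin_ia32_vec_ext_v2df (x, 0);  /* selector must be 0 or 1 */
}
#endif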
13919 /* Access to the vec_set patterns. */
13920 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
13923 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
13924 ftype, IX86_BUILTIN_VEC_SET_V8HI);
13926 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
13929 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
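/* And an illustrative use of a vec_set builtin; like vec_ext, the
   selector must be an integer constant in range (sketch, not part of
   this file):  */
#if 0
typedef short __v4hi __attribute__ ((__vector_size__ (8)));

static __v4hi
set_first (__v4hi v, short x)
{
  return __builtin_ia32_vec_set_v4hi (v, x, 0);
}
#endif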
13933 /* Errors in the source file can cause expand_expr to return const0_rtx
13934 where we expect a vector. To avoid crashing, use one of the vector
13935 clear instructions. */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
13944 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
13950 tree arg0 = TREE_VALUE (arglist);
13951 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13952 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13953 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13954 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13955 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13956 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13958 if (VECTOR_MODE_P (mode0))
13959 op0 = safe_vector_operand (op0, mode0);
13960 if (VECTOR_MODE_P (mode1))
13961 op1 = safe_vector_operand (op1, mode1);
13963 if (optimize || !target
13964 || GET_MODE (target) != tmode
13965 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13966 target = gen_reg_rtx (tmode);
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }
13975 /* In case the insn wants input operands in modes different from
13976 the result, abort. */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();
13981 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13982 op0 = copy_to_mode_reg (mode0, op0);
13983 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13984 op1 = copy_to_mode_reg (mode1, op1);
  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
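/* An illustrative trace of the helper above (exposition, not extra code):
   expanding __builtin_ia32_andpd reaches here with CODE_FOR_andv2df3 and
   all three modes V2DFmode, so the ix86_fixup_binary_operands path runs
   and the emitted pattern is simply

     (set (reg:V2DF target) (and:V2DF (reg:V2DF op0) (reg:V2DF op1)))

   while the SImode/TImode fixup earlier in the function only fires for
   the shift builtins whose counts arrive as 32-bit integers.  */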
14013 /* Subroutine of ix86_expand_builtin to take care of stores. */
static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
14019 tree arg0 = TREE_VALUE (arglist);
14020 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14021 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14022 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14023 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14024 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14026 if (VECTOR_MODE_P (mode1))
14027 op1 = safe_vector_operand (op1, mode1);
14029 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14030 op1 = copy_to_mode_reg (mode1, op1);
  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
14038 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
14045 tree arg0 = TREE_VALUE (arglist);
14046 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14047 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14048 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14050 if (optimize || !target
14051 || GET_MODE (target) != tmode
14052 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14053 target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }
  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
14073 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14074 sqrtss, rsqrtss, rcpss. */
static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
14080 tree arg0 = TREE_VALUE (arglist);
14081 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14082 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14083 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14085 if (optimize || !target
14086 || GET_MODE (target) != tmode
14087 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14088 target = gen_reg_rtx (tmode);
14090 if (VECTOR_MODE_P (mode0))
14091 op0 = safe_vector_operand (op0, mode0);
14093 if ((optimize && !register_operand (op0, mode0))
14094 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14095 op0 = copy_to_mode_reg (mode0, op0);
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);
14099 op1 = copy_to_mode_reg (mode0, op1);
  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
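/* Note the op1 = op0 duplication above: the vm* patterns for sqrtss and
   friends compute element 0 from one operand and merge the remaining
   elements from the other, so passing the same register twice gives the
   usual intrinsic semantics, e.g. (illustrative, assuming -msse):  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

static __v4sf
rsqrt_low (__v4sf x)
{
  return __builtin_ia32_rsqrtss (x);  /* element 0 = 1/sqrt(x[0]) */
}
#endif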
14108 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
14115 tree arg0 = TREE_VALUE (arglist);
14116 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14117 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
14120 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14121 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14122 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14123 enum rtx_code comparison = d->comparison;
14125 if (VECTOR_MODE_P (mode0))
14126 op0 = safe_vector_operand (op0, mode0);
14127 if (VECTOR_MODE_P (mode1))
14128 op1 = safe_vector_operand (op1, mode1);
  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }
14140 if (optimize || !target
14141 || GET_MODE (target) != tmode
14142 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14143 target = gen_reg_rtx (tmode);
14145 if ((optimize && !register_operand (op0, mode0))
14146 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14147 op0 = copy_to_mode_reg (mode0, op0);
14148 if ((optimize && !register_operand (op1, mode1))
14149 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14150 op1 = copy_to_mode_reg (mode1, op1);
14152 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
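/* Example of the swap above (illustrative): there is no direct cmpgtpd
   form, so the IX86_BUILTIN_CMPGTPD descriptor in bdesc_2arg uses
   comparison code LT plus BUILTIN_DESC_SWAP_OPERANDS; a > b is therefore
   emitted as b < a, with b first copied through the temporary register.  */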
14160 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
14167 tree arg0 = TREE_VALUE (arglist);
14168 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14169 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
14172 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14173 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14174 enum rtx_code comparison = d->comparison;
14176 if (VECTOR_MODE_P (mode0))
14177 op0 = safe_vector_operand (op0, mode0);
14178 if (VECTOR_MODE_P (mode1))
14179 op1 = safe_vector_operand (op1, mode1);
  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }
14190 target = gen_reg_rtx (SImode);
14191 emit_move_insn (target, const0_rtx);
14192 target = gen_rtx_SUBREG (QImode, target, 0);
14194 if ((optimize && !register_operand (op0, mode0))
14195 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14196 op0 = copy_to_mode_reg (mode0, op0);
14197 if ((optimize && !register_operand (op1, mode1))
14198 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14199 op1 = copy_to_mode_reg (mode1, op1);
14201 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
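/* Illustrative use of a comi builtin handled above (assuming -msse and
   one of the bdesc_comi entries, e.g. __builtin_ia32_comilt): the result
   is the QImode flag materialized into the SImode subreg, read back as
   an int:  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

static int
first_lt (__v4sf a, __v4sf b)
{
  return __builtin_ia32_comilt (a, b);  /* 1 if a[0] < b[0], else 0 */
}
#endif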
14215 /* Return the integer constant in ARG. Constrain it to be in the range
14216 of the subparts of VEC_TYPE; issue an error if not. */
static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
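/* For example, a V4SF selector must be a literal in 0..3; a variable or
   out-of-range index is diagnosed here at expand time (illustrative):  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

static float
third (__v4sf x)
{
  return __builtin_ia32_vec_ext_v4sf (x, 2);  /* OK: constant in range */
}
#endif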
14233 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14234 ix86_expand_vector_init. We DO have language-level syntax for this, in
14235 the form of (type){ init-list }. Except that since we can't place emms
14236 instructions from inside the compiler, we can't allow the use of MMX
14237 registers unless the user explicitly asks for it. So we do *not* define
14238 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
   we have builtins invoked by mmintrin.h that give us license to emit
14240 these sorts of instructions. */
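/* So a wrapper in mmintrin.h might look like this (a sketch of the
   intended usage; the name my_setr_pi32 is hypothetical, not the actual
   header text):  */
#if 0
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef int __m64 __attribute__ ((__vector_size__ (8)));

static __inline __m64
my_setr_pi32 (int e0, int e1)
{
  return (__m64) __builtin_ia32_vec_init_v2si (e0, e1);
}
#endif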
static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
14245 enum machine_mode tmode = TYPE_MODE (type);
14246 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14247 int i, n_elt = GET_MODE_NUNITS (tmode);
14248 rtvec v = rtvec_alloc (n_elt);
14250 gcc_assert (VECTOR_MODE_P (tmode));
  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }
14258 gcc_assert (arglist == NULL);
14260 if (!target || !register_operand (target, tmode))
14261 target = gen_reg_rtx (tmode);
  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
14267 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14268 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14269 had a language-level syntax for referencing vector elements. */
static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;
14279 arg0 = TREE_VALUE (arglist);
14280 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14282 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14283 elt = get_element_number (TREE_TYPE (arg0), arg1);
14285 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14286 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14287 gcc_assert (VECTOR_MODE_P (mode0));
14289 op0 = force_reg (mode0, op0);
14291 if (optimize || !target || !register_operand (target, tmode))
14292 target = gen_reg_rtx (tmode);
  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
14299 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14300 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14301 a language-level syntax for referencing vector elements. */
static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
14306 enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  rtx op0, op1;
  int elt;
14311 arg0 = TREE_VALUE (arglist);
14312 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14313 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14315 tmode = TYPE_MODE (TREE_TYPE (arg0));
14316 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14317 gcc_assert (VECTOR_MODE_P (tmode));
14319 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14320 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14321 elt = get_element_number (TREE_TYPE (arg0), arg2);
14323 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14324 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14326 op0 = force_reg (tmode, op0);
14327 op1 = force_reg (mode1, op1);
  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
14334 /* Expand an expression EXP that calls a built-in function,
14335 with result going to TARGET if that's convenient
14336 (and in mode MODE if that's convenient).
14337 SUBTARGET may be used as the target for computing one of EXP's operands.
14338 IGNORE is nonzero if the value is to be ignored. */
static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14342 enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
14347 enum insn_code icode;
14348 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14349 tree arglist = TREE_OPERAND (exp, 1);
14350 tree arg0, arg1, arg2;
14351 rtx op0, op1, op2, pat;
14352 enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
14357 case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;
14361 case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;
14365 case IX86_BUILTIN_MASKMOVQ:
14366 case IX86_BUILTIN_MASKMOVDQU:
14367 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14368 ? CODE_FOR_mmx_maskmovq
14369 : CODE_FOR_sse2_maskmovdqu);
14370 /* Note the arg order is different from the operand order. */
14371 arg1 = TREE_VALUE (arglist);
14372 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14373 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14374 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14375 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14376 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14377 mode0 = insn_data[icode].operand[0].mode;
14378 mode1 = insn_data[icode].operand[1].mode;
14379 mode2 = insn_data[icode].operand[2].mode;
14381 op0 = force_reg (Pmode, op0);
14382 op0 = gen_rtx_MEM (mode1, op0);
14384 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14385 op0 = copy_to_mode_reg (mode0, op0);
14386 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14387 op1 = copy_to_mode_reg (mode1, op1);
14388 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14389 op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
14396 case IX86_BUILTIN_SQRTSS:
14397 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14398 case IX86_BUILTIN_RSQRTSS:
14399 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14400 case IX86_BUILTIN_RCPSS:
14401 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14403 case IX86_BUILTIN_LOADUPS:
14404 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14406 case IX86_BUILTIN_STOREUPS:
14407 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14409 case IX86_BUILTIN_LOADHPS:
14410 case IX86_BUILTIN_LOADLPS:
14411 case IX86_BUILTIN_LOADHPD:
14412 case IX86_BUILTIN_LOADLPD:
14413 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14414 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14415 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14416 : CODE_FOR_sse2_loadlpd);
14417 arg0 = TREE_VALUE (arglist);
14418 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14419 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14420 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14421 tmode = insn_data[icode].operand[0].mode;
14422 mode0 = insn_data[icode].operand[1].mode;
14423 mode1 = insn_data[icode].operand[2].mode;
14425 op0 = force_reg (mode0, op0);
14426 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14427 if (optimize || target == 0
14428 || GET_MODE (target) != tmode
14429 || !register_operand (target, tmode))
14430 target = gen_reg_rtx (tmode);
14431 pat = GEN_FCN (icode) (target, op0, op1);
14437 case IX86_BUILTIN_STOREHPS:
14438 case IX86_BUILTIN_STORELPS:
14439 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14440 : CODE_FOR_sse_storelps);
14441 arg0 = TREE_VALUE (arglist);
14442 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14443 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14444 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14445 mode0 = insn_data[icode].operand[0].mode;
14446 mode1 = insn_data[icode].operand[1].mode;
14448 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14449 op1 = force_reg (mode1, op1);
14451 pat = GEN_FCN (icode) (op0, op1);
14457 case IX86_BUILTIN_MOVNTPS:
14458 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14459 case IX86_BUILTIN_MOVNTQ:
14460 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14462 case IX86_BUILTIN_LDMXCSR:
14463 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14464 target = assign_386_stack_local (SImode, 0);
14465 emit_move_insn (target, op0);
14466 emit_insn (gen_sse_ldmxcsr (target));
14469 case IX86_BUILTIN_STMXCSR:
14470 target = assign_386_stack_local (SImode, 0);
14471 emit_insn (gen_sse_stmxcsr (target));
14472 return copy_to_mode_reg (SImode, target);
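#if 0
/* Illustrative user-level sketch (not part of this file): ldmxcsr and
   stmxcsr take only memory operands, hence the stack slot used above.
   The matching intrinsics read and write the SSE control/status
   register; enabling flush-to-zero is a typical use.  */
#include <xmmintrin.h>

static void
enable_flush_to_zero (void)
{
  _mm_setcsr (_mm_getcsr () | _MM_FLUSH_ZERO_ON);
}
#endif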
14474 case IX86_BUILTIN_SHUFPS:
14475 case IX86_BUILTIN_SHUFPD:
14476 icode = (fcode == IX86_BUILTIN_SHUFPS
14477 ? CODE_FOR_sse_shufps
14478 : CODE_FOR_sse2_shufpd);
14479 arg0 = TREE_VALUE (arglist);
14480 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14481 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14482 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14483 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14484 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14485 tmode = insn_data[icode].operand[0].mode;
14486 mode0 = insn_data[icode].operand[1].mode;
14487 mode1 = insn_data[icode].operand[2].mode;
14488 mode2 = insn_data[icode].operand[3].mode;
14490 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14491 op0 = copy_to_mode_reg (mode0, op0);
14492 if ((optimize && !register_operand (op1, mode1))
14493 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14494 op1 = copy_to_mode_reg (mode1, op1);
14495 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14497 /* @@@ better error message */
14498 error ("mask must be an immediate");
14499 return gen_reg_rtx (tmode);
14501 if (optimize || target == 0
14502 || GET_MODE (target) != tmode
14503 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14504 target = gen_reg_rtx (tmode);
14505 pat = GEN_FCN (icode) (target, op0, op1, op2);
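#if 0
/* Illustrative user-level sketch (not part of this file): the
   "mask must be an immediate" error above exists because shufps/shufpd
   encode the mask in the instruction's immediate byte, so the third
   intrinsic argument must fold to a compile-time constant.  */
#include <xmmintrin.h>

static __m128
reverse_elements (__m128 x)
{
  return _mm_shuffle_ps (x, x, _MM_SHUFFLE (0, 1, 2, 3));
}
#endif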
14511 case IX86_BUILTIN_PSHUFW:
14512 case IX86_BUILTIN_PSHUFD:
14513 case IX86_BUILTIN_PSHUFHW:
14514 case IX86_BUILTIN_PSHUFLW:
14515 icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14516 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14517 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14518 : CODE_FOR_mmx_pshufw);
14519 arg0 = TREE_VALUE (arglist);
14520 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14521 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14522 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14523 tmode = insn_data[icode].operand[0].mode;
14524 mode1 = insn_data[icode].operand[1].mode;
14525 mode2 = insn_data[icode].operand[2].mode;
14527 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14528 op0 = copy_to_mode_reg (mode1, op0);
14529 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14531 /* @@@ better error message */
14532 error ("mask must be an immediate");
14535 if (optimize || target == 0
14536 || GET_MODE (target) != tmode
14537 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14538 target = gen_reg_rtx (tmode);
14539 pat = GEN_FCN (icode) (target, op0, op1);
14545 case IX86_BUILTIN_PSLLDQI128:
14546 case IX86_BUILTIN_PSRLDQI128:
14547 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14548 : CODE_FOR_sse2_lshrti3);
14549 arg0 = TREE_VALUE (arglist);
14550 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14551 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14552 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14553 tmode = insn_data[icode].operand[0].mode;
14554 mode1 = insn_data[icode].operand[1].mode;
14555 mode2 = insn_data[icode].operand[2].mode;
14557 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14559 op0 = copy_to_reg (op0);
14560 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14562 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14564 error ("shift must be an immediate");
14567 target = gen_reg_rtx (V2DImode);
14568 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
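#if 0
/* Illustrative user-level sketch (not part of this file): pslldq/psrldq
   shift the full 128-bit register by an immediate number of *bytes*,
   which is why the shift count checked above must also be a
   compile-time constant.  */
#include <emmintrin.h>

static __m128i
shift_left_four_bytes (__m128i x)
{
  return _mm_slli_si128 (x, 4);
}
#endif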
14574 case IX86_BUILTIN_FEMMS:
14575 emit_insn (gen_mmx_femms ());
14578 case IX86_BUILTIN_PAVGUSB:
14579 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14581 case IX86_BUILTIN_PF2ID:
14582 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14584 case IX86_BUILTIN_PFACC:
14585 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14587 case IX86_BUILTIN_PFADD:
14588 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14590 case IX86_BUILTIN_PFCMPEQ:
14591 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14593 case IX86_BUILTIN_PFCMPGE:
14594 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14596 case IX86_BUILTIN_PFCMPGT:
14597 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14599 case IX86_BUILTIN_PFMAX:
14600 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14602 case IX86_BUILTIN_PFMIN:
14603 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14605 case IX86_BUILTIN_PFMUL:
14606 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14608 case IX86_BUILTIN_PFRCP:
14609 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14611 case IX86_BUILTIN_PFRCPIT1:
14612 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14614 case IX86_BUILTIN_PFRCPIT2:
14615 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14617 case IX86_BUILTIN_PFRSQIT1:
14618 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14620 case IX86_BUILTIN_PFRSQRT:
14621 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14623 case IX86_BUILTIN_PFSUB:
14624 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14626 case IX86_BUILTIN_PFSUBR:
14627 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14629 case IX86_BUILTIN_PI2FD:
14630 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14632 case IX86_BUILTIN_PMULHRW:
14633 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14635 case IX86_BUILTIN_PF2IW:
14636 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14638 case IX86_BUILTIN_PFNACC:
14639 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14641 case IX86_BUILTIN_PFPNACC:
14642 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14644 case IX86_BUILTIN_PI2FW:
14645 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14647 case IX86_BUILTIN_PSWAPDSI:
14648 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14650 case IX86_BUILTIN_PSWAPDSF:
14651 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14653 case IX86_BUILTIN_SQRTSD:
14654 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14655 case IX86_BUILTIN_LOADUPD:
14656 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14657 case IX86_BUILTIN_STOREUPD:
14658 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14660 case IX86_BUILTIN_MFENCE:
14661 emit_insn (gen_sse2_mfence ());
14663 case IX86_BUILTIN_LFENCE:
14664 emit_insn (gen_sse2_lfence ());
14667 case IX86_BUILTIN_CLFLUSH:
14668 arg0 = TREE_VALUE (arglist);
14669 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14670 icode = CODE_FOR_sse2_clflush;
14671 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14672 op0 = copy_to_mode_reg (Pmode, op0);
14674 emit_insn (gen_sse2_clflush (op0));
14677 case IX86_BUILTIN_MOVNTPD:
14678 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14679 case IX86_BUILTIN_MOVNTDQ:
14680 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14681 case IX86_BUILTIN_MOVNTI:
14682 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14684 case IX86_BUILTIN_LOADDQU:
14685 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14686 case IX86_BUILTIN_STOREDQU:
14687 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14689 case IX86_BUILTIN_MONITOR:
14690 arg0 = TREE_VALUE (arglist);
14691 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14692 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14693 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14694 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14695 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14697 op0 = copy_to_mode_reg (SImode, op0);
14699 op1 = copy_to_mode_reg (SImode, op1);
14701 op2 = copy_to_mode_reg (SImode, op2);
14702 emit_insn (gen_sse3_monitor (op0, op1, op2));
14705 case IX86_BUILTIN_MWAIT:
14706 arg0 = TREE_VALUE (arglist);
14707 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14708 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14709 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14711 op0 = copy_to_mode_reg (SImode, op0);
14713 op1 = copy_to_mode_reg (SImode, op1);
14714 emit_insn (gen_sse3_mwait (op0, op1));
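#if 0
/* Illustrative user-level sketch (not part of this file): the SSE3
   monitor/mwait pair as exposed by pmmintrin.h.  The operands are
   copied into registers above because the hardware instructions take
   them implicitly in eax/ecx/edx.  */
#include <pmmintrin.h>

static void
wait_for_store_to (void const *addr)
{
  _mm_monitor (addr, 0, 0);
  _mm_mwait (0, 0);
}
#endif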
14717 case IX86_BUILTIN_LDDQU:
14718 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
14721 case IX86_BUILTIN_VEC_INIT_V2SI:
14722 case IX86_BUILTIN_VEC_INIT_V4HI:
14723 case IX86_BUILTIN_VEC_INIT_V8QI:
14724 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
14726 case IX86_BUILTIN_VEC_EXT_V2DF:
14727 case IX86_BUILTIN_VEC_EXT_V2DI:
14728 case IX86_BUILTIN_VEC_EXT_V4SF:
14729 case IX86_BUILTIN_VEC_EXT_V4SI:
14730 case IX86_BUILTIN_VEC_EXT_V8HI:
14731 case IX86_BUILTIN_VEC_EXT_V2SI:
14732 case IX86_BUILTIN_VEC_EXT_V4HI:
14733 return ix86_expand_vec_ext_builtin (arglist, target);
14735 case IX86_BUILTIN_VEC_SET_V8HI:
14736 case IX86_BUILTIN_VEC_SET_V4HI:
14737 return ix86_expand_vec_set_builtin (arglist);
14743 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14744 if (d->code == fcode)
14746 /* Compares are treated specially. */
14747 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14748 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
14749 || d->icode == CODE_FOR_sse2_maskcmpv2df3
14750 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14751 return ix86_expand_sse_compare (d, arglist, target);
14753 return ix86_expand_binop_builtin (d->icode, arglist, target);
14756 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14757 if (d->code == fcode)
14758 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14760 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14761 if (d->code == fcode)
14762 return ix86_expand_sse_comi (d, arglist, target);
14764 gcc_unreachable ();
14767 /* Store OPERAND to memory after reload is completed. This means
14768 that we can't easily use assign_stack_local. */
14770 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14773 if (!reload_completed)
14775 if (TARGET_RED_ZONE)
14777 result = gen_rtx_MEM (mode,
14778 gen_rtx_PLUS (Pmode,
14780 GEN_INT (-RED_ZONE_SIZE)));
14781 emit_move_insn (result, operand);
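/* The x86-64 ABI reserves a 128 byte "red zone" below the stack pointer
   that leaf code may use without adjusting %rsp, which is why no
   explicit allocation insn is needed for this slot.  */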
14783 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14789 operand = gen_lowpart (DImode, operand);
14793 gen_rtx_SET (VOIDmode,
14794 gen_rtx_MEM (DImode,
14795 gen_rtx_PRE_DEC (DImode,
14796 stack_pointer_rtx)),
14802 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14811 split_di (&operand, 1, operands, operands + 1);
14813 gen_rtx_SET (VOIDmode,
14814 gen_rtx_MEM (SImode,
14815 gen_rtx_PRE_DEC (Pmode,
14816 stack_pointer_rtx)),
14819 gen_rtx_SET (VOIDmode,
14820 gen_rtx_MEM (SImode,
14821 gen_rtx_PRE_DEC (Pmode,
14822 stack_pointer_rtx)),
14827 /* It is better to store HImode values as SImode. */
14828 if (!TARGET_PARTIAL_REG_STALL)
14829 operand = gen_lowpart (SImode, operand);
14833 gen_rtx_SET (VOIDmode,
14834 gen_rtx_MEM (GET_MODE (operand),
14835 gen_rtx_PRE_DEC (SImode,
14836 stack_pointer_rtx)),
14842 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14847 /* Free the operand from memory. */
14849 ix86_free_from_memory (enum machine_mode mode)
14851 if (!TARGET_RED_ZONE)
14855 if (mode == DImode || TARGET_64BIT)
14857 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14861 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14862 to a pop or add instruction if registers are available. */
14863 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14864 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14869 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14870 QImode must go into class Q_REGS.
14871 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14872 movdf to do mem-to-mem moves through integer regs. */
14874 ix86_preferred_reload_class (rtx x, enum reg_class class)
14876 /* We're only allowed to return a subclass of CLASS. Many of the
14877 following checks fail for NO_REGS, so eliminate that early. */
14878 if (class == NO_REGS)
14881 /* All classes can load zeros. */
14882 if (x == CONST0_RTX (GET_MODE (x)))
14885 /* Floating-point constants need more complex checks. */
14886 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14888 /* General regs can load everything. */
14889 if (reg_class_subset_p (class, GENERAL_REGS))
14892 /* Floats can load 0 and 1 plus some others. Note that we eliminated
14893 zero above. We only want to wind up preferring 80387 registers if
14894 we plan on doing computation with them. */
14895 if (TARGET_80387
14896 && (TARGET_MIX_SSE_I387
14897 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
14898 && standard_80387_constant_p (x))
14900 /* Limit class to non-sse. */
14901 if (class == FLOAT_SSE_REGS)
14903 if (class == FP_TOP_SSE_REGS)
14905 if (class == FP_SECOND_SSE_REGS)
14906 return FP_SECOND_REG;
14907 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
14913 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14915 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
14918 /* Generally when we see PLUS here, it's the function invariant
14919 (plus soft-fp const_int), which can only be computed into general
14920 regs. */
14921 if (GET_CODE (x) == PLUS)
14922 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
14924 /* QImode constants are easy to load, but non-constant QImode data
14925 must go into Q_REGS. */
14926 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
14928 if (reg_class_subset_p (class, Q_REGS))
14930 if (reg_class_subset_p (Q_REGS, class))
14938 /* If we are copying between general and FP registers, we need a memory
14939 location. The same is true for SSE and MMX registers.
14941 The macro can't work reliably when one of the CLASSES is a class containing
14942 registers from multiple units (SSE, MMX, integer). We avoid this by never
14943 combining those units in a single alternative in the machine description.
14944 Ensure that this constraint holds to avoid unexpected surprises.
14946 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14947 enforce these sanity checks. */
14950 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14951 enum machine_mode mode, int strict)
14953 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14954 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14955 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14956 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14957 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14958 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14965 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
14968 /* ??? This is a lie. We do have moves between mmx/general, and between
14969 mmx/sse2. But by saying we need secondary memory we discourage the
14970 register allocator from using the mmx registers unless needed. */
14971 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14974 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14976 /* SSE1 doesn't have any direct moves from other classes. */
14980 /* If the target says that inter-unit moves are more expensive
14981 than moving through memory, then don't generate them. */
14982 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
14985 /* Between SSE and general, we have moves no larger than word size. */
14986 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14989 /* ??? For the cost of one register reformat penalty, we could use
14990 the same instructions to move SFmode and DFmode data, but the
14991 relevant move patterns don't support those alternatives. */
14992 if (mode == SFmode || mode == DFmode)
14999 /* Return the cost of moving data from a register in class CLASS1 to
15000 one in class CLASS2.
15002 It is not required that the cost always equal 2 when FROM is the same as TO;
15003 on some machines it is expensive to move between registers if they are not
15004 general registers. */
15007 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15008 enum reg_class class2)
15010 /* In case we require secondary memory, compute the cost of the store followed
15011 by the load. In order to avoid bad register allocation choices, we need
15012 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
15014 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15018 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15019 MEMORY_MOVE_COST (mode, class1, 1));
15020 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15021 MEMORY_MOVE_COST (mode, class2, 1));
15023 /* In the case of copying from a general purpose register we may emit multiple
15024 stores followed by a single load, causing a memory size mismatch stall.
15025 Count this as an arbitrarily high cost of 20. */
15026 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15029 /* In the case of FP/MMX moves, the registers actually overlap, and we
15030 have to switch modes in order to treat them differently. */
15031 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15032 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15038 /* Moves between SSE/MMX and integer unit are expensive. */
15039 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15040 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15041 return ix86_cost->mmxsse_to_integer;
15042 if (MAYBE_FLOAT_CLASS_P (class1))
15043 return ix86_cost->fp_move;
15044 if (MAYBE_SSE_CLASS_P (class1))
15045 return ix86_cost->sse_move;
15046 if (MAYBE_MMX_CLASS_P (class1))
15047 return ix86_cost->mmx_move;
15051 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15054 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15056 /* Flags and only flags can hold CCmode values, and they can hold nothing else. */
15057 if (CC_REGNO_P (regno))
15058 return GET_MODE_CLASS (mode) == MODE_CC;
15059 if (GET_MODE_CLASS (mode) == MODE_CC
15060 || GET_MODE_CLASS (mode) == MODE_RANDOM
15061 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15063 if (FP_REGNO_P (regno))
15064 return VALID_FP_MODE_P (mode);
15065 if (SSE_REGNO_P (regno))
15067 /* We implement the move patterns for all vector modes into and
15068 out of SSE registers, even when no operation instructions
15069 are available. */
15070 return (VALID_SSE_REG_MODE (mode)
15071 || VALID_SSE2_REG_MODE (mode)
15072 || VALID_MMX_REG_MODE (mode)
15073 || VALID_MMX_REG_MODE_3DNOW (mode));
15075 if (MMX_REGNO_P (regno))
15077 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15078 so if the register is available at all, then we can move data of
15079 the given mode into or out of it. */
15080 return (VALID_MMX_REG_MODE (mode)
15081 || VALID_MMX_REG_MODE_3DNOW (mode));
15084 if (mode == QImode)
15086 /* Take care with QImode values - they can be in non-QI regs,
15087 but then they do cause partial register stalls. */
15088 if (regno < 4 || TARGET_64BIT)
15090 if (!TARGET_PARTIAL_REG_STALL)
15092 return reload_in_progress || reload_completed;
15094 /* We handle both integers and floats in the general purpose registers. */
15095 else if (VALID_INT_MODE_P (mode))
15097 else if (VALID_FP_MODE_P (mode))
15099 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
15100 on to use that value in smaller contexts, this can easily force a
15101 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
15102 supporting DImode, allow it. */
15103 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
15109 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15110 tieable integer mode. */
15113 ix86_tieable_integer_mode_p (enum machine_mode mode)
15122 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
15125 return TARGET_64BIT;
15132 /* Return true if MODE1 is accessible in a register that can hold MODE2
15133 without copying. That is, all register classes that can hold MODE2
15134 can also hold MODE1. */
15137 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
15139 if (mode1 == mode2)
15142 if (ix86_tieable_integer_mode_p (mode1)
15143 && ix86_tieable_integer_mode_p (mode2))
15146 /* MODE2 being XFmode implies fp stack or general regs, which means we
15147 can tie any smaller floating point modes to it. Note that we do not
15148 tie this with TFmode. */
15149 if (mode2 == XFmode)
15150 return mode1 == SFmode || mode1 == DFmode;
15152 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
15153 that we can tie it with SFmode. */
15154 if (mode2 == DFmode)
15155 return mode1 == SFmode;
15157 /* If MODE2 is only appropriate for an SSE register, then tie with
15158 any other mode acceptable to SSE registers. */
15159 if (GET_MODE_SIZE (mode2) >= 8
15160 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
15161 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
15163 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
15164 with any other mode acceptable to MMX registers. */
15165 if (GET_MODE_SIZE (mode2) == 8
15166 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
15167 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
15172 /* Return the cost of moving data of mode M between a
15173 register and memory. A value of 2 is the default; this cost is
15174 relative to those in `REGISTER_MOVE_COST'.
15176 If moving between registers and memory is more expensive than
15177 between two registers, you should define this macro to express the
15178 relative cost.
15180 Also model the increased cost of moving QImode registers in non
15181 Q_REGS classes. */
15184 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
15186 if (FLOAT_CLASS_P (class))
15203 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15205 if (SSE_CLASS_P (class))
15208 switch (GET_MODE_SIZE (mode))
15222 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15224 if (MMX_CLASS_P (class))
15227 switch (GET_MODE_SIZE (mode))
15238 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15240 switch (GET_MODE_SIZE (mode))
15244 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15245 : ix86_cost->movzbl_load);
15247 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15248 : ix86_cost->int_store[0] + 4);
15251 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15253 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
15254 if (mode == TFmode)
15256 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15257 * (((int) GET_MODE_SIZE (mode)
15258 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
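/* Worked example: on a 32-bit target a DImode access moves 8 bytes,
   i.e. two UNITS_PER_WORD chunks, so a load costs 2 * int_load[2]
   relative to the reg-reg baseline of 2.  */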
15262 /* Compute a (partial) cost for rtx X. Return true if the complete
15263 cost has been computed, and false if subexpressions should be
15264 scanned. In either case, *TOTAL contains the cost result. */
15267 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15269 enum machine_mode mode = GET_MODE (x);
15277 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15279 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15281 else if (flag_pic && SYMBOLIC_CONST (x)
15282 && (!TARGET_64BIT
15283 || (GET_CODE (x) != LABEL_REF
15284 && (GET_CODE (x) != SYMBOL_REF
15285 || !SYMBOL_REF_LOCAL_P (x)))))
15292 if (mode == VOIDmode)
15295 switch (standard_80387_constant_p (x))
15300 default: /* Other constants */
15305 /* Start with (MEM (SYMBOL_REF)), since that's where
15306 it'll probably end up. Add a penalty for size. */
15307 *total = (COSTS_N_INSNS (1)
15308 + (flag_pic != 0 && !TARGET_64BIT)
15309 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15315 /* The zero extension is often completely free on x86_64, so make
15316 it as cheap as possible. */
15317 if (TARGET_64BIT && mode == DImode
15318 && GET_MODE (XEXP (x, 0)) == SImode)
15320 else if (TARGET_ZERO_EXTEND_WITH_AND)
15321 *total = COSTS_N_INSNS (ix86_cost->add);
15323 *total = COSTS_N_INSNS (ix86_cost->movzx);
15327 *total = COSTS_N_INSNS (ix86_cost->movsx);
15331 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15332 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15334 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15337 *total = COSTS_N_INSNS (ix86_cost->add);
15340 if ((value == 2 || value == 3)
15341 && ix86_cost->lea <= ix86_cost->shift_const)
15343 *total = COSTS_N_INSNS (ix86_cost->lea);
15353 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15355 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15357 if (INTVAL (XEXP (x, 1)) > 32)
15358 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15360 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15364 if (GET_CODE (XEXP (x, 1)) == AND)
15365 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15367 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15372 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15373 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15375 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15380 if (FLOAT_MODE_P (mode))
15382 *total = COSTS_N_INSNS (ix86_cost->fmul);
15387 rtx op0 = XEXP (x, 0);
15388 rtx op1 = XEXP (x, 1);
15390 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15392 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15393 for (nbits = 0; value != 0; value &= value - 1)
15394 nbits++;
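/* The loop above is the classic Kernighan bit count: value &= value - 1
   clears the lowest set bit, so the body runs once per set bit of the
   multiplier.  */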
15397 /* This is arbitrary. */
15400 /* Compute costs correctly for widening multiplication. */
15401 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15402 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15403 == GET_MODE_SIZE (mode))
15405 int is_mulwiden = 0;
15406 enum machine_mode inner_mode = GET_MODE (XEXP (op0, 0));
15408 if (GET_CODE (op0) == GET_CODE (op1))
15409 is_mulwiden = 1, op1 = XEXP (op1, 0);
15410 else if (GET_CODE (op1) == CONST_INT)
15412 if (GET_CODE (op0) == SIGN_EXTEND)
15413 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15416 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15420 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15423 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15424 + nbits * ix86_cost->mult_bit)
15425 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15434 if (FLOAT_MODE_P (mode))
15435 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15437 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15441 if (FLOAT_MODE_P (mode))
15442 *total = COSTS_N_INSNS (ix86_cost->fadd);
15443 else if (GET_MODE_CLASS (mode) == MODE_INT
15444 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15446 if (GET_CODE (XEXP (x, 0)) == PLUS
15447 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15448 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15449 && CONSTANT_P (XEXP (x, 1)))
15451 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15452 if (val == 2 || val == 4 || val == 8)
15454 *total = COSTS_N_INSNS (ix86_cost->lea);
15455 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15456 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15458 *total += rtx_cost (XEXP (x, 1), outer_code);
15462 else if (GET_CODE (XEXP (x, 0)) == MULT
15463 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15465 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15466 if (val == 2 || val == 4 || val == 8)
15468 *total = COSTS_N_INSNS (ix86_cost->lea);
15469 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15470 *total += rtx_cost (XEXP (x, 1), outer_code);
15474 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15476 *total = COSTS_N_INSNS (ix86_cost->lea);
15477 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15478 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15479 *total += rtx_cost (XEXP (x, 1), outer_code);
15486 if (FLOAT_MODE_P (mode))
15488 *total = COSTS_N_INSNS (ix86_cost->fadd);
15496 if (!TARGET_64BIT && mode == DImode)
15498 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15499 + (rtx_cost (XEXP (x, 0), outer_code)
15500 << (GET_MODE (XEXP (x, 0)) != DImode))
15501 + (rtx_cost (XEXP (x, 1), outer_code)
15502 << (GET_MODE (XEXP (x, 1)) != DImode)));
15508 if (FLOAT_MODE_P (mode))
15510 *total = COSTS_N_INSNS (ix86_cost->fchs);
15516 if (!TARGET_64BIT && mode == DImode)
15517 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15519 *total = COSTS_N_INSNS (ix86_cost->add);
15523 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15524 && XEXP (XEXP (x, 0), 1) == const1_rtx
15525 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15526 && XEXP (x, 1) == const0_rtx)
15528 /* This kind of construct is implemented using test[bwl].
15529 Treat it as if we had an AND. */
15530 *total = (COSTS_N_INSNS (ix86_cost->add)
15531 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15532 + rtx_cost (const1_rtx, outer_code));
15538 if (!TARGET_SSE_MATH
15540 || (mode == DFmode && !TARGET_SSE2))
15545 if (FLOAT_MODE_P (mode))
15546 *total = COSTS_N_INSNS (ix86_cost->fabs);
15550 if (FLOAT_MODE_P (mode))
15551 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15555 if (XINT (x, 1) == UNSPEC_TP)
15566 static int current_machopic_label_num;
15568 /* Given a symbol name and its associated stub, write out the
15569 definition of the stub. */
15572 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15574 unsigned int length;
15575 char *binder_name, *symbol_name, lazy_ptr_name[32];
15576 int label = ++current_machopic_label_num;
15578 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15579 symb = (*targetm.strip_name_encoding) (symb);
15581 length = strlen (stub);
15582 binder_name = alloca (length + 32);
15583 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15585 length = strlen (symb);
15586 symbol_name = alloca (length + 32);
15587 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15589 sprintf (lazy_ptr_name, "L%d$lz", label);
15592 machopic_picsymbol_stub_section ();
15594 machopic_symbol_stub_section ();
15596 fprintf (file, "%s:\n", stub);
15597 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15601 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15602 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15603 fprintf (file, "\tjmp %%edx\n");
15606 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15608 fprintf (file, "%s:\n", binder_name);
15612 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15613 fprintf (file, "\tpushl %%eax\n");
15616 fprintf (file, "\tpushl $%s\n", lazy_ptr_name);
15618 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15620 machopic_lazy_symbol_ptr_section ();
15621 fprintf (file, "%s:\n", lazy_ptr_name);
15622 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15623 fprintf (file, "\t.long %s\n", binder_name);
15625 #endif /* TARGET_MACHO */
15627 /* Order the registers for register allocator. */
15630 x86_order_regs_for_local_alloc (void)
15635 /* First allocate the local general purpose registers. */
15636 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15637 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15638 reg_alloc_order [pos++] = i;
15640 /* Global general purpose registers. */
15641 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15642 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15643 reg_alloc_order [pos++] = i;
15645 /* x87 registers come first in case we are doing FP math
15646 using them. */
15647 if (!TARGET_SSE_MATH)
15648 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15649 reg_alloc_order [pos++] = i;
15651 /* SSE registers. */
15652 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15653 reg_alloc_order [pos++] = i;
15654 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15655 reg_alloc_order [pos++] = i;
15657 /* x87 registers. */
15658 if (TARGET_SSE_MATH)
15659 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15660 reg_alloc_order [pos++] = i;
15662 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15663 reg_alloc_order [pos++] = i;
15665 /* Initialize the rest of the array, as we do not allocate some registers
15666 at all. */
15667 while (pos < FIRST_PSEUDO_REGISTER)
15668 reg_alloc_order [pos++] = 0;
15671 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15672 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15675 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15676 struct attribute_spec.handler. */
15678 ix86_handle_struct_attribute (tree *node, tree name,
15679 tree args ATTRIBUTE_UNUSED,
15680 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15683 if (DECL_P (*node))
15685 if (TREE_CODE (*node) == TYPE_DECL)
15686 type = &TREE_TYPE (*node);
15691 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15692 || TREE_CODE (*type) == UNION_TYPE)))
15694 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
15695 *no_add_attrs = true;
15698 else if ((is_attribute_p ("ms_struct", name)
15699 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15700 || ((is_attribute_p ("gcc_struct", name)
15701 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15703 warning ("%qs incompatible attribute ignored",
15704 IDENTIFIER_POINTER (name));
15705 *no_add_attrs = true;
15712 ix86_ms_bitfield_layout_p (tree record_type)
15714 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15715 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15716 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15719 /* Returns an expression indicating where the this parameter is
15720 located on entry to the FUNCTION. */
15723 x86_this_parameter (tree function)
15725 tree type = TREE_TYPE (function);
15729 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15730 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15733 if (ix86_function_regparm (type, function) > 0)
15737 parm = TYPE_ARG_TYPES (type);
15738 /* Figure out whether or not the function has a variable number of
15739 arguments. */
15740 for (; parm; parm = TREE_CHAIN (parm))
15741 if (TREE_VALUE (parm) == void_type_node)
15743 /* If not, the this parameter is in the first argument. */
15747 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15749 return gen_rtx_REG (SImode, regno);
15753 if (aggregate_value_p (TREE_TYPE (type), type))
15754 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15756 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
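/* Concretely: on entry to a 32-bit stack-args function 0(%esp) holds the
   return address, so THIS sits at 4(%esp), or at 8(%esp) when a hidden
   aggregate-return pointer occupies the first slot.  */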
15759 /* Determine whether x86_output_mi_thunk can succeed. */
15762 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15763 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15764 HOST_WIDE_INT vcall_offset, tree function)
15766 /* 64-bit can handle anything. */
15770 /* For 32-bit, everything's fine if we have one free register. */
15771 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15774 /* Need a free register for vcall_offset. */
15778 /* Need a free register for GOT references. */
15779 if (flag_pic && !(*targetm.binds_local_p) (function))
15782 /* Otherwise ok. */
15786 /* Output the assembler code for a thunk function. THUNK_DECL is the
15787 declaration for the thunk function itself, FUNCTION is the decl for
15788 the target function. DELTA is an immediate constant offset to be
15789 added to THIS. If VCALL_OFFSET is nonzero, the word at
15790 *(*this + vcall_offset) should be added to THIS. */
15793 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15794 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15795 HOST_WIDE_INT vcall_offset, tree function)
15798 rtx this = x86_this_parameter (function);
15801 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15802 pull it in now and let DELTA benefit. */
15805 else if (vcall_offset)
15807 /* Put the this parameter into %eax. */
15809 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15810 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15813 this_reg = NULL_RTX;
15815 /* Adjust the this parameter by a fixed constant. */
15818 xops[0] = GEN_INT (delta);
15819 xops[1] = this_reg ? this_reg : this;
15822 if (!x86_64_general_operand (xops[0], DImode))
15824 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15826 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15830 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15833 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15836 /* Adjust the this parameter by a value stored in the vtable. */
15840 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15843 int tmp_regno = 2 /* ECX */;
15844 if (lookup_attribute ("fastcall",
15845 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15846 tmp_regno = 0 /* EAX */;
15847 tmp = gen_rtx_REG (SImode, tmp_regno);
15850 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15853 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15855 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15857 /* Adjust the this parameter. */
15858 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15859 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15861 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15862 xops[0] = GEN_INT (vcall_offset);
15864 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15865 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15867 xops[1] = this_reg;
15869 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15871 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15874 /* If necessary, drop THIS back to its stack slot. */
15875 if (this_reg && this_reg != this)
15877 xops[0] = this_reg;
15879 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15882 xops[0] = XEXP (DECL_RTL (function), 0);
15885 if (!flag_pic || (*targetm.binds_local_p) (function))
15886 output_asm_insn ("jmp\t%P0", xops);
15889 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15890 tmp = gen_rtx_CONST (Pmode, tmp);
15891 tmp = gen_rtx_MEM (QImode, tmp);
15893 output_asm_insn ("jmp\t%A0", xops);
15898 if (!flag_pic || (*targetm.binds_local_p) (function))
15899 output_asm_insn ("jmp\t%P0", xops);
15904 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15905 tmp = (gen_rtx_SYMBOL_REF
15907 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15908 tmp = gen_rtx_MEM (QImode, tmp);
15910 output_asm_insn ("jmp\t%0", xops);
15913 #endif /* TARGET_MACHO */
15915 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15916 output_set_got (tmp);
15919 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15920 output_asm_insn ("jmp\t{*}%1", xops);
15926 x86_file_start (void)
15928 default_file_start ();
15929 if (X86_FILE_START_VERSION_DIRECTIVE)
15930 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15931 if (X86_FILE_START_FLTUSED)
15932 fputs ("\t.global\t__fltused\n", asm_out_file);
15933 if (ix86_asm_dialect == ASM_INTEL)
15934 fputs ("\t.intel_syntax\n", asm_out_file);
15938 x86_field_alignment (tree field, int computed)
15940 enum machine_mode mode;
15941 tree type = TREE_TYPE (field);
15943 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15945 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15946 ? get_inner_array_type (type) : type);
15947 if (mode == DFmode || mode == DCmode
15948 || GET_MODE_CLASS (mode) == MODE_INT
15949 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15950 return MIN (32, computed);
15954 /* Output assembler code to FILE to increment profiler label # LABELNO
15955 for profiling a function entry. */
15957 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15962 #ifndef NO_PROFILE_COUNTERS
15963 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15965 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15969 #ifndef NO_PROFILE_COUNTERS
15970 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15972 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15976 #ifndef NO_PROFILE_COUNTERS
15977 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15978 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15980 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15984 #ifndef NO_PROFILE_COUNTERS
15985 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15986 PROFILE_COUNT_REGISTER);
15988 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15992 /* We don't have exact information about the insn sizes, but we may assume
15993 quite safely that we are informed about all 1 byte insns and memory
15994 address sizes. This is enough to eliminate unnecessary padding in
15995 the vast majority of cases. */
15998 min_insn_size (rtx insn)
16002 if (!INSN_P (insn) || !active_insn_p (insn))
16005 /* Discard alignments we've emitted, and jump tables. */
16006 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16007 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16009 if (GET_CODE (insn) == JUMP_INSN
16010 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16011 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16014 /* Important case - calls are always 5 bytes.
16015 It is common to have many calls in a row. */
16016 if (GET_CODE (insn) == CALL_INSN
16017 && symbolic_reference_mentioned_p (PATTERN (insn))
16018 && !SIBLING_CALL_P (insn))
16020 if (get_attr_length (insn) <= 1)
16023 /* For normal instructions we may rely on the sizes of addresses
16024 and the presence of a symbol to require 4 bytes of encoding.
16025 This is not the case for jumps, where references are PC relative. */
16026 if (GET_CODE (insn) != JUMP_INSN)
16028 l = get_attr_length_address (insn);
16029 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16038 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
16039 window. */
16042 ix86_avoid_jump_misspredicts (void)
16044 rtx insn, start = get_insns ();
16045 int nbytes = 0, njumps = 0;
16048 /* Look for all minimal intervals of instructions containing 4 jumps.
16049 The intervals are bounded by START and INSN. NBYTES is the total
16050 size of instructions in the interval including INSN and not including
16051 START. When NBYTES is smaller than 16 bytes, it is possible
16052 that the ends of START and INSN land in the same 16 byte page.
16054 The smallest offset in the page at which INSN can start is the case where
16055 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
16056 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
16058 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16061 nbytes += min_insn_size (insn);
16063 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
16064 INSN_UID (insn), min_insn_size (insn));
16065 if ((GET_CODE (insn) == JUMP_INSN
16066 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16067 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16068 || GET_CODE (insn) == CALL_INSN)
16075 start = NEXT_INSN (start);
16076 if ((GET_CODE (start) == JUMP_INSN
16077 && GET_CODE (PATTERN (start)) != ADDR_VEC
16078 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16079 || GET_CODE (start) == CALL_INSN)
16080 njumps--, isjump = 1;
16083 nbytes -= min_insn_size (start);
16088 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16089 INSN_UID (start), INSN_UID (insn), nbytes);
16091 if (njumps == 3 && isjump && nbytes < 16)
16093 int padsize = 15 - nbytes + min_insn_size (insn);
16096 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16097 INSN_UID (insn), padsize);
16098 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
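/* For example, with NBYTES == 14 and a 2 byte INSN the pad is
   15 - 14 + 2 = 3 bytes, just enough to push the fourth jump out of
   the 16 byte window.  */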
16103 /* AMD Athlon works faster
16104 when RET is not the destination of a conditional jump and is not directly
16105 preceded by another jump instruction. We avoid the penalty by inserting a NOP
16106 just before the RET instruction in such cases. */
16108 ix86_pad_returns (void)
16113 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16115 basic_block bb = e->src;
16116 rtx ret = BB_END (bb);
16118 bool replace = false;
16120 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16121 || !maybe_hot_bb_p (bb))
16123 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16124 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16126 if (prev && GET_CODE (prev) == CODE_LABEL)
16131 FOR_EACH_EDGE (e, ei, bb->preds)
16132 if (EDGE_FREQUENCY (e) && e->src->index >= 0
16133 && !(e->flags & EDGE_FALLTHRU))
16138 prev = prev_active_insn (ret);
16140 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
16141 || GET_CODE (prev) == CALL_INSN))
16143 /* Empty functions get a branch mispredict even when the jump destination
16144 is not visible to us. */
16145 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
16150 emit_insn_before (gen_return_internal_long (), ret);
16156 /* Implement machine specific optimizations. We implement padding of returns
16157 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
16161 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
16162 ix86_pad_returns ();
16163 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
16164 ix86_avoid_jump_misspredicts ();
16167 /* Return nonzero when a QImode register that must be represented via a REX
16168 prefix is used. */
16170 x86_extended_QIreg_mentioned_p (rtx insn)
16173 extract_insn_cached (insn);
16174 for (i = 0; i < recog_data.n_operands; i++)
16175 if (REG_P (recog_data.operand[i])
16176 && REGNO (recog_data.operand[i]) >= 4)
16181 /* Return nonzero when P points to a register encoded via a REX prefix.
16182 Called via for_each_rtx. */
16184 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
16186 unsigned int regno;
16189 regno = REGNO (*p);
16190 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
16193 /* Return true when INSN mentions a register that must be encoded using a REX
16194 prefix. */
16196 x86_extended_reg_mentioned_p (rtx insn)
16198 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
16201 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
16202 optabs would emit if we didn't have TFmode patterns. */
16205 x86_emit_floatuns (rtx operands[2])
16207 rtx neglab, donelab, i0, i1, f0, in, out;
16208 enum machine_mode mode, inmode;
16210 inmode = GET_MODE (operands[1]);
16211 if (inmode != SImode
16212 && inmode != DImode)
16215 out = operands[0];
16216 in = force_reg (inmode, operands[1]);
16217 mode = GET_MODE (out);
16218 neglab = gen_label_rtx ();
16219 donelab = gen_label_rtx ();
16220 i1 = gen_reg_rtx (Pmode);
16221 f0 = gen_reg_rtx (mode);
16223 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
16225 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
16226 emit_jump_insn (gen_jump (donelab));
16229 emit_label (neglab);
16231 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16232 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16233 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
16234 expand_float (f0, i0, 0);
16235 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
16237 emit_label (donelab);
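#if 0
/* Illustrative sketch (not part of this file) of the halve-and-double
   trick emitted above, written as plain C for uint64_t -> double: when
   the sign bit is set, halve the value while folding the low bit back
   in (so the final rounding is unaffected), convert signed, and double
   the result.  */
#include <stdint.h>

static double
u64_to_double (uint64_t u)
{
  if ((int64_t) u >= 0)
    return (double) (int64_t) u;	/* fits the signed range */
  uint64_t half = (u >> 1) | (u & 1);	/* halve; keep low bit for rounding */
  double f = (double) (int64_t) half;
  return f + f;				/* scale back up */
}
#endif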
16240 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16241 with all elements equal to VAR. Return true if successful. */
16244 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
16245 rtx target, rtx val)
16247 enum machine_mode smode, wsmode, wvmode;
16254 if (!mmx_ok && !TARGET_SSE)
16262 val = force_reg (GET_MODE_INNER (mode), val);
16263 x = gen_rtx_VEC_DUPLICATE (mode, val);
16264 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16270 if (TARGET_SSE || TARGET_3DNOW_A)
16272 val = gen_lowpart (SImode, val);
16273 x = gen_rtx_TRUNCATE (HImode, val);
16274 x = gen_rtx_VEC_DUPLICATE (mode, x);
16275 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16304 /* Replicate the value once into the next wider mode and recurse. */
16305 val = convert_modes (wsmode, smode, val, true);
16306 x = expand_simple_binop (wsmode, ASHIFT, val,
16307 GEN_INT (GET_MODE_BITSIZE (smode)),
16308 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16309 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
16311 x = gen_reg_rtx (wvmode);
16312 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
16313 gcc_unreachable ();
16314 emit_move_insn (target, gen_lowpart (mode, x));
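/* For example, splatting the QImode value 0xAB first forms the HImode
   value 0xABAB via the shift/IOR above, then recurses to splat that in
   the vector mode with half as many elements.  */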
16322 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16323 whose low element is VAR, and other elements are zero. Return true
16327 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
16328 rtx target, rtx var)
16330 enum machine_mode vsimode;
16337 if (!mmx_ok && !TARGET_SSE)
16343 var = force_reg (GET_MODE_INNER (mode), var);
16344 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
16345 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16350 var = force_reg (GET_MODE_INNER (mode), var);
16351 x = gen_rtx_VEC_DUPLICATE (mode, var);
16352 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
16353 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16358 vsimode = V4SImode;
16364 vsimode = V2SImode;
16367 /* Zero extend the variable element to SImode and recurse. */
16368 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
16370 x = gen_reg_rtx (vsimode);
16371 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
16372 gcc_unreachable ();
16374 emit_move_insn (target, gen_lowpart (mode, x));
16382 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16383 consisting of the values in VALS. It is known that all elements
16384 except ONE_VAR are constants. Return true if successful. */
16387 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
16388 rtx target, rtx vals, int one_var)
16390 rtx var = XVECEXP (vals, 0, one_var);
16391 enum machine_mode wmode;
16394 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
16395 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
16403 /* For the two element vectors, it's just as easy to use
16404 the general case. */
16420 /* There's no way to set one QImode entry easily. Combine
16421 the variable value with its adjacent constant value, and
16422 promote to an HImode set. */
16423 x = XVECEXP (vals, 0, one_var ^ 1);
16426 var = convert_modes (HImode, QImode, var, true);
16427 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
16428 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16429 x = GEN_INT (INTVAL (x) & 0xff);
16433 var = convert_modes (HImode, QImode, var, true);
16434 x = gen_int_mode (INTVAL (x) << 8, HImode);
16436 if (x != const0_rtx)
16437 var = expand_simple_binop (HImode, IOR, var, x, var,
16438 1, OPTAB_LIB_WIDEN);
16440 x = gen_reg_rtx (wmode);
16441 emit_move_insn (x, gen_lowpart (wmode, const_vec));
16442 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
16444 emit_move_insn (target, gen_lowpart (mode, x));
16451 emit_move_insn (target, const_vec);
16452 ix86_expand_vector_set (mmx_ok, target, var, one_var);
16456 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
16457 all values variable, and none identical. */
16460 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
16461 rtx target, rtx vals)
16463 enum machine_mode half_mode = GET_MODE_INNER (mode);
16464 rtx op0 = NULL, op1 = NULL;
16465 bool use_vec_concat = false;
16471 if (!mmx_ok && !TARGET_SSE)
16477 /* For the two element vectors, we always implement VEC_CONCAT. */
16478 op0 = XVECEXP (vals, 0, 0);
16479 op1 = XVECEXP (vals, 0, 1);
16480 use_vec_concat = true;
16484 half_mode = V2SFmode;
16487 half_mode = V2SImode;
16493 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
16494 Recurse to load the two halves. */
16496 op0 = gen_reg_rtx (half_mode);
16497 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
16498 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
16500 op1 = gen_reg_rtx (half_mode);
16501 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
16502 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
16504 use_vec_concat = true;
16515 gcc_unreachable ();
16518 if (use_vec_concat)
16520 if (!register_operand (op0, half_mode))
16521 op0 = force_reg (half_mode, op0);
16522 if (!register_operand (op1, half_mode))
16523 op1 = force_reg (half_mode, op1);
16525 emit_insn (gen_rtx_SET (VOIDmode, target,
16526 gen_rtx_VEC_CONCAT (mode, op0, op1)));
16530 int i, j, n_elts, n_words, n_elt_per_word;
16531 enum machine_mode inner_mode;
16532 rtx words[4], shift;
16534 inner_mode = GET_MODE_INNER (mode);
16535 n_elts = GET_MODE_NUNITS (mode);
16536 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
16537 n_elt_per_word = n_elts / n_words;
16538 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
16540 for (i = 0; i < n_words; ++i)
16542 rtx word = NULL_RTX;
16544 for (j = 0; j < n_elt_per_word; ++j)
16546 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
16547 elt = convert_modes (word_mode, inner_mode, elt, true);
16553 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
16554 word, 1, OPTAB_LIB_WIDEN);
16555 word = expand_simple_binop (word_mode, IOR, word, elt,
16556 word, 1, OPTAB_LIB_WIDEN);
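/* E.g. when packing V8HImode elements into SImode words, the
   highest-numbered element of each word is fetched first and shifted
   up as the lower-numbered ones are IORed in, matching little-endian
   element order.  */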
16564 emit_move_insn (target, gen_lowpart (mode, words[0]));
16565 else if (n_words == 2)
16567 rtx tmp = gen_reg_rtx (mode);
16568 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
16569 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
16570 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
16571 emit_move_insn (target, tmp);
16573 else if (n_words == 4)
16575 rtx tmp = gen_reg_rtx (V4SImode);
16576 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
16577 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
16578 emit_move_insn (target, gen_lowpart (mode, tmp));
16581 gcc_unreachable ();
16585 /* Initialize vector TARGET via VALS. Suppress the use of MMX
16586 instructions unless MMX_OK is true. */
16589 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
16591 enum machine_mode mode = GET_MODE (target);
16592 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16593 int n_elts = GET_MODE_NUNITS (mode);
16594 int n_var = 0, one_var = -1;
16595 bool all_same = true, all_const_zero = true;
16599 for (i = 0; i < n_elts; ++i)
16601 x = XVECEXP (vals, 0, i);
16602 if (!CONSTANT_P (x))
16603 n_var++, one_var = i;
16604 else if (x != CONST0_RTX (inner_mode))
16605 all_const_zero = false;
16606 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
16607 all_same = false;
16610 /* Constants are best loaded from the constant pool. */
16613 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16617 /* If all values are identical, broadcast the value. */
16619 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
16620 XVECEXP (vals, 0, 0)))
16623 /* Values where only one field is non-constant are best loaded from
16624 the pool and overwritten via move later. */
16627 if (all_const_zero && one_var == 0
16628 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
16629 XVECEXP (vals, 0, 0)))
16632 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
16636 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  Use MMX
   instructions only if MMX_OK is true.  */

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DFmode:
    case V2DImode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = C C D D */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = C C D X */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  tmp = copy_to_reg (target);
	  ix86_expand_vector_set (false, target, val, 0);
	  emit_insn (gen_sse_shufps_1 (target, tmp, target,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  tmp = copy_to_reg (target);
	  ix86_expand_vector_set (false, target, val, 0);
	  emit_insn (gen_sse_shufps_1 (target, tmp, target,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;
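      /* Note on the gen_sse_shufps_1 immediates used above: each of
	 the four selector operands picks the source of one result
	 element.  The pattern constrains the first two selectors to
	 0-3, choosing from the first vector operand, and the last two
	 to 4-7 (written N+4 above), choosing element N of the second
	 vector operand.  */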
    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;
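      /* Worked example of the SSE2 path above (illustrative): for
	 ELT == 2 the selector array becomes {2, 1, 0, 3}, a
	 self-inverse permutation swapping elements 0 and 2.  The
	 first pshufd brings element ELT to position 0, the recursive
	 call stores VAL there, and the identical second pshufd swaps
	 the two elements back into place.  */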
    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;

    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
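/* The fallback path above is fully generic: spill the vector to a
   stack temporary, rewrite the ELT'th scalar slot in memory, and
   reload the whole vector.  It is correct for any element mode; the
   cost is a round trip through memory (an observation, not original
   commentary).  */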
/* Extract element ELT of vector VEC into scalar TARGET.  Use MMX
   instructions only if MMX_OK is true.  */

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
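/* With SSE2, the HImode vec_select path above is normally matched by
   pextrw, which produces its result zero-extended in a 32-bit
   register; exposing that as an explicit SImode ZERO_EXTEND lets the
   rtl optimizers remove redundant widening of the extracted value.
   (Rationale as understood from the pextrw semantics; the actual insn
   selection is left to the matcher.)  */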
/* Implements target hook vector_mode_supported_p.  */

static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
			clobbers);
  return clobbers;
}
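/* Illustrative sketch (not from the original sources) of what this
   amounts to at the source level: every asm on i386 behaves as if it
   had been written with these clobbers spelled out, e.g.

     asm volatile ("cld; rep; movsb"
		   : "=D" (d), "=S" (s), "=c" (n)
		   : "0" (d), "1" (s), "2" (n)
		   : "memory", "flags", "fpsr", "dirflag");

   so older asms that silently modify the direction flag or FP status
   word remain correct without source changes.  */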
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
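/* Example of why the FP modes are special: reversing LT on a
   floating-point comparison must yield UNGE, not GE, because a NaN
   operand makes both LT and GE false; plain reverse_condition would
   silently drop the unordered case.  */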
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
	  && TARGET_USE_FFREEP)
	return "ffreep\t%y0";
      return "fstp\t%y0";
    }

  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";

  return "fst\t%y0";
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
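/* For reference: C2 is bit 10 of the x87 status word.  In the SAHF
   path the FSW high byte is copied into EFLAGS, where C2 lands on PF,
   hence the UNORDERED test; in the TEST path, 0x04 masks bit 2 of the
   high byte examined by the testqi_ext pattern, which is again FSW
   bit 10.  (Bit arithmetic spelled out here for clarity; it is not
   part of the original commentary.)  */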
/* Output code to perform a log1p XFmode calculation.  */

void
ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
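/* The mathematics behind the split above: log1p(x) = ln(2) * log2(1 + x).
   fyl2xp1 computes y * log2(x + 1) directly, but is specified only for
   |x| < 1 - sqrt(2)/2 ~= 0.2928932188, which is exactly the constant
   compared against; for larger |x|, 1 + x is formed explicitly and
   handed to fyl2x (y * log2(x)) instead, with y = ln(2) in both cases
   so the result comes out in natural log.  */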
/* Solaris named-section hook.  Parameters are as for
   named_section_real.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
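/* For ".eh_frame" this emits, for example:

       .section	.eh_frame,"a",@unwind

   (or "aw" if the section is writable); the default hook would omit
   the "@unwind" marker that, per the comment above, Binutils 2.15
   requires on every occurrence of the section.  (Example directive
   inferred from the fprintf above.)  */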
#include "gt-i386.h"