gcc44: Prevent __stack_chk_fail_local references from being hidden.
[dragonfly.git] / contrib / gcc-4.4 / gcc / config / i386 / i386.c
CommitLineData
c251ad9e
SS
1/* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 3, or (at your option)
11any later version.
12
13GCC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "rtl.h"
27#include "tree.h"
28#include "tm_p.h"
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
34#include "output.h"
35#include "insn-codes.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "c-common.h"
39#include "except.h"
40#include "function.h"
41#include "recog.h"
42#include "expr.h"
43#include "optabs.h"
44#include "toplev.h"
45#include "basic-block.h"
46#include "ggc.h"
47#include "target.h"
48#include "target-def.h"
49#include "langhooks.h"
50#include "cgraph.h"
51#include "gimple.h"
52#include "dwarf2.h"
53#include "df.h"
54#include "tm-constrs.h"
55#include "params.h"
56#include "cselib.h"
57
/* Prototypes for two file-local (static) functions.  Their definitions
   are not in this part of the file.  */
58static int x86_builtin_vectorization_cost (bool);
59static rtx legitimize_dllimport_symbol (rtx, bool);
60
/* Fall back to -1 for CHECK_STACK_LIMIT when none of the headers included
   above has already defined it.  */
61#ifndef CHECK_STACK_LIMIT
62#define CHECK_STACK_LIMIT (-1)
63#endif
64
65/* Return the index of MODE in the five-entry multiply and divide cost
   arrays: QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything
   else -> 4 (the "other" slot).  MODE may be evaluated up to four times,
   so pass an expression without side effects.  */
66#define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
71 : 4)
72
73/* Processor costs (relative to an add) */
74/* COSTS_N_BYTES (N) is the cost of an instruction that is N bytes long.
   We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so under that assumption COSTS_N_BYTES (2) equals COSTS_N_INSNS (1).  */
75#define COSTS_N_BYTES(N) ((N) * 2)
76
/* Initializer that names libcall both as the leading algorithm and in its
   single {-1, libcall} table entry.  The cost tables below use it as the
   second element of a string-operation pair.
   NOTE(review): -1 presumably stands for "no upper size bound" -- confirm
   against the structure definition in the target header.  */
77#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
78
/* Cost table for tuning for code size.  Instruction costs are written with
   COSTS_N_BYTES (instruction length) instead of COSTS_N_INSNS.  The
   initializer is positional: entries must stay in the field order of
   struct processor_costs, and the trailing comment on each entry names
   what it fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.
   NOTE(review): unlike the per-processor tables that follow, this table is
   not static, presumably because other files refer to it -- confirm
   before changing its linkage.  */
79const
80struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
98 0, /* "large" insn */
99 2, /* MOVE_RATIO */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
125 2, /* Branch cost */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar_load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
147};
148
149/* Processor costs (relative to an add) for the 386.  The initializer is
   positional: entries must stay in the field order of struct
   processor_costs, and the trailing comment on each entry names what it
   fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.  */
150static const
151struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
170 3, /* MOVE_RATIO */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
196 1, /* Branch cost */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar_load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
218};
219
/* Processor costs (relative to an add) for the 486.  The initializer is
   positional: entries must stay in the field order of struct
   processor_costs, and the trailing comment on each entry names what it
   fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.  */
220static const
221struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set.
   NOTE(review): written as a bare 1, whereas
   i386_cost uses COSTS_N_INSNS (1) for this
   entry -- confirm which scale is intended. */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
240 3, /* MOVE_RATIO */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
268 1, /* Branch cost */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar_load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290};
291
/* Processor costs (relative to an add) for the Pentium.  The initializer
   is positional: entries must stay in the field order of struct
   processor_costs, and the trailing comment on each entry names what it
   fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.  */
292static const
293struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
312 6, /* MOVE_RATIO */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
338 2, /* Branch cost */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar_load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
360};
361
/* Processor costs (relative to an add) for the PentiumPro.  The
   initializer is positional: entries must stay in the field order of
   struct processor_costs, and the trailing comment on each entry names
   what it fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.  */
362static const
363struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
417 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
418 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
419 */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar_load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
437};
438
/* Processor costs (relative to an add) for the Geode.  The initializer is
   positional: entries must stay in the field order of struct
   processor_costs, and the trailing comment on each entry names what it
   fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.  */
439static const
440struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 4, /* MOVE_RATIO */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
470
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
486 1, /* Branch cost */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar_load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
508};
509
/* Processor costs (relative to an add) for the K6.  The initializer is
   positional: entries must stay in the field order of struct
   processor_costs, and the trailing comment on each entry names what it
   fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.  */
510static const
511struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
530 4, /* MOVE_RATIO */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
559 1, /* Branch cost */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar_load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581};
582
/* Processor costs (relative to an add) for the Athlon.  The initializer
   is positional: entries must stay in the field order of struct
   processor_costs, and the trailing comment on each entry names what it
   fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.  */
583static const
584struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
603 9, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
629 5, /* Branch cost */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar_load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
654};
655
/* Processor costs (relative to an add) for the K8.  The initializer is
   positional: entries must stay in the field order of struct
   processor_costs, and the trailing comment on each entry names what it
   fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.  Unlike the older tables, both
   elements of each pair are tuned here instead of using
   DUMMY_STRINGOP_ALGS for the second one.  */
656static const
657struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
705 time). */
706 100, /* number of parallel prefetches */
707 3, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use a loop. For large blocks, a libcall can do
716 non-temporal accesses and beat inline code considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar_load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
733};
734
/* Processor costs (relative to an add) for AMD Family 10h.  The
   initializer is positional: entries must stay in the field order of
   struct processor_costs, and the trailing comment on each entry names
   what it fills.  The two nested brace pairs after the FSQRT cost are
   string-operation strategies -- presumably memcpy followed by memset;
   confirm against struct processor_costs.
   NOTE(review): unlike the neighbouring per-processor tables, this one is
   declared without `static const'.  That looks like an oversight, but
   confirm that nothing outside this file refers to amdfam10_cost before
   changing its linkage.  */
735struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
754 9, /* MOVE_RATIO */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
776 /* On K8
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
779 On AMDFAM10
780 MOVD reg64, xmmreg Double FADD 3
781 1/1 1/1
782 MOVD reg32, xmmreg Double FADD 3
783 1/1 1/1 */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
791 time). */
792 100, /* number of parallel prefetches */
793 2, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use a loop. For large blocks, a libcall can
803 do non-temporal accesses and beat inline code considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar_load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
820};
821
/* Cost table for Pentium 4 tuning (going by its name; the code that selects
   it is outside this chunk).  The entries are positional initializers of
   struct processor_costs, so their order is significant; the comment after
   each entry names the field it fills.  */
822static const
823struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
842 6, /* MOVE_RATIO */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
868 2, /* Branch cost */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* Two string-operation strategy tables follow, two entries each.
   NOTE(review): presumably memcpy then memset, each as {32-bit, 64-bit}
   variants, with DUMMY_STRINGOP_ALGS filling the unused 64-bit slot --
   confirm against struct processor_costs in i386.h.  */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 {-1, libcall}}},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
891};
892
/* Cost table for Nocona tuning (going by its name; the code that selects it
   is outside this chunk).  The entries are positional initializers of
   struct processor_costs, so their order is significant; the comment after
   each entry names the field it fills.  */
893static const
894struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
913 17, /* MOVE_RATIO */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
939 1, /* Branch cost */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* Two string-operation strategy tables follow, two entries each.
   NOTE(review): presumably memcpy then memset, each as {32-bit, 64-bit}
   variants -- confirm against struct processor_costs in i386.h.  */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {-1, libcall}}},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
964};
965
/* Cost table for Core 2 tuning (going by its name; the code that selects it
   is outside this chunk).  The entries are positional initializers of
   struct processor_costs, so their order is significant; the comment after
   each entry names the field it fills.  */
966static const
967struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
986 16, /* MOVE_RATIO */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* Two string-operation strategy tables follow, two entries each.
   NOTE(review): presumably memcpy then memset, each as {32-bit, 64-bit}
   variants -- confirm against struct processor_costs in i386.h.  */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
1037};
1038
1039/* Generic64 should produce code tuned for Nocona and K8. The entries are
 positional initializers of struct processor_costs, so their order is
 significant; the comment after each entry names the field it fills. */
1040static const
1041struct processor_costs generic64_cost = {
1042 COSTS_N_INSNS (1), /* cost of an add instruction */
1043 /* On all chips taken into consideration lea is 2 cycles and more. With
1044 this cost however our current implementation of synth_mult results in
1045 use of unnecessary temporary registers causing regression on several
1046 SPECfp benchmarks. */
1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 COSTS_N_INSNS (4), /* HI */
1052 COSTS_N_INSNS (3), /* SI */
1053 COSTS_N_INSNS (4), /* DI */
1054 COSTS_N_INSNS (2)}, /* other */
1055 0, /* cost of multiply per each bit set */
1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 COSTS_N_INSNS (26), /* HI */
1058 COSTS_N_INSNS (42), /* SI */
1059 COSTS_N_INSNS (74), /* DI */
1060 COSTS_N_INSNS (74)}, /* other */
1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 8, /* "large" insn */
1064 17, /* MOVE_RATIO */
1065 4, /* cost for loading QImode using movzbl */
1066 {4, 4, 4}, /* cost of loading integer registers
1067 in QImode, HImode and SImode.
1068 Relative to reg-reg move (2). */
1069 {4, 4, 4}, /* cost of storing integer registers */
1070 4, /* cost of reg,reg fld/fst */
1071 {12, 12, 12}, /* cost of loading fp registers
1072 in SFmode, DFmode and XFmode */
1073 {6, 6, 8}, /* cost of storing fp registers
1074 in SFmode, DFmode and XFmode */
1075 2, /* cost of moving MMX register */
1076 {8, 8}, /* cost of loading MMX registers
1077 in SImode and DImode */
1078 {8, 8}, /* cost of storing MMX registers
1079 in SImode and DImode */
1080 2, /* cost of moving SSE register */
1081 {8, 8, 8}, /* cost of loading SSE registers
1082 in SImode, DImode and TImode */
1083 {8, 8, 8}, /* cost of storing SSE registers
1084 in SImode, DImode and TImode */
1085 5, /* MMX or SSE register to integer */
1086 32, /* size of l1 cache. */
1087 512, /* size of l2 cache. */
1088 64, /* size of prefetch block */
1089 6, /* number of parallel prefetches */
1090 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1091 value is increased to the perhaps more appropriate value of 5. */
1092 3, /* Branch cost */
1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Two string-operation strategy tables follow, two entries each.
   NOTE(review): presumably memcpy then memset, each as {32-bit, 64-bit}
   variants, with DUMMY_STRINGOP_ALGS filling the unused 32-bit slot --
   confirm against struct processor_costs in i386.h.  */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 {DUMMY_STRINGOP_ALGS,
1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1, /* scalar_stmt_cost. */
1104 1, /* scalar load_cost. */
1105 1, /* scalar_store_cost. */
1106 1, /* vec_stmt_cost. */
1107 1, /* vec_to_scalar_cost. */
1108 1, /* scalar_to_vec_cost. */
1109 1, /* vec_align_load_cost. */
1110 2, /* vec_unalign_load_cost. */
1111 1, /* vec_store_cost. */
1112 3, /* cond_taken_branch_cost. */
1113 1, /* cond_not_taken_branch_cost. */
1114};
1115
1116/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.
 The entries are positional initializers of struct processor_costs, so their
 order is significant; the comment after each entry names the field it
 fills. */
1117static const
1118struct processor_costs generic32_cost = {
1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 256, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 3, /* Branch cost */
1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Two string-operation strategy tables follow, two entries each.
   NOTE(review): presumably memcpy then memset, each as {32-bit, 64-bit}
   variants, with DUMMY_STRINGOP_ALGS filling the unused 64-bit slot --
   confirm against struct processor_costs in i386.h.  */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 DUMMY_STRINGOP_ALGS},
1174 1, /* scalar_stmt_cost. */
1175 1, /* scalar load_cost. */
1176 1, /* scalar_store_cost. */
1177 1, /* vec_stmt_cost. */
1178 1, /* vec_to_scalar_cost. */
1179 1, /* scalar_to_vec_cost. */
1180 1, /* vec_align_load_cost. */
1181 2, /* vec_unalign_load_cost. */
1182 1, /* vec_store_cost. */
1183 3, /* cond_taken_branch_cost. */
1184 1, /* cond_not_taken_branch_cost. */
1185};
1186
/* Cost table currently in effect.  It starts out pointing at pentium_cost.
   NOTE(review): presumably re-pointed once the tuning target is known --
   confirm where ix86_cost is assigned.  */
1187const struct processor_costs *ix86_cost = &pentium_cost;
1188
1189/* Processor feature/optimization bitmasks. Each m_<CPU> macro is a mask with
 only bit number PROCESSOR_<CPU> set; the combined macros (m_K6_GEODE,
 m_ATHLON_K8, m_AMD_MULTIPLE, m_GENERIC) OR several of them together.
 The shift is performed on a plain int, so every PROCESSOR_* value used
 here must be small enough for 1<<N to be representable in int. */
1190#define m_386 (1<<PROCESSOR_I386)
1191#define m_486 (1<<PROCESSOR_I486)
1192#define m_PENT (1<<PROCESSOR_PENTIUM)
1193#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195#define m_NOCONA (1<<PROCESSOR_NOCONA)
1196#define m_CORE2 (1<<PROCESSOR_CORE2)
1197
1198#define m_GEODE (1<<PROCESSOR_GEODE)
1199#define m_K6 (1<<PROCESSOR_K6)
1200#define m_K6_GEODE (m_K6 | m_GEODE)
1201#define m_K8 (1<<PROCESSOR_K8)
1202#define m_ATHLON (1<<PROCESSOR_ATHLON)
1203#define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1205#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1206
1207#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1209
1210/* Generic instruction choice should be common subset of supported CPUs
1211 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). m_GENERIC covers both the 32-bit and
 the 64-bit generic tuning. */
1212#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1213
1214/* Feature tests against the various tunings. One byte per X86_TUNE_* index.
 NOTE(review): presumably each byte is nonzero when that tuning applies to
 the selected processor -- confirm where this array is filled. */
1215unsigned char ix86_tune_features[X86_TUNE_LAST];
1216
1217/* Feature tests against the various tunings used to create ix86_tune_features
1218 based on the processor mask. Each entry is a bitmask built from the m_*
 processor bits, and is labelled by the X86_TUNE_* name in the comment
 above it. The entries are positional, so they must stay in the order of
 the X86_TUNE_* indices. */
1219static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1220 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1221 negatively, so enabling for Generic64 seems like good code size
1222 tradeoff. We can't enable it for 32bit generic because it does not
1223 work well with PPro base chips. */
1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1225
1226 /* X86_TUNE_PUSH_MEMORY */
1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1229
1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 m_486 | m_PENT,
1232
1233 /* X86_TUNE_UNROLL_STRLEN */
1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1235
1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1238
1239 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1240 on simulation result. But after P4 was made, no performance benefit
1241 was observed with branch hints. It also increases the code size.
1242 As a result, icc never generates branch hints. */
1243 0,
1244
1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1246 ~m_386,
1247
1248 /* X86_TUNE_USE_SAHF */
1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1251
1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 partial dependencies. */
1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1256
1257 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1258 register stalls on Generic32 compilation setting as well. However
1259 in current implementation the partial register stalls are not eliminated
1260 very well - they can be introduced via subregs synthesized by combine
1261 and can happen in caller/callee saving sequences. Because this option
1262 pays back little on PPro based chips and is in conflict with partial reg
1263 dependencies used by Athlon/P4 based chips, it is better to leave it off
1264 for generic32 for now. */
1265 m_PPRO,
1266
1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 m_CORE2 | m_GENERIC,
1269
1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 m_386 | m_486 | m_K6_GEODE,
1272
1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1275
1276 /* X86_TUNE_USE_MOV0 */
1277 m_K6,
1278
1279 /* X86_TUNE_USE_CLTD */
1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1281
1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1283 m_PENT4,
1284
1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1286 m_PPRO,
1287
1288 /* X86_TUNE_READ_MODIFY_WRITE */
1289 ~m_PENT,
1290
1291 /* X86_TUNE_READ_MODIFY */
1292 ~(m_PENT | m_PPRO),
1293
1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 | m_GENERIC /* | m_PENT4 ? */,
1297
1298 /* X86_TUNE_FAST_PREFIX */
1299 ~(m_PENT | m_486 | m_386),
1300
1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 m_386 | m_PENT4 | m_NOCONA,
1303
1304 /* X86_TUNE_QIMODE_MATH */
1305 ~0,
1306
1307 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1309 might be considered for Generic32 if our scheme for avoiding partial
1310 stalls was more effective. */
1311 ~m_PPRO,
1312
1313 /* X86_TUNE_PROMOTE_QI_REGS */
1314 0,
1315
1316 /* X86_TUNE_PROMOTE_HI_REGS */
1317 m_PPRO,
1318
1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1321
1322 /* X86_TUNE_ADD_ESP_8 */
1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325
1326 /* X86_TUNE_SUB_ESP_4 */
1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1328
1329 /* X86_TUNE_SUB_ESP_8 */
1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1332
1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 for DFmode copies */
1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 | m_GENERIC | m_GEODE),
1337
1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1340
1341 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1342 conflict here in between PPro/Pentium4 based chips that treat 128bit
1343 SSE registers as single units versus K8 based chips that divide SSE
1344 registers to two 64bit halves. This knob promotes all store destinations
1345 to be 128bit to allow register renaming on 128bit SSE units, but usually
1346 results in one extra microop on 64bit SSE units. Experimental results
1347 show that disabling this option on P4 brings over 20% SPECfp regression,
1348 while enabling it on K8 brings roughly 2.4% regression that can be partly
1349 masked by careful scheduling of moves. */
1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1351
1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1353 m_AMDFAM10,
1354
1355 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1356 are resolved on SSE register parts instead of whole registers, so we may
1357 maintain just lower part of scalar values in proper format leaving the
1358 upper part undefined. */
1359 m_ATHLON_K8,
1360
1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1362 m_AMD_MULTIPLE,
1363
1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 m_PPRO | m_PENT4 | m_NOCONA,
1366
1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372
1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1375
1376 /* X86_TUNE_SHIFT1 */
1377 ~m_486,
1378
1379 /* X86_TUNE_USE_FFREEP */
1380 m_AMD_MULTIPLE,
1381
1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1384
1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1386 ~(m_AMDFAM10),
1387
1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 than 4 branch instructions in the 16 byte window. */
1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1391
1392 /* X86_TUNE_SCHEDULE */
1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1394
1395 /* X86_TUNE_USE_BT */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_USE_INCDEC */
1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1400
1401 /* X86_TUNE_PAD_RETURNS */
1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1403
1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1406
1407 /* X86_TUNE_SHORTEN_X87_SSE */
1408 ~m_K8,
1409
1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1411 m_K8 | m_GENERIC64,
1412
1413 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1414 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1415 ~(m_386 | m_486),
1416
1417 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418 vector path on AMD machines. */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1420
1421 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1422 machines. */
1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1424
1425 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 than a MOV. */
1427 m_PENT,
1428
1429 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430 but one byte longer. */
1431 m_PENT,
1432
1433 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1434 operand that cannot be represented using a modRM byte. The XOR
1435 replacement is long decoded, so this split helps here as well. */
1436 m_K6,
1437
1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1439 from FP to FP. */
1440 m_AMDFAM10 | m_GENERIC,
1441
1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 from integer to FP. */
1444 m_AMDFAM10,
1445
1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 with a subsequent conditional jump instruction into a single
1448 compare-and-branch uop. */
1449 m_CORE2,
1450};
1451
1452/* Feature tests against the various architecture variations. One byte per
 X86_ARCH_* index. NOTE(review): presumably each byte is nonzero when the
 feature is available on the selected architecture -- confirm where this
 array is filled. */
1453unsigned char ix86_arch_features[X86_ARCH_LAST];
1454
1455/* Feature tests against the various architecture variations, used to create
1456 ix86_arch_features based on the processor mask. Each entry is a bitmask
 built from the m_* processor bits; every entry here is written as the
 complement of the processors that lack the instruction. The entries are
 positional, so they must stay in the order of the X86_ARCH_* indices
 named in the comments. */
1457static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 ~(m_386 | m_486 | m_PENT | m_K6),
1460
1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1462 ~m_386,
1463
1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1465 ~(m_386 | m_486),
1466
1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1468 ~m_386,
1469
1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1471 ~m_386,
1472};
1473
/* Bitmask of m_* processor bits.  NOTE(review): presumably the processors
   for which accumulating outgoing arguments is enabled by default --
   confirm at the use site.  */
1474static const unsigned int x86_accumulate_outgoing_args
1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1476
/* Bitmask of m_* processor bits.  NOTE(review): going by the name, the
   architectures assumed to always have the "fancy math" 387 instructions --
   confirm at the use site.  */
1477static const unsigned int x86_arch_always_fancy_math_387
1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 | m_NOCONA | m_CORE2 | m_GENERIC;
1480
/* String-operation algorithm selection, initialized to no_stringop.
   NOTE(review): presumably overridden from a command-line option --
   confirm where it is assigned.  */
1481static enum stringop_alg stringop_alg = no_stringop;
1482
1483/* In case the average insn count for single function invocation is
1484 lower than this constant, emit fast (but longer) prologue and
1485 epilogue code. */
1486#define FAST_PROLOGUE_INSN_COUNT 20
1487
1488/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. The
 contents come from the *_REGISTER_NAMES macros defined elsewhere. */
1489static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1492
1493/* Array of the smallest class containing reg number REGNO, indexed by
1494 REGNO. Used by REGNO_REG_CLASS in i386.h. The entries are positional:
 each group comment below names the registers its entries stand for, in
 register-number order, so entries must not be reordered. */
1495
1496enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1497{
1498 /* ax, dx, cx, bx */
1499 AREG, DREG, CREG, BREG,
1500 /* si, di, bp, sp */
1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1502 /* FP registers */
1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1505 /* arg pointer */
1506 NON_Q_REGS,
1507 /* flags, fpsr, fpcr, frame */
1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1509 /* SSE registers */
1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1511 SSE_REGS, SSE_REGS,
1512 /* MMX registers */
1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1514 MMX_REGS, MMX_REGS,
1515 /* REX registers */
1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 /* SSE REX registers */
1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 SSE_REGS, SSE_REGS,
1521};
1522
1523/* The "default" register map used in 32bit mode. One entry per hard
 register, grouped as the trailing comments indicate. NOTE(review): the
 index is presumably the gcc register number and -1 presumably marks a
 register with no debugger number in this scheme -- confirm at the use
 site. */
1524
1525int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1526{
1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1534};
1535
1536/* The "default" register map used in 64bit mode. One entry per hard
 register, grouped as the trailing comments indicate. NOTE(review): the
 index is presumably the gcc register number and -1 presumably marks a
 register with no debugger number in this scheme -- confirm at the use
 site. */
1537
1538int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539{
1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 8,9,10,11,12,13,14,15, /* extended integer registers */
1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1547};
1548
1549/* Define the register numbers to be used in Dwarf debugging information.
1550 The SVR4 reference port C compiler uses the following register numbers
1551 in its Dwarf output code:
1552 0 for %eax (gcc regno = 0)
1553 1 for %ecx (gcc regno = 2)
1554 2 for %edx (gcc regno = 1)
1555 3 for %ebx (gcc regno = 3)
1556 4 for %esp (gcc regno = 7)
1557 5 for %ebp (gcc regno = 6)
1558 6 for %esi (gcc regno = 4)
1559 7 for %edi (gcc regno = 5)
1560 The following three DWARF register numbers are never generated by
1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 believes these numbers have these meanings.
1563 8 for %eip (no gcc equivalent)
1564 9 for %eflags (gcc regno = 17)
1565 10 for %trapno (no gcc equivalent)
1566 It is not at all clear how we should number the FP stack registers
1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 a bit less brain dead with respect to floating-point then we would
1569 have a precedent to follow with respect to DWARF register numbers
1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 broken with respect to FP registers that it is hardly worth thinking
1572 of it as something to strive for compatibility with.
1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 seem to believe that DWARF register number 11 is associated with
1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 register numbers don't seem to be associated with anything in
1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 stand that it should say that a variable lives in %st(0) (when
1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 but SDB still prints garbage when asked for the value of the
1581 variable in question (via a `/' command).
1582 (Also note that the labels SDB prints for various FP stack regs
1583 when doing an `x' command are all wrong.)
1584 Note that these problems generally don't affect the native SVR4
1585 C compiler because it doesn't allow the use of -O with -g and
1586 because when it is *not* optimizing, it allocates a memory
1587 location for each floating-point variable, and the memory
1588 location is what gets described in the DWARF AT_location
1589 attribute for the variable in question.
1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 do something sensible here and we use the following DWARF
1592 register numbers. Note that these are all stack-top-relative
1593 numbers.
1594 11 for %st(0) (gcc regno = 8)
1595 12 for %st(1) (gcc regno = 9)
1596 13 for %st(2) (gcc regno = 10)
1597 14 for %st(3) (gcc regno = 11)
1598 15 for %st(4) (gcc regno = 12)
1599 16 for %st(5) (gcc regno = 13)
1600 17 for %st(6) (gcc regno = 14)
1601 18 for %st(7) (gcc regno = 15)
1602*/
/* Indexed by gcc register number; each entry is the DWARF register number
   to emit.  For example entry 7 (%esp) is 4, entry 8 (%st(0)) is 11 and
   entry 17 (flags) is 9.  NOTE(review): -1 presumably marks a register
   with no DWARF number in this scheme -- confirm at the use site.  */
1603int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1604{
1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1612};
1613
1614/* Test and compare insns in i386.md store the information needed to
1615 generate branch and scc insns here. */
1616
1617rtx ix86_compare_op0 = NULL_RTX;
1618rtx ix86_compare_op1 = NULL_RTX;
1619rtx ix86_compare_emitted = NULL_RTX;
1620
1621/* Define parameter passing and return registers. */
1622
1623static int const x86_64_int_parameter_registers[6] =
1624{
1625 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1626};
1627
1628static int const x86_64_ms_abi_int_parameter_registers[4] =
1629{
1630 CX_REG, DX_REG, R8_REG, R9_REG
1631};
1632
1633static int const x86_64_int_return_registers[4] =
1634{
1635 AX_REG, DX_REG, DI_REG, SI_REG
1636};
1637
1638/* Define the structure for the machine field in struct function. */
1639
1640struct stack_local_entry GTY(())
1641{
1642 unsigned short mode;
1643 unsigned short n;
1644 rtx rtl;
1645 struct stack_local_entry *next;
1646};
1647
1648/* Structure describing stack frame layout.
1649 Stack grows downward:
1650
1651 [arguments]
1652 <- ARG_POINTER
1653 saved pc
1654
1655 saved frame pointer if frame_pointer_needed
1656 <- HARD_FRAME_POINTER
1657 [saved regs]
1658
1659 [padding0]
1660
1661 [saved SSE regs]
1662
1663 [padding1] \
1664 )
1665 [va_arg registers] (
1666 > to_allocate <- FRAME_POINTER
1667 [frame] (
1668 )
1669 [padding2] /
1670 */
1671struct ix86_frame
1672{
1673 int padding0;
1674 int nsseregs;
1675 int nregs;
1676 int padding1;
1677 int va_arg_size;
1678 HOST_WIDE_INT frame;
1679 int padding2;
1680 int outgoing_arguments_size;
1681 int red_zone_size;
1682
1683 HOST_WIDE_INT to_allocate;
1684 /* The offsets relative to ARG_POINTER. */
1685 HOST_WIDE_INT frame_pointer_offset;
1686 HOST_WIDE_INT hard_frame_pointer_offset;
1687 HOST_WIDE_INT stack_pointer_offset;
1688
1689 /* When save_regs_using_mov is set, emit prologue using
1690 move instead of push instructions. */
1691 bool save_regs_using_mov;
1692};
1693
1694/* Code model option. */
1695enum cmodel ix86_cmodel;
1696/* Asm dialect. */
1697enum asm_dialect ix86_asm_dialect = ASM_ATT;
1698/* TLS dialects. */
1699enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1700
1701/* Which unit we are generating floating point math for. */
1702enum fpmath_unit ix86_fpmath;
1703
1704/* Which cpu are we scheduling for. */
1705enum attr_cpu ix86_schedule;
1706
1707/* Which cpu are we optimizing for. */
1708enum processor_type ix86_tune;
1709
1710/* Which instruction set architecture to use. */
1711enum processor_type ix86_arch;
1712
1713/* true if sse prefetch instruction is not NOOP. */
1714int x86_prefetch_sse;
1715
1716/* ix86_regparm_string as a number */
1717static int ix86_regparm;
1718
1719/* -mstackrealign option */
1720extern int ix86_force_align_arg_pointer;
1721static const char ix86_force_align_arg_pointer_string[]
1722 = "force_align_arg_pointer";
1723
1724static rtx (*ix86_gen_leave) (void);
1725static rtx (*ix86_gen_pop1) (rtx);
1726static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1727static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1728static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1729static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1730static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1731static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1732
1733/* Preferred alignment for stack boundary in bits. */
1734unsigned int ix86_preferred_stack_boundary;
1735
1736/* Alignment for incoming stack boundary in bits specified at
1737 command line. */
1738static unsigned int ix86_user_incoming_stack_boundary;
1739
1740/* Default alignment for incoming stack boundary in bits. */
1741static unsigned int ix86_default_incoming_stack_boundary;
1742
1743/* Alignment for incoming stack boundary in bits. */
1744unsigned int ix86_incoming_stack_boundary;
1745
1746/* Values 1-5: see jump.c */
1747int ix86_branch_cost;
1748
1749/* Calling abi specific va_list type nodes. */
1750static GTY(()) tree sysv_va_list_type_node;
1751static GTY(()) tree ms_va_list_type_node;
1752
1753/* Variables which are this size or smaller are put in the data/bss
1754 or ldata/lbss sections. */
1755
1756int ix86_section_threshold = 65536;
1757
1758/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1759char internal_label_prefix[16];
1760int internal_label_prefix_len;
1761
1762/* Fence to use after loop using movnt. */
1763tree x86_mfence;
1764
1765/* Register class used for passing given 64bit part of the argument.
1766 These represent classes as documented by the PS ABI, with the exception
1767 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1768 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1769
1770 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1771 whenever possible (upper half does contain padding). */
enum x86_64_reg_class
{
  X86_64_NO_CLASS = 0,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,	/* Integer, moved in SImode.  */
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,		/* SSE, moved in SFmode.  */
  X86_64_SSEDF_CLASS,		/* SSE, moved in DFmode.  */
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_COMPLEX_X87_CLASS,
  X86_64_MEMORY_CLASS
};
1786
1787#define MAX_CLASSES 4
1788
1789/* Table of constants used by fldpi, fldln2, etc.... */
1790static REAL_VALUE_TYPE ext_80387_constants_table [5];
1791static bool ext_80387_constants_init = 0;
1792
1793\f
1794static struct machine_function * ix86_init_machine_status (void);
1795static rtx ix86_function_value (const_tree, const_tree, bool);
1796static int ix86_function_regparm (const_tree, const_tree);
1797static void ix86_compute_frame_layout (struct ix86_frame *);
1798static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1799 rtx, rtx, int);
1800static void ix86_add_new_builtins (int);
1801
/* Indices for the per-function option strings; the MAX enumerator is
   the number of such strings.  */
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH = 0,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_FPMATH,
  IX86_FUNCTION_SPECIFIC_MAX
};
1809
1810static char *ix86_target_string (int, int, const char *, const char *,
1811 const char *, bool);
1812static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1813static void ix86_function_specific_save (struct cl_target_option *);
1814static void ix86_function_specific_restore (struct cl_target_option *);
1815static void ix86_function_specific_print (FILE *, int,
1816 struct cl_target_option *);
1817static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1818static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1819static bool ix86_can_inline_p (tree, tree);
1820static void ix86_set_current_function (tree);
1821
1822\f
1823/* The svr4 ABI for the i386 says that records and unions are returned
1824 in memory. */
1825#ifndef DEFAULT_PCC_STRUCT_RETURN
1826#define DEFAULT_PCC_STRUCT_RETURN 1
1827#endif
1828
1829/* Whether -mtune= or -march= were specified */
1830static int ix86_tune_defaulted;
1831static int ix86_arch_specified;
1832
1833/* Bit flags that specify the ISA we are compiling for. */
1834int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1835
1836/* A mask of ix86_isa_flags that includes bit X if X
1837 was set or cleared on the command line. */
1838static int ix86_isa_flags_explicit;
1839
1840/* Define a set of ISAs which are available when a given ISA is
1841 enabled. MMX and SSE ISAs are handled separately. */
1842
#define OPTION_MASK_ISA_MMX_SET		OPTION_MASK_ISA_MMX
#define OPTION_MASK_ISA_3DNOW_SET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)

#define OPTION_MASK_ISA_SSE_SET		OPTION_MASK_ISA_SSE
#define OPTION_MASK_ISA_SSE2_SET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
#define OPTION_MASK_ISA_SSE3_SET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_SSSE3_SET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE4_1_SET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
#define OPTION_MASK_ISA_AVX_SET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
#define OPTION_MASK_ISA_FMA_SET \
  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)

/* SSE4 covers both SSE4.1 and SSE4.2, so -msse4 is the same as
   -msse4.2.  */
#define OPTION_MASK_ISA_SSE4_SET	OPTION_MASK_ISA_SSE4_2_SET

#define OPTION_MASK_ISA_SSE4A_SET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE5_SET \
  (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)

/* AES and PCLMUL use xmm registers and therefore need SSE2.  */
#define OPTION_MASK_ISA_AES_SET \
  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_PCLMUL_SET \
  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)

#define OPTION_MASK_ISA_ABM_SET \
  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
#define OPTION_MASK_ISA_POPCNT_SET	OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_SET	OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_SET	OPTION_MASK_ISA_SAHF
1883
1884/* Define a set of ISAs which aren't available when a given ISA is
1885 disabled. MMX and SSE ISAs are handled separately. */
1886
#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
#define OPTION_MASK_ISA_3DNOW_A_UNSET	OPTION_MASK_ISA_3DNOW_A

#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSE3 \
   | OPTION_MASK_ISA_SSSE3_UNSET \
   | OPTION_MASK_ISA_SSE4A_UNSET)
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
#define OPTION_MASK_ISA_AVX_UNSET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET	OPTION_MASK_ISA_FMA

/* SSE4 covers both SSE4.1 and SSE4.2, so -mno-sse4 should be the same
   as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4_UNSET	OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
#define OPTION_MASK_ISA_SSE5_UNSET	OPTION_MASK_ISA_SSE5
#define OPTION_MASK_ISA_AES_UNSET	OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET	OPTION_MASK_ISA_PCLMUL
#define OPTION_MASK_ISA_ABM_UNSET	OPTION_MASK_ISA_ABM
#define OPTION_MASK_ISA_POPCNT_UNSET	OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_UNSET	OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_UNSET	OPTION_MASK_ISA_SAHF
1924
1925/* Vectorization library interface and handlers. */
1926tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1927static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1928static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1929
/* One entry of the processor target table, which is indexed by
   processor number.  */
struct ptt
{
  /* Processor costs.  */
  const struct processor_costs *cost;

  /* Default alignments.  */
  const int align_loop;
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};
1940
1941static const struct ptt processor_target_table[PROCESSOR_max] =
1942{
1943 {&i386_cost, 4, 3, 4, 3, 4},
1944 {&i486_cost, 16, 15, 16, 15, 16},
1945 {&pentium_cost, 16, 7, 16, 7, 16},
1946 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1947 {&geode_cost, 0, 0, 0, 0, 0},
1948 {&k6_cost, 32, 7, 32, 7, 32},
1949 {&athlon_cost, 16, 7, 16, 7, 16},
1950 {&pentium4_cost, 0, 0, 0, 0, 0},
1951 {&k8_cost, 16, 7, 16, 7, 16},
1952 {&nocona_cost, 0, 0, 0, 0, 0},
1953 {&core2_cost, 16, 10, 16, 10, 16},
1954 {&generic32_cost, 16, 7, 16, 7, 16},
1955 {&generic64_cost, 16, 10, 16, 10, 16},
1956 {&amdfam10_cost, 32, 24, 32, 7, 32}
1957};
1958
1959static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1960{
1961 "generic",
1962 "i386",
1963 "i486",
1964 "pentium",
1965 "pentium-mmx",
1966 "pentiumpro",
1967 "pentium2",
1968 "pentium3",
1969 "pentium4",
1970 "pentium-m",
1971 "prescott",
1972 "nocona",
1973 "core2",
1974 "geode",
1975 "k6",
1976 "k6-2",
1977 "k6-3",
1978 "athlon",
1979 "athlon-4",
1980 "k8",
1981 "amdfam10"
1982};
1983\f
1984/* Implement TARGET_HANDLE_OPTION. */
1985
1986static bool
1987ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1988{
1989 switch (code)
1990 {
1991 case OPT_mmmx:
1992 if (value)
1993 {
1994 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1995 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1996 }
1997 else
1998 {
1999 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2000 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2001 }
2002 return true;
2003
2004 case OPT_m3dnow:
2005 if (value)
2006 {
2007 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2008 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2009 }
2010 else
2011 {
2012 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2013 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2014 }
2015 return true;
2016
2017 case OPT_m3dnowa:
2018 return false;
2019
2020 case OPT_msse:
2021 if (value)
2022 {
2023 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2024 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2025 }
2026 else
2027 {
2028 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2029 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2030 }
2031 return true;
2032
2033 case OPT_msse2:
2034 if (value)
2035 {
2036 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2037 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2038 }
2039 else
2040 {
2041 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2042 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2043 }
2044 return true;
2045
2046 case OPT_msse3:
2047 if (value)
2048 {
2049 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2050 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2051 }
2052 else
2053 {
2054 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2055 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2056 }
2057 return true;
2058
2059 case OPT_mssse3:
2060 if (value)
2061 {
2062 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2063 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2064 }
2065 else
2066 {
2067 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2068 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2069 }
2070 return true;
2071
2072 case OPT_msse4_1:
2073 if (value)
2074 {
2075 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2076 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2077 }
2078 else
2079 {
2080 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2081 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2082 }
2083 return true;
2084
2085 case OPT_msse4_2:
2086 if (value)
2087 {
2088 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2089 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2090 }
2091 else
2092 {
2093 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2094 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2095 }
2096 return true;
2097
2098 case OPT_mavx:
2099 if (value)
2100 {
2101 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2102 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2103 }
2104 else
2105 {
2106 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2107 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2108 }
2109 return true;
2110
2111 case OPT_mfma:
2112 if (value)
2113 {
2114 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2115 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2116 }
2117 else
2118 {
2119 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2120 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2121 }
2122 return true;
2123
2124 case OPT_msse4:
2125 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2126 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2127 return true;
2128
2129 case OPT_mno_sse4:
2130 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2131 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2132 return true;
2133
2134 case OPT_msse4a:
2135 if (value)
2136 {
2137 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2138 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2139 }
2140 else
2141 {
2142 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2144 }
2145 return true;
2146
2147 case OPT_msse5:
2148 if (value)
2149 {
2150 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2151 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2152 }
2153 else
2154 {
2155 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2157 }
2158 return true;
2159
2160 case OPT_mabm:
2161 if (value)
2162 {
2163 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2164 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2165 }
2166 else
2167 {
2168 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2170 }
2171 return true;
2172
2173 case OPT_mpopcnt:
2174 if (value)
2175 {
2176 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2177 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2178 }
2179 else
2180 {
2181 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2183 }
2184 return true;
2185
2186 case OPT_msahf:
2187 if (value)
2188 {
2189 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2190 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2191 }
2192 else
2193 {
2194 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2195 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2196 }
2197 return true;
2198
2199 case OPT_mcx16:
2200 if (value)
2201 {
2202 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2203 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2204 }
2205 else
2206 {
2207 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2208 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2209 }
2210 return true;
2211
2212 case OPT_maes:
2213 if (value)
2214 {
2215 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2216 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2217 }
2218 else
2219 {
2220 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2222 }
2223 return true;
2224
2225 case OPT_mpclmul:
2226 if (value)
2227 {
2228 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2230 }
2231 else
2232 {
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2235 }
2236 return true;
2237
2238 default:
2239 return true;
2240 }
2241}
2242\f
2243/* Return a string the documents the current -m options. The caller is
2244 responsible for freeing the string. */
2245
2246static char *
2247ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2248 const char *fpmath, bool add_nl_p)
2249{
2250 struct ix86_target_opts
2251 {
2252 const char *option; /* option string */
2253 int mask; /* isa mask options */
2254 };
2255
2256 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2257 preceding options while match those first. */
2258 static struct ix86_target_opts isa_opts[] =
2259 {
2260 { "-m64", OPTION_MASK_ISA_64BIT },
2261 { "-msse5", OPTION_MASK_ISA_SSE5 },
2262 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2263 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2264 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2265 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2266 { "-msse3", OPTION_MASK_ISA_SSE3 },
2267 { "-msse2", OPTION_MASK_ISA_SSE2 },
2268 { "-msse", OPTION_MASK_ISA_SSE },
2269 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2270 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2271 { "-mmmx", OPTION_MASK_ISA_MMX },
2272 { "-mabm", OPTION_MASK_ISA_ABM },
2273 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2274 { "-maes", OPTION_MASK_ISA_AES },
2275 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2276 };
2277
2278 /* Flag options. */
2279 static struct ix86_target_opts flag_opts[] =
2280 {
2281 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2282 { "-m80387", MASK_80387 },
2283 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2284 { "-malign-double", MASK_ALIGN_DOUBLE },
2285 { "-mcld", MASK_CLD },
2286 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2287 { "-mieee-fp", MASK_IEEE_FP },
2288 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2289 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2290 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2291 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2292 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2293 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2294 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2295 { "-mno-red-zone", MASK_NO_RED_ZONE },
2296 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2297 { "-mrecip", MASK_RECIP },
2298 { "-mrtd", MASK_RTD },
2299 { "-msseregparm", MASK_SSEREGPARM },
2300 { "-mstack-arg-probe", MASK_STACK_PROBE },
2301 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2302 };
2303
2304 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2305
2306 char isa_other[40];
2307 char target_other[40];
2308 unsigned num = 0;
2309 unsigned i, j;
2310 char *ret;
2311 char *ptr;
2312 size_t len;
2313 size_t line_len;
2314 size_t sep_len;
2315
2316 memset (opts, '\0', sizeof (opts));
2317
2318 /* Add -march= option. */
2319 if (arch)
2320 {
2321 opts[num][0] = "-march=";
2322 opts[num++][1] = arch;
2323 }
2324
2325 /* Add -mtune= option. */
2326 if (tune)
2327 {
2328 opts[num][0] = "-mtune=";
2329 opts[num++][1] = tune;
2330 }
2331
2332 /* Pick out the options in isa options. */
2333 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2334 {
2335 if ((isa & isa_opts[i].mask) != 0)
2336 {
2337 opts[num++][0] = isa_opts[i].option;
2338 isa &= ~ isa_opts[i].mask;
2339 }
2340 }
2341
2342 if (isa && add_nl_p)
2343 {
2344 opts[num++][0] = isa_other;
2345 sprintf (isa_other, "(other isa: 0x%x)", isa);
2346 }
2347
2348 /* Add flag options. */
2349 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2350 {
2351 if ((flags & flag_opts[i].mask) != 0)
2352 {
2353 opts[num++][0] = flag_opts[i].option;
2354 flags &= ~ flag_opts[i].mask;
2355 }
2356 }
2357
2358 if (flags && add_nl_p)
2359 {
2360 opts[num++][0] = target_other;
2361 sprintf (target_other, "(other flags: 0x%x)", isa);
2362 }
2363
2364 /* Add -fpmath= option. */
2365 if (fpmath)
2366 {
2367 opts[num][0] = "-mfpmath=";
2368 opts[num++][1] = fpmath;
2369 }
2370
2371 /* Any options? */
2372 if (num == 0)
2373 return NULL;
2374
2375 gcc_assert (num < ARRAY_SIZE (opts));
2376
2377 /* Size the string. */
2378 len = 0;
2379 sep_len = (add_nl_p) ? 3 : 1;
2380 for (i = 0; i < num; i++)
2381 {
2382 len += sep_len;
2383 for (j = 0; j < 2; j++)
2384 if (opts[i][j])
2385 len += strlen (opts[i][j]);
2386 }
2387
2388 /* Build the string. */
2389 ret = ptr = (char *) xmalloc (len);
2390 line_len = 0;
2391
2392 for (i = 0; i < num; i++)
2393 {
2394 size_t len2[2];
2395
2396 for (j = 0; j < 2; j++)
2397 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2398
2399 if (i != 0)
2400 {
2401 *ptr++ = ' ';
2402 line_len++;
2403
2404 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2405 {
2406 *ptr++ = '\\';
2407 *ptr++ = '\n';
2408 line_len = 0;
2409 }
2410 }
2411
2412 for (j = 0; j < 2; j++)
2413 if (opts[i][j])
2414 {
2415 memcpy (ptr, opts[i][j], len2[j]);
2416 ptr += len2[j];
2417 line_len += len2[j];
2418 }
2419 }
2420
2421 *ptr = '\0';
2422 gcc_assert (ret + len >= ptr);
2423
2424 return ret;
2425}
2426
2427/* Function that is callable from the debugger to print the current
2428 options. */
2429void
2430ix86_debug_options (void)
2431{
2432 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2433 ix86_arch_string, ix86_tune_string,
2434 ix86_fpmath_string, true);
2435
2436 if (opts)
2437 {
2438 fprintf (stderr, "%s\n\n", opts);
2439 free (opts);
2440 }
2441 else
2442 fprintf (stderr, "<no options>\n\n");
2443
2444 return;
2445}
2446\f
2447/* Sometimes certain combinations of command options do not make
2448 sense on a particular target machine. You can define a macro
2449 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2450 defined, is executed once just after all the command options have
2451 been parsed.
2452
2453 Don't use this macro to turn on various extra optimizations for
2454 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2455
2456void
2457override_options (bool main_args_p)
2458{
2459 int i;
2460 unsigned int ix86_arch_mask, ix86_tune_mask;
2461 const char *prefix;
2462 const char *suffix;
2463 const char *sw;
2464
2465 /* Comes from final.c -- no real reason to change it. */
2466#define MAX_CODE_ALIGN 16
2467
2468 enum pta_flags
2469 {
2470 PTA_SSE = 1 << 0,
2471 PTA_SSE2 = 1 << 1,
2472 PTA_SSE3 = 1 << 2,
2473 PTA_MMX = 1 << 3,
2474 PTA_PREFETCH_SSE = 1 << 4,
2475 PTA_3DNOW = 1 << 5,
2476 PTA_3DNOW_A = 1 << 6,
2477 PTA_64BIT = 1 << 7,
2478 PTA_SSSE3 = 1 << 8,
2479 PTA_CX16 = 1 << 9,
2480 PTA_POPCNT = 1 << 10,
2481 PTA_ABM = 1 << 11,
2482 PTA_SSE4A = 1 << 12,
2483 PTA_NO_SAHF = 1 << 13,
2484 PTA_SSE4_1 = 1 << 14,
2485 PTA_SSE4_2 = 1 << 15,
2486 PTA_SSE5 = 1 << 16,
2487 PTA_AES = 1 << 17,
2488 PTA_PCLMUL = 1 << 18,
2489 PTA_AVX = 1 << 19,
2490 PTA_FMA = 1 << 20
2491 };
2492
2493 static struct pta
2494 {
2495 const char *const name; /* processor name or nickname. */
2496 const enum processor_type processor;
2497 const enum attr_cpu schedule;
2498 const unsigned /*enum pta_flags*/ flags;
2499 }
2500 const processor_alias_table[] =
2501 {
2502 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2503 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2504 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2505 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2506 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2507 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2508 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2509 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2510 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2511 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2512 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2513 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2514 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2515 PTA_MMX | PTA_SSE},
2516 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2517 PTA_MMX | PTA_SSE},
2518 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2519 PTA_MMX | PTA_SSE | PTA_SSE2},
2520 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2521 PTA_MMX |PTA_SSE | PTA_SSE2},
2522 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2523 PTA_MMX | PTA_SSE | PTA_SSE2},
2524 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2525 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2526 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2527 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2528 | PTA_CX16 | PTA_NO_SAHF},
2529 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2530 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2531 | PTA_SSSE3 | PTA_CX16},
2532 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2533 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2534 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2535 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2536 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2537 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2538 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2539 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2540 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2541 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2542 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2543 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2544 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2545 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2546 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2547 {"x86-64", PROCESSOR_K8, CPU_K8,
2548 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2549 {"k8", PROCESSOR_K8, CPU_K8,
2550 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2551 | PTA_SSE2 | PTA_NO_SAHF},
2552 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2553 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2554 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2555 {"opteron", PROCESSOR_K8, CPU_K8,
2556 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2557 | PTA_SSE2 | PTA_NO_SAHF},
2558 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2559 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2560 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2561 {"athlon64", PROCESSOR_K8, CPU_K8,
2562 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2563 | PTA_SSE2 | PTA_NO_SAHF},
2564 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2565 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2566 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2567 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2568 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2569 | PTA_SSE2 | PTA_NO_SAHF},
2570 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2571 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2572 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2573 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2574 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2575 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2576 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2577 0 /* flags are only used for -march switch. */ },
2578 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2579 PTA_64BIT /* flags are only used for -march switch. */ },
2580 };
2581
2582 int const pta_size = ARRAY_SIZE (processor_alias_table);
2583
2584 /* Set up prefix/suffix so the error messages refer to either the command
2585 line argument, or the attribute(target). */
2586 if (main_args_p)
2587 {
2588 prefix = "-m";
2589 suffix = "";
2590 sw = "switch";
2591 }
2592 else
2593 {
2594 prefix = "option(\"";
2595 suffix = "\")";
2596 sw = "attribute";
2597 }
2598
2599#ifdef SUBTARGET_OVERRIDE_OPTIONS
2600 SUBTARGET_OVERRIDE_OPTIONS;
2601#endif
2602
2603#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2604 SUBSUBTARGET_OVERRIDE_OPTIONS;
2605#endif
2606
2607 /* -fPIC is the default for x86_64. */
2608 if (TARGET_MACHO && TARGET_64BIT)
2609 flag_pic = 2;
2610
2611 /* Set the default values for switches whose default depends on TARGET_64BIT
2612 in case they weren't overwritten by command line options. */
2613 if (TARGET_64BIT)
2614 {
2615 /* Mach-O doesn't support omitting the frame pointer for now. */
2616 if (flag_omit_frame_pointer == 2)
2617 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2618 if (flag_asynchronous_unwind_tables == 2)
2619 flag_asynchronous_unwind_tables = 1;
2620 if (flag_pcc_struct_return == 2)
2621 flag_pcc_struct_return = 0;
2622 }
2623 else
2624 {
2625 if (flag_omit_frame_pointer == 2)
2626 flag_omit_frame_pointer = 0;
2627 if (flag_asynchronous_unwind_tables == 2)
2628 flag_asynchronous_unwind_tables = 0;
2629 if (flag_pcc_struct_return == 2)
2630 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2631 }
2632
2633 /* Need to check -mtune=generic first. */
2634 if (ix86_tune_string)
2635 {
2636 if (!strcmp (ix86_tune_string, "generic")
2637 || !strcmp (ix86_tune_string, "i686")
2638 /* As special support for cross compilers we read -mtune=native
2639 as -mtune=generic. With native compilers we won't see the
2640 -mtune=native, as it was changed by the driver. */
2641 || !strcmp (ix86_tune_string, "native"))
2642 {
2643 if (TARGET_64BIT)
2644 ix86_tune_string = "generic64";
2645 else
2646 ix86_tune_string = "generic32";
2647 }
2648 /* If this call is for setting the option attribute, allow the
2649 generic32/generic64 that was previously set. */
2650 else if (!main_args_p
2651 && (!strcmp (ix86_tune_string, "generic32")
2652 || !strcmp (ix86_tune_string, "generic64")))
2653 ;
2654 else if (!strncmp (ix86_tune_string, "generic", 7))
2655 error ("bad value (%s) for %stune=%s %s",
2656 ix86_tune_string, prefix, suffix, sw);
2657 }
2658 else
2659 {
2660 if (ix86_arch_string)
2661 ix86_tune_string = ix86_arch_string;
2662 if (!ix86_tune_string)
2663 {
2664 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2665 ix86_tune_defaulted = 1;
2666 }
2667
2668 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2669 need to use a sensible tune option. */
2670 if (!strcmp (ix86_tune_string, "generic")
2671 || !strcmp (ix86_tune_string, "x86-64")
2672 || !strcmp (ix86_tune_string, "i686"))
2673 {
2674 if (TARGET_64BIT)
2675 ix86_tune_string = "generic64";
2676 else
2677 ix86_tune_string = "generic32";
2678 }
2679 }
2680 if (ix86_stringop_string)
2681 {
2682 if (!strcmp (ix86_stringop_string, "rep_byte"))
2683 stringop_alg = rep_prefix_1_byte;
2684 else if (!strcmp (ix86_stringop_string, "libcall"))
2685 stringop_alg = libcall;
2686 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2687 stringop_alg = rep_prefix_4_byte;
2688 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2689 && TARGET_64BIT)
2690 /* rep; movq isn't available in 32-bit code. */
2691 stringop_alg = rep_prefix_8_byte;
2692 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2693 stringop_alg = loop_1_byte;
2694 else if (!strcmp (ix86_stringop_string, "loop"))
2695 stringop_alg = loop;
2696 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2697 stringop_alg = unrolled_loop;
2698 else
2699 error ("bad value (%s) for %sstringop-strategy=%s %s",
2700 ix86_stringop_string, prefix, suffix, sw);
2701 }
2702 if (!strcmp (ix86_tune_string, "x86-64"))
2703 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2704 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2705 prefix, suffix, prefix, suffix, prefix, suffix);
2706
2707 if (!ix86_arch_string)
a5de8fe8 2708 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
c251ad9e
SS
2709 else
2710 ix86_arch_specified = 1;
2711
2712 if (!strcmp (ix86_arch_string, "generic"))
2713 error ("generic CPU can be used only for %stune=%s %s",
2714 prefix, suffix, sw);
2715 if (!strncmp (ix86_arch_string, "generic", 7))
2716 error ("bad value (%s) for %sarch=%s %s",
2717 ix86_arch_string, prefix, suffix, sw);
2718
2719 if (ix86_cmodel_string != 0)
2720 {
2721 if (!strcmp (ix86_cmodel_string, "small"))
2722 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2723 else if (!strcmp (ix86_cmodel_string, "medium"))
2724 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2725 else if (!strcmp (ix86_cmodel_string, "large"))
2726 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2727 else if (flag_pic)
2728 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2729 else if (!strcmp (ix86_cmodel_string, "32"))
2730 ix86_cmodel = CM_32;
2731 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2732 ix86_cmodel = CM_KERNEL;
2733 else
2734 error ("bad value (%s) for %scmodel=%s %s",
2735 ix86_cmodel_string, prefix, suffix, sw);
2736 }
2737 else
2738 {
2739 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2740 use of rip-relative addressing. This eliminates fixups that
2741 would otherwise be needed if this object is to be placed in a
2742 DLL, and is essentially just as efficient as direct addressing. */
2743 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2744 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2745 else if (TARGET_64BIT)
2746 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2747 else
2748 ix86_cmodel = CM_32;
2749 }
2750 if (ix86_asm_string != 0)
2751 {
2752 if (! TARGET_MACHO
2753 && !strcmp (ix86_asm_string, "intel"))
2754 ix86_asm_dialect = ASM_INTEL;
2755 else if (!strcmp (ix86_asm_string, "att"))
2756 ix86_asm_dialect = ASM_ATT;
2757 else
2758 error ("bad value (%s) for %sasm=%s %s",
2759 ix86_asm_string, prefix, suffix, sw);
2760 }
2761 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2762 error ("code model %qs not supported in the %s bit mode",
2763 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2764 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2765 sorry ("%i-bit mode not compiled in",
2766 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2767
2768 for (i = 0; i < pta_size; i++)
2769 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2770 {
2771 ix86_schedule = processor_alias_table[i].schedule;
2772 ix86_arch = processor_alias_table[i].processor;
2773 /* Default cpu tuning to the architecture. */
2774 ix86_tune = ix86_arch;
2775
2776 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2777 error ("CPU you selected does not support x86-64 "
2778 "instruction set");
2779
2780 if (processor_alias_table[i].flags & PTA_MMX
2781 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2782 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2783 if (processor_alias_table[i].flags & PTA_3DNOW
2784 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2785 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2786 if (processor_alias_table[i].flags & PTA_3DNOW_A
2787 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2788 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2789 if (processor_alias_table[i].flags & PTA_SSE
2790 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2791 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2792 if (processor_alias_table[i].flags & PTA_SSE2
2793 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2794 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2795 if (processor_alias_table[i].flags & PTA_SSE3
2796 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2797 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2798 if (processor_alias_table[i].flags & PTA_SSSE3
2799 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2800 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2801 if (processor_alias_table[i].flags & PTA_SSE4_1
2802 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2803 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2804 if (processor_alias_table[i].flags & PTA_SSE4_2
2805 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2806 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2807 if (processor_alias_table[i].flags & PTA_AVX
2808 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2809 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2810 if (processor_alias_table[i].flags & PTA_FMA
2811 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2812 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2813 if (processor_alias_table[i].flags & PTA_SSE4A
2814 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2815 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2816 if (processor_alias_table[i].flags & PTA_SSE5
2817 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2818 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2819 if (processor_alias_table[i].flags & PTA_ABM
2820 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2821 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2822 if (processor_alias_table[i].flags & PTA_CX16
2823 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2824 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2825 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2826 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2827 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2828 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2829 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2830 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2831 if (processor_alias_table[i].flags & PTA_AES
2832 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2833 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2834 if (processor_alias_table[i].flags & PTA_PCLMUL
2835 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2836 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2837 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2838 x86_prefetch_sse = true;
2839
2840 break;
2841 }
2842
2843 if (i == pta_size)
2844 error ("bad value (%s) for %sarch=%s %s",
2845 ix86_arch_string, prefix, suffix, sw);
2846
2847 ix86_arch_mask = 1u << ix86_arch;
2848 for (i = 0; i < X86_ARCH_LAST; ++i)
2849 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2850
2851 for (i = 0; i < pta_size; i++)
2852 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2853 {
2854 ix86_schedule = processor_alias_table[i].schedule;
2855 ix86_tune = processor_alias_table[i].processor;
2856 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2857 {
2858 if (ix86_tune_defaulted)
2859 {
2860 ix86_tune_string = "x86-64";
2861 for (i = 0; i < pta_size; i++)
2862 if (! strcmp (ix86_tune_string,
2863 processor_alias_table[i].name))
2864 break;
2865 ix86_schedule = processor_alias_table[i].schedule;
2866 ix86_tune = processor_alias_table[i].processor;
2867 }
2868 else
2869 error ("CPU you selected does not support x86-64 "
2870 "instruction set");
2871 }
2872 /* Intel CPUs have always interpreted SSE prefetch instructions as
2873 NOPs; so, we can enable SSE prefetch instructions even when
2874 -mtune (rather than -march) points us to a processor that has them.
2875 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2876 higher processors. */
2877 if (TARGET_CMOVE
2878 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2879 x86_prefetch_sse = true;
2880 break;
2881 }
2882 if (i == pta_size)
2883 error ("bad value (%s) for %stune=%s %s",
2884 ix86_tune_string, prefix, suffix, sw);
2885
2886 ix86_tune_mask = 1u << ix86_tune;
2887 for (i = 0; i < X86_TUNE_LAST; ++i)
2888 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2889
2890 if (optimize_size)
2891 ix86_cost = &ix86_size_cost;
2892 else
2893 ix86_cost = processor_target_table[ix86_tune].cost;
2894
2895 /* Arrange to set up i386_stack_locals for all functions. */
2896 init_machine_status = ix86_init_machine_status;
2897
2898 /* Validate -mregparm= value. */
2899 if (ix86_regparm_string)
2900 {
2901 if (TARGET_64BIT)
2902 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2903 i = atoi (ix86_regparm_string);
2904 if (i < 0 || i > REGPARM_MAX)
2905 error ("%sregparm=%d%s is not between 0 and %d",
2906 prefix, i, suffix, REGPARM_MAX);
2907 else
2908 ix86_regparm = i;
2909 }
2910 if (TARGET_64BIT)
2911 ix86_regparm = REGPARM_MAX;
2912
2913 /* If the user has provided any of the -malign-* options,
2914 warn and use that value only if -falign-* is not set.
2915 Remove this code in GCC 3.2 or later. */
2916 if (ix86_align_loops_string)
2917 {
2918 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
2919 prefix, suffix, suffix);
2920 if (align_loops == 0)
2921 {
2922 i = atoi (ix86_align_loops_string);
2923 if (i < 0 || i > MAX_CODE_ALIGN)
2924 error ("%salign-loops=%d%s is not between 0 and %d",
2925 prefix, i, suffix, MAX_CODE_ALIGN);
2926 else
2927 align_loops = 1 << i;
2928 }
2929 }
2930
2931 if (ix86_align_jumps_string)
2932 {
2933 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
2934 prefix, suffix, suffix);
2935 if (align_jumps == 0)
2936 {
2937 i = atoi (ix86_align_jumps_string);
2938 if (i < 0 || i > MAX_CODE_ALIGN)
2939 error ("%salign-loops=%d%s is not between 0 and %d",
2940 prefix, i, suffix, MAX_CODE_ALIGN);
2941 else
2942 align_jumps = 1 << i;
2943 }
2944 }
2945
2946 if (ix86_align_funcs_string)
2947 {
2948 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
2949 prefix, suffix, suffix);
2950 if (align_functions == 0)
2951 {
2952 i = atoi (ix86_align_funcs_string);
2953 if (i < 0 || i > MAX_CODE_ALIGN)
2954 error ("%salign-loops=%d%s is not between 0 and %d",
2955 prefix, i, suffix, MAX_CODE_ALIGN);
2956 else
2957 align_functions = 1 << i;
2958 }
2959 }
2960
2961 /* Default align_* from the processor table. */
2962 if (align_loops == 0)
2963 {
2964 align_loops = processor_target_table[ix86_tune].align_loop;
2965 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2966 }
2967 if (align_jumps == 0)
2968 {
2969 align_jumps = processor_target_table[ix86_tune].align_jump;
2970 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2971 }
2972 if (align_functions == 0)
2973 {
2974 align_functions = processor_target_table[ix86_tune].align_func;
2975 }
2976
2977 /* Validate -mbranch-cost= value, or provide default. */
2978 ix86_branch_cost = ix86_cost->branch_cost;
2979 if (ix86_branch_cost_string)
2980 {
2981 i = atoi (ix86_branch_cost_string);
2982 if (i < 0 || i > 5)
2983 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2984 else
2985 ix86_branch_cost = i;
2986 }
2987 if (ix86_section_threshold_string)
2988 {
2989 i = atoi (ix86_section_threshold_string);
2990 if (i < 0)
2991 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2992 else
2993 ix86_section_threshold = i;
2994 }
2995
2996 if (ix86_tls_dialect_string)
2997 {
2998 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2999 ix86_tls_dialect = TLS_DIALECT_GNU;
3000 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3001 ix86_tls_dialect = TLS_DIALECT_GNU2;
3002 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3003 ix86_tls_dialect = TLS_DIALECT_SUN;
3004 else
3005 error ("bad value (%s) for %stls-dialect=%s %s",
3006 ix86_tls_dialect_string, prefix, suffix, sw);
3007 }
3008
3009 if (ix87_precision_string)
3010 {
3011 i = atoi (ix87_precision_string);
3012 if (i != 32 && i != 64 && i != 80)
3013 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3014 }
3015
3016 if (TARGET_64BIT)
3017 {
3018 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3019
3020 /* Enable by default the SSE and MMX builtins. Do allow the user to
3021 explicitly disable any of these. In particular, disabling SSE and
3022 MMX for kernel code is extremely useful. */
3023 if (!ix86_arch_specified)
3024 ix86_isa_flags
3025 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3026 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3027
3028 if (TARGET_RTD)
3029 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3030 }
3031 else
3032 {
3033 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3034
3035 if (!ix86_arch_specified)
3036 ix86_isa_flags
3037 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3038
3039 /* i386 ABI does not specify red zone. It still makes sense to use it
3040 when programmer takes care to stack from being destroyed. */
3041 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3042 target_flags |= MASK_NO_RED_ZONE;
3043 }
3044
3045 /* Keep nonleaf frame pointers. */
3046 if (flag_omit_frame_pointer)
3047 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3048 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3049 flag_omit_frame_pointer = 1;
3050
3051 /* If we're doing fast math, we don't care about comparison order
3052 wrt NaNs. This lets us use a shorter comparison sequence. */
3053 if (flag_finite_math_only)
3054 target_flags &= ~MASK_IEEE_FP;
3055
3056 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3057 since the insns won't need emulation. */
3058 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3059 target_flags &= ~MASK_NO_FANCY_MATH_387;
3060
3061 /* Likewise, if the target doesn't have a 387, or we've specified
3062 software floating point, don't use 387 inline intrinsics. */
3063 if (!TARGET_80387)
3064 target_flags |= MASK_NO_FANCY_MATH_387;
3065
3066 /* Turn on MMX builtins for -msse. */
3067 if (TARGET_SSE)
3068 {
3069 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3070 x86_prefetch_sse = true;
3071 }
3072
3073 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3074 if (TARGET_SSE4_2 || TARGET_ABM)
3075 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3076
3077 /* Validate -mpreferred-stack-boundary= value or default it to
3078 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3079 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3080 if (ix86_preferred_stack_boundary_string)
3081 {
3082 i = atoi (ix86_preferred_stack_boundary_string);
3083 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3084 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3085 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3086 else
3087 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3088 }
3089
3090 /* Set the default value for -mstackrealign. */
3091 if (ix86_force_align_arg_pointer == -1)
3092 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3093
3094 /* Validate -mincoming-stack-boundary= value or default it to
3095 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3096 if (ix86_force_align_arg_pointer)
3097 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3098 else
3099 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3100 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3101 if (ix86_incoming_stack_boundary_string)
3102 {
3103 i = atoi (ix86_incoming_stack_boundary_string);
3104 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3105 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3106 i, TARGET_64BIT ? 4 : 2);
3107 else
3108 {
3109 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3110 ix86_incoming_stack_boundary
3111 = ix86_user_incoming_stack_boundary;
3112 }
3113 }
3114
3115 /* Accept -msseregparm only if at least SSE support is enabled. */
3116 if (TARGET_SSEREGPARM
3117 && ! TARGET_SSE)
3118 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3119
3120 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3121 if (ix86_fpmath_string != 0)
3122 {
3123 if (! strcmp (ix86_fpmath_string, "387"))
3124 ix86_fpmath = FPMATH_387;
3125 else if (! strcmp (ix86_fpmath_string, "sse"))
3126 {
3127 if (!TARGET_SSE)
3128 {
3129 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3130 ix86_fpmath = FPMATH_387;
3131 }
3132 else
3133 ix86_fpmath = FPMATH_SSE;
3134 }
3135 else if (! strcmp (ix86_fpmath_string, "387,sse")
3136 || ! strcmp (ix86_fpmath_string, "387+sse")
3137 || ! strcmp (ix86_fpmath_string, "sse,387")
3138 || ! strcmp (ix86_fpmath_string, "sse+387")
3139 || ! strcmp (ix86_fpmath_string, "both"))
3140 {
3141 if (!TARGET_SSE)
3142 {
3143 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3144 ix86_fpmath = FPMATH_387;
3145 }
3146 else if (!TARGET_80387)
3147 {
3148 warning (0, "387 instruction set disabled, using SSE arithmetics");
3149 ix86_fpmath = FPMATH_SSE;
3150 }
3151 else
3152 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3153 }
3154 else
3155 error ("bad value (%s) for %sfpmath=%s %s",
3156 ix86_fpmath_string, prefix, suffix, sw);
3157 }
3158
3159 /* If the i387 is disabled, then do not return values in it. */
3160 if (!TARGET_80387)
3161 target_flags &= ~MASK_FLOAT_RETURNS;
3162
3163 /* Use external vectorized library in vectorizing intrinsics. */
3164 if (ix86_veclibabi_string)
3165 {
3166 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3167 ix86_veclib_handler = ix86_veclibabi_svml;
3168 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3169 ix86_veclib_handler = ix86_veclibabi_acml;
3170 else
3171 error ("unknown vectorization library ABI type (%s) for "
3172 "%sveclibabi=%s %s", ix86_veclibabi_string,
3173 prefix, suffix, sw);
3174 }
3175
3176 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3177 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3178 && !optimize_size)
3179 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3180
3181 /* ??? Unwind info is not correct around the CFG unless either a frame
3182 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3183 unwind info generation to be aware of the CFG and propagating states
3184 around edges. */
3185 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3186 || flag_exceptions || flag_non_call_exceptions)
3187 && flag_omit_frame_pointer
3188 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3189 {
3190 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3191 warning (0, "unwind tables currently require either a frame pointer "
3192 "or %saccumulate-outgoing-args%s for correctness",
3193 prefix, suffix);
3194 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3195 }
3196
3197 /* If stack probes are required, the space used for large function
3198 arguments on the stack must also be probed, so enable
3199 -maccumulate-outgoing-args so this happens in the prologue. */
3200 if (TARGET_STACK_PROBE
3201 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3202 {
3203 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3204 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3205 "for correctness", prefix, suffix);
3206 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3207 }
3208
3209 /* For sane SSE instruction set generation we need fcomi instruction.
3210 It is safe to enable all CMOVE instructions. */
3211 if (TARGET_SSE)
3212 TARGET_CMOVE = 1;
3213
3214 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3215 {
3216 char *p;
3217 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3218 p = strchr (internal_label_prefix, 'X');
3219 internal_label_prefix_len = p - internal_label_prefix;
3220 *p = '\0';
3221 }
3222
3223 /* When scheduling description is not available, disable scheduler pass
3224 so it won't slow down the compilation and make x87 code slower. */
3225 if (!TARGET_SCHEDULE)
3226 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3227
3228 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3229 set_param_value ("simultaneous-prefetches",
3230 ix86_cost->simultaneous_prefetches);
3231 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3232 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3233 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3234 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3235 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3236 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3237
3238 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3239 can be optimized to ap = __builtin_next_arg (0). */
3240 if (!TARGET_64BIT)
3241 targetm.expand_builtin_va_start = NULL;
3242
3243 if (TARGET_64BIT)
3244 {
3245 ix86_gen_leave = gen_leave_rex64;
3246 ix86_gen_pop1 = gen_popdi1;
3247 ix86_gen_add3 = gen_adddi3;
3248 ix86_gen_sub3 = gen_subdi3;
3249 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3250 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3251 ix86_gen_monitor = gen_sse3_monitor64;
3252 ix86_gen_andsp = gen_anddi3;
3253 }
3254 else
3255 {
3256 ix86_gen_leave = gen_leave;
3257 ix86_gen_pop1 = gen_popsi1;
3258 ix86_gen_add3 = gen_addsi3;
3259 ix86_gen_sub3 = gen_subsi3;
3260 ix86_gen_sub3_carry = gen_subsi3_carry;
3261 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3262 ix86_gen_monitor = gen_sse3_monitor;
3263 ix86_gen_andsp = gen_andsi3;
3264 }
3265
3266#ifdef USE_IX86_CLD
3267 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3268 if (!TARGET_64BIT)
3269 target_flags |= MASK_CLD & ~target_flags_explicit;
3270#endif
3271
3272 /* Save the initial options in case the user does function specific options */
3273 if (main_args_p)
3274 target_option_default_node = target_option_current_node
3275 = build_target_option_node ();
3276}
3277
3278/* Update register usage after having seen the compiler flags. */
3279
3280void
3281ix86_conditional_register_usage (void)
3282{
3283 int i;
3284 unsigned int j;
3285
3286 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3287 {
3288 if (fixed_regs[i] > 1)
3289 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3290 if (call_used_regs[i] > 1)
3291 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3292 }
3293
3294 /* The PIC register, if it exists, is fixed. */
3295 j = PIC_OFFSET_TABLE_REGNUM;
3296 if (j != INVALID_REGNUM)
3297 fixed_regs[j] = call_used_regs[j] = 1;
3298
3299 /* The MS_ABI changes the set of call-used registers. */
3300 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3301 {
3302 call_used_regs[SI_REG] = 0;
3303 call_used_regs[DI_REG] = 0;
3304 call_used_regs[XMM6_REG] = 0;
3305 call_used_regs[XMM7_REG] = 0;
3306 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3307 call_used_regs[i] = 0;
3308 }
3309
3310 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3311 other call-clobbered regs for 64-bit. */
3312 if (TARGET_64BIT)
3313 {
3314 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3315
3316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3317 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3318 && call_used_regs[i])
3319 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3320 }
3321
3322 /* If MMX is disabled, squash the registers. */
3323 if (! TARGET_MMX)
3324 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3325 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3326 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3327
3328 /* If SSE is disabled, squash the registers. */
3329 if (! TARGET_SSE)
3330 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3331 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3332 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3333
3334 /* If the FPU is disabled, squash the registers. */
3335 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3336 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3337 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3338 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3339
3340 /* If 32-bit, squash the 64-bit registers. */
3341 if (! TARGET_64BIT)
3342 {
3343 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3344 reg_names[i] = "";
3345 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3346 reg_names[i] = "";
3347 }
3348}
3349
3350\f
3351/* Save the current options */
3352
3353static void
3354ix86_function_specific_save (struct cl_target_option *ptr)
3355{
3356 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3357 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3358 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3359 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3360 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3361
3362 ptr->arch = ix86_arch;
3363 ptr->schedule = ix86_schedule;
3364 ptr->tune = ix86_tune;
3365 ptr->fpmath = ix86_fpmath;
3366 ptr->branch_cost = ix86_branch_cost;
3367 ptr->tune_defaulted = ix86_tune_defaulted;
3368 ptr->arch_specified = ix86_arch_specified;
3369 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3370 ptr->target_flags_explicit = target_flags_explicit;
3371}
3372
3373/* Restore the current options */
3374
3375static void
3376ix86_function_specific_restore (struct cl_target_option *ptr)
3377{
3378 enum processor_type old_tune = ix86_tune;
3379 enum processor_type old_arch = ix86_arch;
3380 unsigned int ix86_arch_mask, ix86_tune_mask;
3381 int i;
3382
3383 ix86_arch = ptr->arch;
3384 ix86_schedule = ptr->schedule;
3385 ix86_tune = ptr->tune;
3386 ix86_fpmath = ptr->fpmath;
3387 ix86_branch_cost = ptr->branch_cost;
3388 ix86_tune_defaulted = ptr->tune_defaulted;
3389 ix86_arch_specified = ptr->arch_specified;
3390 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3391 target_flags_explicit = ptr->target_flags_explicit;
3392
3393 /* Recreate the arch feature tests if the arch changed */
3394 if (old_arch != ix86_arch)
3395 {
3396 ix86_arch_mask = 1u << ix86_arch;
3397 for (i = 0; i < X86_ARCH_LAST; ++i)
3398 ix86_arch_features[i]
3399 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3400 }
3401
3402 /* Recreate the tune optimization tests */
3403 if (old_tune != ix86_tune)
3404 {
3405 ix86_tune_mask = 1u << ix86_tune;
3406 for (i = 0; i < X86_TUNE_LAST; ++i)
3407 ix86_tune_features[i]
3408 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3409 }
3410}
3411
3412/* Print the current options */
3413
3414static void
3415ix86_function_specific_print (FILE *file, int indent,
3416 struct cl_target_option *ptr)
3417{
3418 char *target_string
3419 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3420 NULL, NULL, NULL, false);
3421
3422 fprintf (file, "%*sarch = %d (%s)\n",
3423 indent, "",
3424 ptr->arch,
3425 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3426 ? cpu_names[ptr->arch]
3427 : "<unknown>"));
3428
3429 fprintf (file, "%*stune = %d (%s)\n",
3430 indent, "",
3431 ptr->tune,
3432 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3433 ? cpu_names[ptr->tune]
3434 : "<unknown>"));
3435
3436 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3437 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3438 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3439 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3440
3441 if (target_string)
3442 {
3443 fprintf (file, "%*s%s\n", indent, "", target_string);
3444 free (target_string);
3445 }
3446}
3447
3448\f
3449/* Inner function to process the attribute((target(...))), take an argument and
3450 set the current options from the argument. If we have a list, recursively go
3451 over the list. */
3452
3453static bool
3454ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3455{
3456 char *next_optstr;
3457 bool ret = true;
3458
3459#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3460#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3461#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3462#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3463
3464 enum ix86_opt_type
3465 {
3466 ix86_opt_unknown,
3467 ix86_opt_yes,
3468 ix86_opt_no,
3469 ix86_opt_str,
3470 ix86_opt_isa
3471 };
3472
3473 static const struct
3474 {
3475 const char *string;
3476 size_t len;
3477 enum ix86_opt_type type;
3478 int opt;
3479 int mask;
3480 } attrs[] = {
3481 /* isa options */
3482 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3483 IX86_ATTR_ISA ("abm", OPT_mabm),
3484 IX86_ATTR_ISA ("aes", OPT_maes),
3485 IX86_ATTR_ISA ("avx", OPT_mavx),
3486 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3487 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3488 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3489 IX86_ATTR_ISA ("sse", OPT_msse),
3490 IX86_ATTR_ISA ("sse2", OPT_msse2),
3491 IX86_ATTR_ISA ("sse3", OPT_msse3),
3492 IX86_ATTR_ISA ("sse4", OPT_msse4),
3493 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3494 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3495 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3496 IX86_ATTR_ISA ("sse5", OPT_msse5),
3497 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3498
3499 /* string options */
3500 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3501 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3502 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3503
3504 /* flag options */
3505 IX86_ATTR_YES ("cld",
3506 OPT_mcld,
3507 MASK_CLD),
3508
3509 IX86_ATTR_NO ("fancy-math-387",
3510 OPT_mfancy_math_387,
3511 MASK_NO_FANCY_MATH_387),
3512
3513 IX86_ATTR_NO ("fused-madd",
3514 OPT_mfused_madd,
3515 MASK_NO_FUSED_MADD),
3516
3517 IX86_ATTR_YES ("ieee-fp",
3518 OPT_mieee_fp,
3519 MASK_IEEE_FP),
3520
3521 IX86_ATTR_YES ("inline-all-stringops",
3522 OPT_minline_all_stringops,
3523 MASK_INLINE_ALL_STRINGOPS),
3524
3525 IX86_ATTR_YES ("inline-stringops-dynamically",
3526 OPT_minline_stringops_dynamically,
3527 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3528
3529 IX86_ATTR_NO ("align-stringops",
3530 OPT_mno_align_stringops,
3531 MASK_NO_ALIGN_STRINGOPS),
3532
3533 IX86_ATTR_YES ("recip",
3534 OPT_mrecip,
3535 MASK_RECIP),
3536
3537 };
3538
3539 /* If this is a list, recurse to get the options. */
3540 if (TREE_CODE (args) == TREE_LIST)
3541 {
3542 bool ret = true;
3543
3544 for (; args; args = TREE_CHAIN (args))
3545 if (TREE_VALUE (args)
3546 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3547 ret = false;
3548
3549 return ret;
3550 }
3551
3552 else if (TREE_CODE (args) != STRING_CST)
3553 gcc_unreachable ();
3554
3555 /* Handle multiple arguments separated by commas. */
3556 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3557
3558 while (next_optstr && *next_optstr != '\0')
3559 {
3560 char *p = next_optstr;
3561 char *orig_p = p;
3562 char *comma = strchr (next_optstr, ',');
3563 const char *opt_string;
3564 size_t len, opt_len;
3565 int opt;
3566 bool opt_set_p;
3567 char ch;
3568 unsigned i;
3569 enum ix86_opt_type type = ix86_opt_unknown;
3570 int mask = 0;
3571
3572 if (comma)
3573 {
3574 *comma = '\0';
3575 len = comma - next_optstr;
3576 next_optstr = comma + 1;
3577 }
3578 else
3579 {
3580 len = strlen (p);
3581 next_optstr = NULL;
3582 }
3583
3584 /* Recognize no-xxx. */
3585 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3586 {
3587 opt_set_p = false;
3588 p += 3;
3589 len -= 3;
3590 }
3591 else
3592 opt_set_p = true;
3593
3594 /* Find the option. */
3595 ch = *p;
3596 opt = N_OPTS;
3597 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3598 {
3599 type = attrs[i].type;
3600 opt_len = attrs[i].len;
3601 if (ch == attrs[i].string[0]
3602 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3603 && memcmp (p, attrs[i].string, opt_len) == 0)
3604 {
3605 opt = attrs[i].opt;
3606 mask = attrs[i].mask;
3607 opt_string = attrs[i].string;
3608 break;
3609 }
3610 }
3611
3612 /* Process the option. */
3613 if (opt == N_OPTS)
3614 {
3615 error ("attribute(target(\"%s\")) is unknown", orig_p);
3616 ret = false;
3617 }
3618
3619 else if (type == ix86_opt_isa)
3620 ix86_handle_option (opt, p, opt_set_p);
3621
3622 else if (type == ix86_opt_yes || type == ix86_opt_no)
3623 {
3624 if (type == ix86_opt_no)
3625 opt_set_p = !opt_set_p;
3626
3627 if (opt_set_p)
3628 target_flags |= mask;
3629 else
3630 target_flags &= ~mask;
3631 }
3632
3633 else if (type == ix86_opt_str)
3634 {
3635 if (p_strings[opt])
3636 {
3637 error ("option(\"%s\") was already specified", opt_string);
3638 ret = false;
3639 }
3640 else
3641 p_strings[opt] = xstrdup (p + opt_len);
3642 }
3643
3644 else
3645 gcc_unreachable ();
3646 }
3647
3648 return ret;
3649}
3650
3651/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3652
3653tree
3654ix86_valid_target_attribute_tree (tree args)
3655{
3656 const char *orig_arch_string = ix86_arch_string;
3657 const char *orig_tune_string = ix86_tune_string;
3658 const char *orig_fpmath_string = ix86_fpmath_string;
3659 int orig_tune_defaulted = ix86_tune_defaulted;
3660 int orig_arch_specified = ix86_arch_specified;
3661 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3662 tree t = NULL_TREE;
3663 int i;
3664 struct cl_target_option *def
3665 = TREE_TARGET_OPTION (target_option_default_node);
3666
3667 /* Process each of the options on the chain. */
3668 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3669 return NULL_TREE;
3670
3671 /* If the changed options are different from the default, rerun override_options,
3672 and then save the options away. The string options are are attribute options,
3673 and will be undone when we copy the save structure. */
3674 if (ix86_isa_flags != def->ix86_isa_flags
3675 || target_flags != def->target_flags
3676 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3677 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3678 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3679 {
3680 /* If we are using the default tune= or arch=, undo the string assigned,
3681 and use the default. */
3682 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3683 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3684 else if (!orig_arch_specified)
3685 ix86_arch_string = NULL;
3686
3687 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3688 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3689 else if (orig_tune_defaulted)
3690 ix86_tune_string = NULL;
3691
3692 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3693 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3694 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3695 else if (!TARGET_64BIT && TARGET_SSE)
3696 ix86_fpmath_string = "sse,387";
3697
3698 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3699 override_options (false);
3700
3701 /* Add any builtin functions with the new isa if any. */
3702 ix86_add_new_builtins (ix86_isa_flags);
3703
3704 /* Save the current options unless we are validating options for
3705 #pragma. */
3706 t = build_target_option_node ();
3707
3708 ix86_arch_string = orig_arch_string;
3709 ix86_tune_string = orig_tune_string;
3710 ix86_fpmath_string = orig_fpmath_string;
3711
3712 /* Free up memory allocated to hold the strings */
3713 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3714 if (option_strings[i])
3715 free (option_strings[i]);
3716 }
3717
3718 return t;
3719}
3720
3721/* Hook to validate attribute((target("string"))). */
3722
3723static bool
3724ix86_valid_target_attribute_p (tree fndecl,
3725 tree ARG_UNUSED (name),
3726 tree args,
3727 int ARG_UNUSED (flags))
3728{
3729 struct cl_target_option cur_target;
3730 bool ret = true;
3731 tree old_optimize = build_optimization_node ();
3732 tree new_target, new_optimize;
3733 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3734